Commit dbf82e0a authored by levintow's avatar levintow
Browse files

recreating R dataset to include outcome and censoring variables for ipwrisk

parent 8797299a
#####################################
## ipwrisk testing: ACTG example
## Sara Levintow
## August 7, 2017
#####################################
## Load packages
library("devtools")
devtools::install_bitbucket("novisci/ipwrisk",
auth_user = "salevintow@gmail.com",
dependencies = TRUE,
password = .rs.askForPassword("Bitbucket Password"))
library(ipwrisk)
library(data.table)
library(pryr) # had to add this, was not loaded as a dependency
library(ggplot2)
library(dplyr)
library(survival)
library(splines)
library(rms)
library(foreach)
## Load data
load("data/actg.Rda")
## Created event time and censoring time variables in R data set (don't need to recreate):
#actg$Y <- ifelse(actg$delta==1,actg$days,NA) #Y is time of event (AIDS or death)
#actg$C <- ifelse(actg$delta==0,actg$days,NA) #C is time of censoring
#actg$C1 <- ifelse(actg$drop==1,actg$days,NA) #C1 is time of censoring due to drop-out
#actg$C2 <- ifelse(actg$delta==0 & actg$drop==0,actg$days,NA) #C2 is time of censoring not due to drop-out
## Looking at distribution of covariates by treatment (ART)
vars_by_art <- actg %>%
group_by(art) %>%
summarize(n(),mean(male),mean(black),mean(hispanic),mean(idu),mean(r),mean(age),mean(karnof),mean(cd4))
## Crude analysis of treatment outcome
outcome_by_art <- actg %>%
group_by(art) %>%
summarize(n(),outcome=sum(delta),risk=mean(delta),time=mean(na.omit(Y)),dropout=sum(drop))
plot(survfit(Surv(days,delta)~art,data=actg),fun="event",xlim=c(0,365))
## Load into a data.table
a=data.table(actg)
## Specifying the data structure and models
ds.struct.1 = build_models(treat("art", formula = art~male+black+hispanic+idu+age+karnof+cd4),
cens("C", formula = ~strat(art)+male+black+hispanic+idu+age+karnof+cd4),
outcome("Y"))
actg1 = est.ipwrisk(a,ds.struct.1, times = seq(0,365,10),
label = c("actg","Main Analysis"))
plot(actg1)
plot(actg1,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in ACTG data")
hist(actg1) + theme_bw() + ggtitle("Propensity Score Distributions")
## covariates do not predict treatment due to randomization
## now using multiple censoring models (1 for drop-out, 1 for admin censoring)
ds.struct.2 = build_models(treat("art", formula = art~male+black+hispanic+idu+age+karnof+cd4),
cens("C1", formula = ~strat(art)+male+black+hispanic+idu+age+karnof+cd4),
cens("C2", formula = ~strat(art)+male+black+hispanic+idu+age+karnof+cd4),
outcome("Y"))
actg2 = est.ipwrisk(a,ds.struct.2, times = seq(0,365,10),
label = c("actg","Main Analysis"))
summary(actg2)
plot(actg2)
plot(actg2,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in ACTG data")
# comparing results (1 censoring model vs. 2 censoring models)
library(knitr)
kable(make_table2(actg1, actg2, index = 365), format="markdown")
# risks and RDs not calculated? table just shows NA
......@@ -5,6 +5,7 @@ use_data_raw()
# Reading in ACTG data set
actg<-read.table("/Users/salevintow/Documents/R Software Development/actg/data-raw/actg320.23nov16.dat.txt")
colnames(actg)<-c("id","male","black","hispanic","idu","art","delta","drop","r","age","karnof","days","cd4","stop")
# Review variables and make sure data read in correctly
str(actg)
# stop variable is currently a factor w/ 144 levels
......@@ -12,25 +13,12 @@ str(actg)
actg$stop<-as.character(actg$stop)
actg$stop<-ifelse(actg$stop=="."," ",actg$stop)
actg$stop<-as.numeric(actg$stop)
# Save R data set in the WIHS package
devtools::use_data(actg)
# Load R data set
load(file="actg.Rda")
# Initial analyses looking at data
library(ggplot2)
library(dplyr)
library(survival)
table(actg$art,actg$delta,useNA = "always")
review<-actg %>%
group_by(art) %>%
summarize(num=n(),outcome=mean(delta),dropout=mean(drop),time=mean(days))
# Crude survival analysis
actg$SurvObj <- with(actg, Surv(days, delta == 1))
head(actg)
# Kaplan-Meier estimator
km.as.one <- survfit(SurvObj ~ 1, data = actg)
km.by.art <- survfit(SurvObj ~ art, data = actg)
# Basic plots
plot(km.as.one)
plot(km.by.art)
## Creating event time and censoring time variables
actg$Y <- ifelse(actg$delta==1,actg$days,NA) #Y is time of event (AIDS or death)
actg$C <- ifelse(actg$delta==0,actg$days,NA) #C is time of censoring
actg$C1 <- ifelse(actg$drop==1,actg$days,NA) #C1 is time of censoring due to drop-out
actg$C2 <- ifelse(actg$delta==0 & actg$drop==0,actg$days,NA) #C2 is time of censoring not due to drop-out
# Save R data set in the actg package
devtools::use_data(actg,overwrite = TRUE)
No preview for this file type
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment