Commit 61cee4ea authored by levintow's avatar levintow
Browse files

updating ipwrisk testing with wihs data

parent 47f3e11c
#####################################
## ipwrisk testing: WIHS example
## Sara Levintow
## August 7, 2017
## August 21, 2017
#####################################
## Load packages
library("devtools")
library(devtools)
devtools::install_bitbucket("novisci/ipwrisk",
auth_user = "salevintow@gmail.com",
dependencies = TRUE,
password = .rs.askForPassword("Bitbucket Password"))
library(ipwrisk)
library(data.table)
library(ggplot2)
library(dplyr)
## These are the packages that need to be loaded - not done automatically with library(ipwrisk)
library(tidyverse)
library(survival)
library(splines)
library(rms)
library(foreach)
library(knitr)
library(data.table)
library(ggplot2)
library(dplyr)
library(survival)
## Load data
load("data/wihs.Rda")
load("~/Documents/R Software Development/wihs2009/data/wihs.Rda")
load("~/Documents/R Software Development/wihs2009/data/wihs2.Rda")
library(WIHS2009)
## Created these event time and censoring time variables in the R data set (do not need to recreate):
#wihs$Y <- ifelse(wihs$delta==1,wihs$time,NA) #Y is time of event (AIDS or death)
......@@ -64,43 +69,66 @@ ggplot(data=sd.1, aes(x=t, group=strata, fill=strata)) +
a=data.table(wihs)
## Specifying the data structure and models
library(pryr) # had to add this, was not loaded as a dependency
ds.struct.1 = build_models(treat("idu", formula = idu~white+age+cd4),
cens("C", formula = ~strat(idu)+white+age+cd4),
outcome("Y"))
## First looking at a crude model (no IP weights for confounding or censoring)
ds.struct.1 = build_models(treat("idu", formula = idu~1),
cens("C1", formula = C1~strat(idu)),
cens("C2", formula = C2~strat(idu)),
outcome("Y", formula = Y~idu))
wihs1 = est.ipwrisk(a,ds.struct.1, times = seq(0,10,0.1),
label = c("wihs1","Main Analysis"))
label = c("wihs1","Crude Analysis"))
kable(make_table2(wihs1, index = 10), format="markdown")
#in the table, idu=1 is labeled as Control and idu=0 is labeled as Treatment
#however, idu=0 has sample size of n=725 but a lower risk, not a higher one
#idu=1 should have a sample size of n=439 and a HIGHER risk
#there seems to be a mix-up of group labeling
#what value of PY is given in the table?
plot(wihs1) + theme_bw() + xlab("Follow-up time") + ylab("Risk of outcome")
plot(wihs1,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in WIHS data")
hist(wihs1) + theme_bw() + ggtitle("Propensity Score Distributions")
kable(make_table2(wihs1, index = 10), format="markdown")
## Now using multiple censoring models (1 for drop-out, 1 for admin censoring)
## Now using IP weights for confounding
ds.struct.2 = build_models(treat("idu", formula = idu~white+age+cd4),
cens("C1", formula = ~strat(idu)+white+age+cd4),
cens("C2", formula = ~strat(idu)+white+age+cd4),
cens("C1", formula = C1~strat(idu)),
cens("C2", formula = C2~strat(idu)),
outcome("Y"))
wihs2 = est.ipwrisk(a,ds.struct.2, times = seq(0,10,0.1),
label = c("wihs2","Main Analysis with 2 Censoring Models"))
label = c("wihs2","Main Analysis"))
kable(make_table2(wihs2, index = 10), format="markdown")
#sample sizes of treatment and control groups do not correspond to respective risk for each group
plot(wihs2) + theme_bw() + xlab("Follow-up time") + ylab("Risk of outcome")
plot(wihs2,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in WIHS data")
hist(wihs2) + theme_bw() + ggtitle("Propensity Score Distributions")
## Comparing results (1 censoring model vs. 2 censoring models)
kable(make_table2(wihs1, wihs2, index = 10), format="markdown")
## The results appear identical - does it not matter whether 1 or 2 censoring models are specified?
## Now using IP weights for confounding and censoring (not distinguishing between admin censoring or LTFU)
ds.struct.3 = build_models(treat("idu", formula = idu~white+age+cd4),
cens("C", formula = C~strat(idu)+white+age+cd4), #one censoring model
outcome("Y"))
## What happens if data structure does not specify predictors of treatment and censoring?
## Just want to look at crude results for comparison, using the package
ds.struct.3 = build_models(treat("idu", formula = idu~1),
cens("C1", formula = C1~strat(idu)),
cens("C2", formula = C2~strat(idu)),
outcome("Y", formula = Y~idu))
wihs3 = est.ipwrisk(a,ds.struct.3, times = seq(0,10,0.1),
label = c("wihs3","Crude Analysis"))
label = c("wihs3","Main Analysis"))
kable(make_table2(wihs3, index = 10), format="markdown")
plot(wihs3) + theme_bw() + xlab("Follow-up time") + ylab("Risk of outcome")
plot(wihs3,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in WIHS data")
hist(wihs3) + theme_bw() + ggtitle("Propensity Score Distributions")
## Now using multiple censoring models (1 for drop-out, 1 for admin censoring)
ds.struct.4 = build_models(treat("idu", formula = idu~white+age+cd4),
cens("C1", formula = C1~strat(idu)+white+age+cd4),
cens("C2", formula = C2~strat(idu)+white+age+cd4),
outcome("Y"))
wihs4 = est.ipwrisk(a,ds.struct.4, times = seq(0,10,0.1),
label = c("wihs4","Main Analysis with 2 Censoring Models"))
kable(make_table2(wihs4, index = 10), format="markdown")
plot(wihs4) + theme_bw() + xlab("Follow-up time") + ylab("Risk of outcome")
plot(wihs4,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in WIHS data")
## Comparing results
# 1) Crude model
# 2) IPTW model
# 2) IPTW/IPCW model (1 censoring variable)
# 3) IPTW/IPCW model (2 censoring variables)
kable(make_table2(wihs1, wihs2, wihs3, index = 10), format="markdown")
kable(make_table2(wihs1, wihs2, wihs3, wihs4, index = 10), format="markdown")
## The results appear identical for wihs 2, 3, 4
## does it not matter whether censoring models are specified?
---
title: "ipwrisk testing with WIHS 2009 data"
author: "Sara Levintow"
date: "8/21/2017"
output: html_document
---
## Set up
```{r}
library(ipwrisk)
```
In addition to ipwrisk, the following packages must be loaded (not done automatically by library(ipwrisk)):
```{r, message=FALSE, warning=FALSE}
library(tidyverse)
library(survival)
library(splines)
library(rms)
library(foreach)
library(knitr)
library(data.table)
library(ggplot2)
library(dplyr)
library(survival)
```
Loading WIHS data from my local version of wihs2009 repository:
```{r}
load("~/Documents/R Software Development/wihs2009/data/wihs.Rda")
```
## Initial review of data without ipwrisk functions
In the WIHS 2009 data, the main exposure is history of injection drug use. The distribution of covariates (race, age, and CD4 cell count) by exposure is below. There appear to be imbalances by exposure status; participants with a history of injection drug use are more likely to be older, are less likely to be white, and have a slightly higher CD4 cell count.
```{r, warning=FALSE}
wihs %>%
group_by(idu) %>%
dplyr::summarize(n(),white=sum(white)/n(),age=mean(age),cd4=mean(cd4))
```
The crude analysis of outcome (AIDS or death) by exposure shows that the 10-year risk is higher for those with a history of injection drug use (IDU=1). Participants without a history of injection drug use are more likely to be lost to follow-up.
```{r, warning=FALSE}
wihs %>%
group_by(idu) %>%
dplyr::summarize(n(),outcome=sum(delta),risk=mean(delta),time=mean(na.omit(Y)),LTFU=sum(drop)/n())
```
Plotting the crude risk by exposure suggests that the risk is consistently higher among the exposed (IDU=1) during 10 years of follow up.
```{r, echo=FALSE}
sum.surv.1 <- summary(survfit(Surv(time,delta)~idu, data=wihs))
sd.1 = data.frame(
t = sum.surv.1$time,
strata = sum.surv.1$strata,
n = sum.surv.1$n.risk,
r = 1-sum.surv.1$surv,
s = sum.surv.1$surv,
n.event = sum.surv.1$n.event,
n.censor = sum.surv.1$n.censor,
se = sum.surv.1$std.err,
upper = 1 - sum.surv.1$lower,
lower = 1 - sum.surv.1$upper
)
ggplot(data=sd.1, aes(x=t, group=strata, fill=strata)) +
geom_line(aes(y=r, color=strata)) +
geom_ribbon(aes(ymin=lower, ymax=upper), alpha=0.25) +
theme_bw() +
xlab("Follow-up time") +
ylab("Risk of outcome")
```
## Using ipwrisk functions
Loading data into a data.table for use with ipwrisk
```{r, warning=FALSE}
a=data.table(wihs)
```
### Model 1: First looking at a crude model (no IP weights for confounding or censoring)
```{r, warning=FALSE}
ds.struct.1 = build_models(treat("idu", formula = idu~1),
cens("C1", formula = C1~strat(idu)),
cens("C2", formula = C2~strat(idu)),
outcome("Y", formula = Y~idu))
wihs1 = est.ipwrisk(a,ds.struct.1, times = seq(0,10,0.1),
label = c("wihs1","Crude Analysis"))
```
The make_table2 function may be mixing up the group labeling: in the table, idu=1 is labeled as Control and idu=0 is labeled as Treatment. However, the idu=0 group should have a sample size of n=725 and a *lower* crude risk; the idu=1 group should have a sample size of n=439 and a *higher* crude risk. These sample sizes and risk values appear to be flipped in the table. Also, what value of person-years is given in the table for each exposure group?
```{r, warning=FALSE}
kable(make_table2(wihs1, index = 10), format="markdown")
```
Plots of the cumulative risk and risk difference are consistent with values in the table:
```{r, warning=FALSE}
plot(wihs1) + theme_bw() + xlab("Follow-up time") + ylab("Risk of outcome")
plot(wihs1,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in WIHS data")
```
## Model 2: Now using IP weights for confounding
```{r, warning=FALSE}
ds.struct.2 = build_models(treat("idu", formula = idu~white+age+cd4),
cens("C1", formula = C1~strat(idu)),
cens("C2", formula = C2~strat(idu)),
outcome("Y"))
wihs2 = est.ipwrisk(a,ds.struct.2, times = seq(0,10,0.1),
label = c("wihs2","Main Analysis"))
kable(make_table2(wihs2, index = 10), format="markdown")
```
The IP-weighted cumulative RD decreases slightly but still suggests a higher risk among the exposed. Again, the sample sizes of treatment and control groups in the output do not correspond to the respective risk values for each group. The plots below are consistent with the values in the table, and the histogram indicates that the assumption of positivity has not been violated:
```{r, warning=FALSE}
plot(wihs2) + theme_bw() + xlab("Follow-up time") + ylab("Risk of outcome")
plot(wihs2,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in WIHS data")
hist(wihs2) + theme_bw() + ggtitle("Propensity Score Distributions")
```
## Model 3: Now using IP weights for confounding and censoring (not distinguishing between admin censoring or loss to follow-up)
```{r, warning=FALSE}
ds.struct.3 = build_models(treat("idu", formula = idu~white+age+cd4),
cens("C", formula = C~strat(idu)+white+age+cd4), #one censoring model
outcome("Y"))
wihs3 = est.ipwrisk(a,ds.struct.3, times = seq(0,10,0.1),
label = c("wihs3","Main Analysis"))
kable(make_table2(wihs3, index = 10), format="markdown")
```
The RD from Model 3 is identical to the RD from Model 2, and all plots appear to be the same, despite including an additional weights model for censoring:
```{r, warning=FALSE}
plot(wihs3) + theme_bw() + xlab("Follow-up time") + ylab("Risk of outcome")
plot(wihs3,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in WIHS data")
```
## Model 4: Now using multiple censoring models (1 for admin censoring, 1 for loss to follow-up)
```{r, warning=FALSE}
ds.struct.4 = build_models(treat("idu", formula = idu~white+age+cd4),
cens("C1", formula = C1~strat(idu)+white+age+cd4),
cens("C2", formula = C2~strat(idu)+white+age+cd4),
outcome("Y"))
wihs4 = est.ipwrisk(a,ds.struct.4, times = seq(0,10,0.1),
label = c("wihs4","Main Analysis with 2 Censoring Models"))
kable(make_table2(wihs4, index = 10), format="markdown")
```
The results of Model 4 also appear identical to Models 2 and 3. Accounting for multiple sources of censoring does not appear to make a difference.
```{r, warning=FALSE}
plot(wihs4) + theme_bw() + xlab("Follow-up time") + ylab("Risk of outcome")
plot(wihs4,rd = TRUE) + theme_bw() + ggtitle("Cumulative RD in WIHS data")
```
## Model Comparison
1) Crude model
2) IPTW model
3) IPTW/IPCW model (1 censoring variable)
4) IPTW/IPCW model (2 censoring variables)
```{r, warning=FALSE}
kable(make_table2(wihs1, wihs2, wihs3, wihs4, index = 10), format="markdown")
```
### It seems odd that the results for models 2-4 are essentially identical, given that there is differential loss to follow-up by exposure. Is this due to a coding error on my part or a bug in the ipwrisk functions?
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment