cvMSM {cvDSA}R Documentation

Selecting/Fitting Marginal Structural Models

Description

'cvMSM' is used to select/fit marginal structural models with G-computation, IPTW and DR estimates.

Usage

cvMSM(y, a, v, w, data=NULL, yfamily='gaussian', afamily='binomial',
      model.msm=NULL, model.aw=NULL, model.av=NULL, model.yaw=NULL, 
      model.yyaw=NULL, wt.censor=NULL, ncv=5, ncv.nuisance=5, 
      mapping='DR', fitting='DR', all3=F, stable.wt=T, 
      fixed.terms=NULL, printout=T, dyn.treat=NULL, dyn.cutoff=NULL,
        rep.ID=F, ID=NULL) 

Arguments

y response variable: vector of length 'n'.
a treatment variable: vector of length 'n'.
v adjustment variable: vector/matrix.
w baseline covariates: vector/matrix.
data an optional data frame containing the variables in 'y', 'a', 'v' and 'w'.
yfamily a description of the error distribution and link function to be used in the 'y'-related models E[Y|A,W]. Availible choices are 'gaussian' and 'binomial'.
afamily a description of the error distribution and link function to be used in the 'a'-related models (g(A|W) and g(A|V)). Availible choices are 'gaussian' and 'binomial'.
model.msm a list description of the MSM, e.g., model.msm=list(Model=, Size=, Order=, Int= ).
model.aw a list description of g(A|W). See 'model.msm'.
model.av a list description of g(A|V). See 'model.msm'.
model.yaw a list description of E(Y|A,W). See 'model.msm'.
model.yyaw a list description of E(Y^2|A,W). See 'model.msm'.
wt.censor an optional vector of censoring weights to be used in the selecting/fitting process.
ncv an integer of the number of fold for the V-fold cross-validation for selecting MSM.
ncv.nuisance an integer of the number of fold for the V-fold cross-validation for selecting the nuisance parameter models, e.g., g(A|W), g(A|V), E(Y|A,W) and E(Y^2|A,W).
mapping the method to be used in calcuating the loss function, e.g., 'IPTW', 'Gcomp' and 'DR'.
fitting the method to be used in fitting the model, e.g., 'IPTW', 'Gcomp' and 'DR'.
all3 a logical value indicating whether to select/fit all three MSM's or the one specified by user.
stable.wt a logical value indicating whether stabilized weight should be used in 'IPTW' and 'DR' mapping/fitting.
fixed.terms a list description of the fixed terms included in selecting MSM.
printout if True, intermediate results are printed.
dyn.treat if NULL, non-dynamic treatment; if dyn.treat is given (as a treatment matrix), cvMSM() selects/fits a dynamic treatment MSM.
dyn.cutoff
rep.ID a logical value indicating whether the observations have repeated IDs (not independent observations).
ID a vector which identifies the clusters.

Value

If the MSM is not provided by the user, 'cvMSM' will return the best MSM fit in two formats: an easy-to-read format ($model) includes the formula and the coefficients; the other format ($model.pbeat), including '$formula' and '$coefficients', is in form a list (for formula) and an array (for the coefficients), which is easier to be used in 'cv.predict()' function. If the MSM is prespecified by the user, only the latter format is given. 'cvMSM' also returns the cross-validation risk matrix.

Note

All the 'models' should be defined as a list with either of the following four components: Model, Size, Order, Int. $Model is a string formula indicating the linear combination of model; $Size should give the maximum number of terms in the model; $Order is a vector with the same length as the number of covariates, indicating the maximal power is allowed for each covariate; $Int indicates the maximal interactions allowed in the model.

See Also

cvGLM, cvDCY, cv.predict, check.ETA, create.obs.data

Examples

# Example 1.
#Let W={W1, W2}
n <- 2000
w1 <- runif(n, 0, 1); w2 <- runif(n, 0, 1);
w <- cbind(w1=w1, w2=w2);
# g(A|W) = logit^(-1) (1 - W1 + W2)
model.aw <- list(formula="w1+w2", coef=c(1,-1,1));
# E(Y|A,W) = 1 + 2A + 1.5W1 + W2 - W1*W2
model.yaw <- list(formula="a+w1+w2+w1:w2", coef=c(1, 2, 1.5, 1, -1));

obs.data <- create.obs.data(w, afamily='binomial', yfamily='gaussian', 
            model.yaw=model.yaw, model.aw=model.aw);

msm.given <- cvMSM(y=y, a=a, v=w1, w=w, data=obs.data, 
            model.msm=list(Model="a+w1", Size=3, Order=c(1,2), Int=1), 
            model.av=list(Model="w1"), 
            model.aw=list(Model="w1+w2", Size=3, Int=1), 
            model.yaw=list(Model="a+w1+w2+w1:w2"),
            mapping='IPTW', fitting='IPTW', all3=T, stable.wt=T)
            
msm.iptw <- cvMSM(y=y, a=a, v=w1, w=w, data=obs.data, 
            model.msm=list(Model=NULL, Size=3, Order=c(1,2), Int=1), 
            model.av=list(Model="w1"), 
            model.aw=list(Model="w1+w2", Size=3, Int=1), 
            mapping='IPTW', fitting='IPTW', stable.wt=T)

# Example 2.
n<-2000;
w1<-runif(n); w2<-runif(n); w3<-runif(n); w4<-runif(n);
w<-cbind(w1,w2,w3,w4);

# Let g(A|W) = logit^(-1) (-1 + w1 - w2 + w1*w3)
model.aw <- list(formula = "w1+w2+w1:w3", coef = c(-1, 1, -5, 1))
          # about 60
        
# Let E(Y|A, W)=-1+A+w1+w2+w1^2;
model.yaw <- list(formula="a+w1+w2+w1^2", coef=c(-1, 1, 1, 1, 1));

obs.data <- create.obs.data(w, afamily='binomial', yfamily='gaussian', 
            model.yaw, model.aw)

msm.iptw <- cvMSM(y=y, a=a, v=w1, w=w, data=obs.data, 
            model.msm=list(Model=NULL, Size=3, Order=c(1,2), Int=1), 
            model.av=list(Model="w1"), 
            model.aw=list(Model="w1+w2+w1:w3", Size=3, Int=1), 
            mapping='IPTW', fitting='IPTW', stable.wt=T)

msm.g <- cvMSM(y=y, a=a, v=w1, w=w, data=obs.data,
           model.msm=list(Model=NULL, Size=3, Int=1), 
           model.yaw=list(Model="a+w1+w2+w1^2", Size=4, Int=2), 
           model.yyaw=list(Model=NULL, Size=4, Int=2), 
           mapping='Gcomp', fitting='Gcomp', stable.wt=F)

msm.dr <- cvMSM(y=y, a=a, v=w1, w=w, data=obs.data, 
          model.msm=list(Model=NULL, Size=3, Int=1), 
          model.av=list(Model="w1"), 
          model.aw=list(Model="w1+w2+w1:w3", Size=3, Int=1), 
          model.yaw=list(Model="a+w1+w2+w1^2", Size=4, Int=2), 
          model.yyaw=list(Model=NULL, Size=6, Int=2), 
          mapping='DR', fitting='DR', stable.wt=T)

# Example 3. Dynamic Treatment
# Don't try.

n <- 1000
w1 <- runif(n, 0, 1); w2 <- runif(n, 0, 1);
w <- cbind(w1=w1, w2=w2);
# g(A|W) = logit^(-1) (1 - W1 + W2)
model.aw <- list(formula=list(c(1,0),c(0,1)), coef=c(1,-1,1));
# Q(Y|A,W) = 1 + 2A + 1.5W1 + W2 - W1*W2
model.yaw <- list(formula=list(c(1,0,0),c(0,1,0),c(0,0,1), c(0,1,1)), 
                  coef=c(1, 2, 1.5, 1, -1));

obs.data <- create.obs.data(w, afamily='binomial', yfamily='gaussian', 
                 model.yaw=model.yaw, model.aw=model.aw);
attach(obs.data)
# dyn.treat: matrix, treatment under different rules (by column)
# dyn.cutoff: list(R1=c(theta1_1,theta1_2,...), R2=c(theta2_1, theta2_2,..),,..)
# e.g., dyn.cutoff <- list(R1: w1<5,a=1: c(5), R2: w1<8, a=1: c(8))
#       dyn.cutoff <- list(R1: w1<5,w2<.3, a=1: c(5,.3), R2: w1<8, w2<.7, a=1: c(8, .7))
# length(dyn.cutoff)== ncol(dyn.treat)
tr0<-rep(0,n); tr0[w1>0] <- 1
tr1<-rep(0,n); tr1[w1>0.2] <- 1
tr2<-rep(0,n); tr2[w1>.5] <- 1
tr3<-rep(0,n); tr3[w1>.7] <- 1
tr4<-rep(0,n); tr4[w1>1] <- 1
treats <- cbind(tr0,tr1,tr2,tr3,tr4)
cutoff <- list(R1=c(0), R2=c(0.2), R3=c(0.5), R4=c(0.7), R5=c(1))
detach(2)
dyn.MSM <- cvMSM(y, a, v=w2, w=cbind(w1,w2), data=obs.data, yfamily='gaussian', afamily='binomial', model.msm=list(Size=3, Order=c(1,1), Int=2), model.aw=NULL, model.av=NULL, model.yaw=list(Model=model.yaw$formula), model.yyaw=list(Model=model.yaw$formula), wt.censor=NULL, ncv=3, ncv.nuisance=3, mapping='Gcomp', fitting='Gcomp', all3=F, stable.wt=F, fixed.terms=NULL, printout=T, dyn.treat=treats, dyn.cutoff=cutoff) 

dyn.MSMs <- cvMSM(y, a, v=w2, w=cbind(w1,w2), data=obs.data, yfamily='gaussian', afamily='binomial', model.msm=list(Size=4, Order=c(1,4), Int=1), model.aw=NULL, model.av=NULL, model.yaw=list(Model=model.yaw$formula), model.yyaw=list(Model=model.yaw$formula), wt.censor=NULL, ncv=3, ncv.nuisance=3, mapping='Gcomp', fitting='Gcomp', all3=F, stable.wt=F, fixed.terms=NULL, printout=T, dyn.treat=treats, dyn.cutoff=NULL) 


[Package cvDSA version 0.5-3 Index]