###############################################################################
### code for "The importance of scale for spatial-confounding bias and
### precision of spatial regression estimators",
### appearing in Statistical Science, 2010
### Chris Paciorek, 5/4/2010
###
### NOTE(review): this copy of the file has had its line breaks collapsed and
### some text lost during extraction (see thresh() below); compare against the
### published supplementary code before running.
###############################################################################

######################################
### auxiliary code
######################################

# Matern correlation evaluated at the Euclidean distances in 'dist', with
# range parameter parm1 and smoothness parm2; see also Matern() in the fields
# package.  Working with log values first is about the same speed as the
# original scale but may be more stable numerically.
f.matern.euc <- function(dist,parm1,parm2){
  # log of the Matern normalizing constant 1/(gamma(nu) * 2^(nu-1))
  const=-lgamma(parm2)-(parm2-1)*log(2)
  tmp=exp(const+parm2*log(2.0*sqrt(parm2)*dist/parm1)+log(besselK(2.0*sqrt(parm2)*dist/parm1,parm2)))
  # clamp numerical overshoot above 1
  # NOTE(review): when 'dist' mixes zeros and positive values, tmp is NaN at
  # the zeros (-Inf + Inf inside exp), so this subscripted assignment errors
  # with "NAs are not allowed in subscripted assignments"; applying the
  # dist==0 correction BEFORE this clamp would avoid that.
  tmp[tmp>1]=1
  tmp[dist==0]=1   # correlation is exactly 1 at distance zero (tmp is NaN there)
  return(tmp)
}

# Generates n locations (n is read from the global environment) based on a
# Poisson cluster process: parents are uniform on the unit square, m is the
# average number of children per parent point, and sd is the spread of the
# Gaussian kernel for the children.
pcpGen=function(){
  m=7          # mean children per parent
  rho=n*4/m    # parent intensity, chosen so roughly 4n children are generated
  larger.region=c(0,1,0,1)   # xmin, xmax, ymin, ymax of the sampling window
  nn <- rpois(1, lambda = rho * (larger.region[2] - larger.region[1]) * (larger.region[4] - larger.region[3]))
  parents <- cbind(runif(nn, larger.region[1], larger.region[2]), runif(nn, larger.region[3], larger.region[4]))
  sd=.03       # standard deviation of the Gaussian dispersal kernel
  num.child <- rpois(nn, lambda = m)   # children count per parent
  num.tot <- sum(num.child)
  # children = parent coordinates plus Gaussian noise, as a num.tot x 2 matrix
  sim.events <- matrix(c(rnorm(num.tot, 0, sd) + rep(parents[, 1], num.child), rnorm(num.tot, 0, sd) + rep(parents[,2], num.child)), num.tot)
  # keep exactly n of the generated children
  # NOTE(review): unguarded — errors if num.tot < n (presumably rare for
  # these settings); confirm against the published source.
  sim.events=sim.events[sample(1:nrow(sim.events),n),]
  return(sim.events)
}

# Thresholds values at 'up'/'lo' for better plotting.
# NOTE(review): the body below is visibly truncated by the extraction —
# 'vec[vec' runs straight into '0.0001){' and the middle of the Section 2
# simulation loop.  The original presumably read 'vec[vec<lo]=lo } return(vec) }'
# followed by simulation set-up (definitions of krigMod, phi, useNu, dmat,
# fullx, bhat, the i/j/m loops, nSims, etc.) that is missing here.  Tokens are
# preserved as found.
thresh=function(vec,up=NULL,lo=NULL){
  if(!is.null(up)){
    vec[vec>up]=up
  }
  if(!is.null(lo)){
    vec[vec0.0001){
      # Section 2 simulation (fragment): GLS slope standard error and
      # effective df under the fitted kriging covariance.  krigMod, phi,
      # useNu, dmat, fullx, bhat come from code lost in the extraction.
      SigmaEst=krigMod$sigmasq*f.matern.euc(dmat,phi,useNu)
      partial=SigmaEst   # spatial component only, used for the effective-df trace
      diag(SigmaEst)=diag(SigmaEst)+krigMod$tausq   # add nugget variance
      sX=solve(SigmaEst,fullx)
      se=sqrt(solve(t(fullx)%*%sX)[2,2])            # GLS SE of the slope
      df=sum(diag(partial%*%solve(SigmaEst)))       # effective spatial df
    } else{
      # spatial variance estimated as negligible: OLS-style standard error
      se=sqrt(krigMod$tausq*solve(t(fullx)%*%fullx)[2,2])
      df=0
    }
    # accumulate Monte Carlo sums for method 1 (GLS/kriging)
    beta[1,i,j]=beta[1,i,j]+bhat
    betaSD2[1,i,j]=betaSD2[1,i,j]+bhat^2
    betaSE2[1,i,j]=betaSE2[1,i,j]+se^2
    cvg[1,i,j]=cvg[1,i,j]+(abs(bhat-betaX)<2*se)   # +/- 2 SE interval coverage
    betaReps[1,i,j,m]=bhat
    seReps[1,i,j,m]=se
    estdf[2,i,j,m]=df
  } else{
    cntBad[i,j]=cntBad[i,j]+1   # kriging fit unusable; this replicate is dropped
  }
  # regression splines with 5, 15, 30 df
  # (methods 3-5; k = df+1 basis functions with fx=TRUE for a fixed-df fit)
  regrMod=gam(y~x+s(xs1,xs2,k=6,fx=TRUE))
  bhat=regrMod$coef[2]
  se=summary(regrMod)$p.table[2,2]
  beta[3,i,j]=beta[3,i,j]+bhat
  betaSD2[3,i,j]=betaSD2[3,i,j]+bhat^2
  betaSE2[3,i,j]=betaSE2[3,i,j]+se^2
  cvg[3,i,j]=cvg[3,i,j]+(abs(bhat-betaX)<2*se)
  betaReps[3,i,j,m]=bhat
  seReps[3,i,j,m]=se
  regrMod=gam(y~x+s(xs1,xs2,k=16,fx=TRUE))   # ~15 df
  bhat=regrMod$coef[2]
  se=summary(regrMod)$p.table[2,2]
  beta[4,i,j]=beta[4,i,j]+bhat
  betaSD2[4,i,j]=betaSD2[4,i,j]+bhat^2
  betaSE2[4,i,j]=betaSE2[4,i,j]+se^2
  cvg[4,i,j]=cvg[4,i,j]+(abs(bhat-betaX)<2*se)
  betaReps[4,i,j,m]=bhat
  seReps[4,i,j,m]=se
  regrMod=gam(y~x+s(xs1,xs2,k=31,fx=TRUE))   # ~30 df
  bhat=regrMod$coef[2]
  se=summary(regrMod)$p.table[2,2]
  beta[5,i,j]=beta[5,i,j]+bhat
  betaSD2[5,i,j]=betaSD2[5,i,j]+bhat^2
  betaSE2[5,i,j]=betaSE2[5,i,j]+se^2
  cvg[5,i,j]=cvg[5,i,j]+(abs(bhat-betaX)<2*se)
  betaReps[5,i,j,m]=bhat
  seReps[5,i,j,m]=se
  # penalized spline with fixed smoothing
  # (methods 7-9; psFun — defined in code missing here — is root-solved to
  # find the smoothing parameter giving ~5/15/30 effective df)
  spVal=uniroot(psFun,c(.00001,100),5)$root
  regrMod=gam(y~x+s(xs1,xs2,k=75),sp=spVal)
  bhat=regrMod$coef[2]
  se=summary(regrMod)$p.table[2,2]
  beta[7,i,j]=beta[7,i,j]+bhat
  betaSD2[7,i,j]=betaSD2[7,i,j]+bhat^2
  betaSE2[7,i,j]=betaSE2[7,i,j]+se^2
  cvg[7,i,j]=cvg[7,i,j]+(abs(bhat-betaX)<2*se)
  betaReps[7,i,j,m]=bhat
  seReps[7,i,j,m]=se
  estdf[3,i,j,m]=summary(regrMod)$edf   # realized effective df
  spVal=uniroot(psFun,c(.00001,100),15)$root
  regrMod=gam(y~x+s(xs1,xs2,k=75),sp=spVal)
  bhat=regrMod$coef[2]
  se=summary(regrMod)$p.table[2,2]
  beta[8,i,j]=beta[8,i,j]+bhat
  betaSD2[8,i,j]=betaSD2[8,i,j]+bhat^2
  betaSE2[8,i,j]=betaSE2[8,i,j]+se^2
  cvg[8,i,j]=cvg[8,i,j]+(abs(bhat-betaX)<2*se)
  betaReps[8,i,j,m]=bhat
  seReps[8,i,j,m]=se
  estdf[4,i,j,m]=summary(regrMod)$edf
  spVal=uniroot(psFun,c(.00001,100),30)$root
  regrMod=gam(y~x+s(xs1,xs2,k=75),sp=spVal)
  bhat=regrMod$coef[2]
  se=summary(regrMod)$p.table[2,2]
  beta[9,i,j]=beta[9,i,j]+bhat
  betaSD2[9,i,j]=betaSD2[9,i,j]+bhat^2
  betaSE2[9,i,j]=betaSE2[9,i,j]+se^2
  cvg[9,i,j]=cvg[9,i,j]+(abs(bhat-betaX)<2*se)
  betaReps[9,i,j,m]=bhat
  seReps[9,i,j,m]=se
  estdf[5,i,j,m]=summary(regrMod)$edf
}
# bias and prec when parameters are known
# (analytic GLS bias/variance under the generative model; sdfun, sig2z,
# betaZ, Rc, Ru, tau2, pc, pcbase, sig2c, sig2extra, extraSmallScale come
# from the missing set-up code)
Sigma=sdfun[i]^2*sig2z*betaZ*betaZ*Rc
if(extraSmallScale){
  Sigma=sdfun[i]^2*sig2z*betaZ*betaZ*Rc+sdfun[j]^2*sig2extra*Ru
}
diag(Sigma)=diag(Sigma)+tau2
sX=solve(Sigma,fullx)
knownVar=solve(t(fullx)%*%sX)   # GLS variance with known covariance
knownBias=pc*rho*sqrt(sig2z/sig2c)*betaZ
mat=(1-pc)*Ru%*%solve(Rc)
diag(mat)=diag(mat)+pc
knownBias=knownBias*(knownVar%*%t(fullx)%*%solve(Sigma,solve(mat,x)))[2]
# gls bias, var under generative model
fixBiasGls[1,i,j]=fixBiasGls[1,i,j]+knownBias
fixVarGls[1,i,j]=fixVarGls[1,i,j]+knownVar[2,2]
# second variant: rescaled variance and baseline confounding proportion pcbase
knownVar=knownVar/sdfun[i]^2
knownBias=pcbase*rho*sqrt(sig2z/sig2c)*betaZ
mat=mat*(1-pcbase)/(1-pc)
diag(mat)=diag(mat)+pcbase
knownBias=knownBias*(knownVar%*%t(fullx)%*%solve(Sigma,solve(mat,x)))[2]
fixBiasGls[2,i,j]=fixBiasGls[2,i,j]+knownBias
fixVarGls[2,i,j]=fixVarGls[2,i,j]+knownVar[2,2]
}
pz=pz/nSims
print(paste('actual pz is ',pz,sep=''))
print(c(date(),i,j))   # progress report per (i,j) cell
}
# convert accumulated sums into Monte Carlo means / variances
fixBiasGls=fixBiasGls/nSims
fixVarGls=fixVarGls/nSims
beta[2:nMethods,,]=beta[2:nMethods,,]/nSims
betaSD2[2:nMethods,,]=betaSD2[2:nMethods,,]/nSims-beta[2:nMethods,,]^2   # E[b^2]-E[b]^2
betaSE2[2:nMethods,,]=betaSE2[2:nMethods,,]/nSims
cvg[2:nMethods,,]=cvg[2:nMethods,,]/nSims
# method 1 (kriging) averages over only the replicates whose fit succeeded
beta[1,,]=beta[1,,]/(nSims-cntBad)
betaSE2[1,,]=betaSE2[1,,]/(nSims-cntBad)
betaSD2[1,,]=betaSD2[1,,]/(nSims-cntBad)-beta[1,,]^2
cvg[1,,]=cvg[1,,]/(nSims-cntBad)
# mean realized effective df per method/cell
meandf=array(0,c(5,nVals,nVals))
for(k in 1:5){
  for(i in 1:nVals){
    for(j in 1:nVals){
      meandf[k,i,j]=mean(estdf[k,i,j,],na.rm=T)
    }
  }
}
# Monte Carlo MSE of each estimator about the true betaX
mse=array(0,c(nMethods,nVals,nVals))
for(k in 1:nMethods){
  for(i in 1:nVals){
    for(j in 1:nVals){
      mse[k,i,j]=mean((betaReps[k,i,j,]-betaX)^2,na.rm=T)
    }
  }
}
# example code for plotting Fig.
### ... 5 (this code may not fully run but will give you a feel for things)

# relative bias of each estimator, thresholded to the color scale of Fig. 5a
bs=(beta-betaX)/betaX
bs=thresh(bs,.525,-.025)
# long-format grid: one row per (theta1, theta2, spline type, target df)
gr=expand.grid(theta1=theta1,theta2=theta2,spl=c('penalized spl.','regression spl.'),df=c(5,15,30))
gr$vals=NA
# methods 3-5 are the regression splines with ~5/15/30 df; methods 7-9 are
# the penalized splines tuned to the same effective df
gr$vals[gr$spl=='regression spl.'&gr$df==5]=bs[3,,]
gr$vals[gr$spl=='regression spl.'&gr$df==15]=bs[4,,]
gr$vals[gr$spl=='regression spl.'&gr$df==30]=bs[5,,]
gr$vals[gr$spl=='penalized spl.'&gr$df==5]=bs[7,,]
gr$vals[gr$spl=='penalized spl.'&gr$df==15]=bs[8,,]
gr$vals[gr$spl=='penalized spl.'&gr$df==30]=bs[9,,]
gr$df=as.factor(gr$df)
gr$spl=as.factor(gr$spl)
# custom lattice strip: label df only across the top row of panels
my.strip <-function(which.given, which.panel,bg, var.name, ...) {
  if (which.given == 1 && which.panel[2] == 2)
    strip.default(1, which.panel[1], var.name ='df',strip.names=c(T),style=1,bg='gray90',...)
}
# custom left strip: label the spline type only on the first column of panels
my.strip.left <- function(which.given, which.panel, var.name,bg, ..., horizontal) {
  if (which.given == 2 && which.panel[1] == 1)
    strip.default(1, which.panel[2], var.name = '',strip.names=c(T),style=1, horizontal = FALSE,bg='gray90', ...)
}

# fig. 5a
par(cex.axis=1.5,cex.lab=1.5,cex.main=1.5,cex.sub=1.5)
# NOTE(review): 'cuts=10,,xlab' below contains a doubled comma (an empty
# argument) — likely extraction damage; preserved as found.
levelplot(vals~theta1*theta2|df*spl,gr,zlim=c(-.025,0.525),at=seq(-.025,.525,by=.05),colorkey=list(labels=list(cex=1.4,at=seq(0,.5,by=.1))),col.regions=tim.colors(),cuts=10,,xlab=list(label=expression(paste('spatial scale of confounding, ',theta[c],sep=' ')),cex=1.5),ylab=list(label=expression(paste('unconfounded spatial scale, ',theta[u],sep=' ')),cex=1.5),strip=my.strip,strip.left=my.strip.left , par.settings =list(layout.heights = list(strip = c(0,.6)),layout.widths = list(strip.left = c(.8,0, 0))),main=list(label='(a) Bias',cex=1.5),scales=list(cex=c(1.4)),par.strip.text=list(cex=1.6))
dev.off()

# same layout for the Monte Carlo variance of the estimates (Fig. 5b)
bs=betaSD2
bs=thresh(bs,1.05,-.05)
gr=expand.grid(theta1=theta1,theta2=theta2,spl=c('penalized spl.','regression spl.'),df=c(5,15,30))
gr$vals=NA
gr$vals[gr$spl=='regression spl.'&gr$df==5]=bs[3,,]
gr$vals[gr$spl=='regression spl.'&gr$df==15]=bs[4,,]
gr$vals[gr$spl=='regression spl.'&gr$df==30]=bs[5,,]
gr$vals[gr$spl=='penalized spl.'&gr$df==5]=bs[7,,]
gr$vals[gr$spl=='penalized spl.'&gr$df==15]=bs[8,,]
gr$vals[gr$spl=='penalized spl.'&gr$df==30]=bs[9,,]
gr$df=as.factor(gr$df)
gr$spl=as.factor(gr$spl)
# same custom strips as for Fig. 5a
my.strip <-function(which.given, which.panel,bg, var.name, ...) {
  if (which.given == 1 && which.panel[2] == 2)
    strip.default(1, which.panel[1], var.name ='df',strip.names=c(T),style=1,bg='gray90',...)
}
my.strip.left <- function(which.given, which.panel, var.name,bg, ..., horizontal) {
  if (which.given == 2 && which.panel[1] == 1)
    strip.default(1, which.panel[2], var.name = '',strip.names=c(T),style=1, horizontal = FALSE,bg='gray90', ...)
}
# fig 5b
# NOTE(review): same doubled comma 'cuts=10,,xlab' as in Fig. 5a, preserved.
levelplot(vals~theta1*theta2|df*spl,gr,zlim=c(-.05,1.05),at=seq(-.05,1.05,by=.1),colorkey=list(labels=list(cex=1.4,at=seq(0,1,by=.2))),col.regions=tim.colors(),cuts=10,,xlab=list(label=expression(paste('spatial scale of confounding, ',theta[c],sep=' ')),cex=1.5),ylab=list(label=expression(paste('unconfounded spatial scale, ',theta[u],sep=' ')),cex=1.5),strip=my.strip,strip.left=my.strip.left , par.settings =list(layout.heights = list(strip = c(0,.6)),layout.widths = list(strip.left = c(.8,0, 0))),main=list(label='(b) Variance',cex=1.5),scales=list(cex=c(1.4)),par.strip.text=list(cex=1.6))
dev.off()

###############################
### code for Section 3.1
###############################
# sampling design for the locations
type='grid' # 'grid', 'unif', 'pcp'
if(type=='grid'){
  set.seed(0)
  n=100
  # regular sqrt(n) x sqrt(n) grid on the unit square
  x=expand.grid(seq(0,1,len=sqrt(n)),seq(0,1,len=sqrt(n)))
  dmat=rdist(x)   # rdist() is from the fields package
  theta1=theta2=seq(0,1,len=100)   # spatial-scale grids
  p=c(.1,.5,.9)                    # proportions of spatial variance
  ones=rep(1,n)
  precMat=array(0,c(length(theta1),length(theta2),3))
  for(kk in 1:3){
    for(i in 1:length(theta1)){
      # theta=0 is taken as the independence (identity-correlation) limit
      if(theta1[i]==0){
        rmat1=diag(rep(1,n))
      } else{
        rmat1=f.matern.euc(dmat,theta1[i],2)
      }
      for(j in 1:length(theta2)){
        if(theta2[j]==0){
          rmat2=diag(rep(1,n))
        } else{
          rmat2=f.matern.euc(dmat,theta2[j],2)
        }
        # error covariance: p*spatial + (1-p)*nugget on the diagonal
        covmat=p[kk]*rmat2
        diag(covmat)=(1-p[kk])+diag(covmat)
        # trace term of the GLS precision expression
        precMat[i,j,kk]=sum(diag(solve(covmat,rmat1)))
        den=t(ones)%*%solve(covmat,ones)
        num=t(ones)%*%solve(covmat,rmat1%*%solve(covmat,ones))
        # subtract the intercept-adjustment term
        precMat[i,j,kk]=precMat[i,j,kk]-num/den
        print(c(i,j,kk))   # progress report
      }
    }
  }
}
if(type!='grid'){
  # random designs: average the same quantity over nreps resampled location sets
  set.seed(0)
  n=100
  nreps=500
  theta1=theta2=seq(0,1,len=100)
  p=c(.1,.5,.9)
  ones=rep(1,n)
  precMat=array(0,c(length(theta1),length(theta2),3))
  for(kk in 1:3){
    for(i in 1:length(theta1)){
      for(j in 1:length(theta2)){
        # reset the seed so all (i,j,kk) cells see the same location replicates
        set.seed(0)
        for(m in 1:nreps){
          if(type=='unif'){
            xs=cbind(runif(n),runif(n))
          }
          if(type=='pcp'){
            xs=pcpGen()
          }
          dmat=rdist(xs)
          if(theta1[i]==0){
            rmat1=diag(rep(1,n))
          } else{
            rmat1=f.matern.euc(dmat,theta1[i],2)
          }
          if(theta2[j]==0){
            rmat2=diag(rep(1,n))
          } else{
            rmat2=f.matern.euc(dmat,theta2[j],2)
          }
          # (loop body continues in the remainder of the file)
          # (continuation of the Section 3.1 random-design replicate loop)
          # error covariance: p*spatial + (1-p)*nugget on the diagonal
          covmat=p[kk]*rmat2
          diag(covmat)=(1-p[kk])+diag(covmat)
          precMatHold=sum(diag(solve(covmat,rmat1)))   # trace term for this replicate
          den=t(ones)%*%solve(covmat,ones)
          num=t(ones)%*%solve(covmat,rmat1%*%solve(covmat,ones))
          # accumulate (trace - intercept adjustment) over replicates
          precMat[i,j,kk]=precMat[i,j,kk]+precMatHold-num/den
          print(c(i,j,kk))   # progress report
        }
      }
      print(date())
    }
  }
  precMat=precMat/nreps   # Monte Carlo average over location replicates
}

###############################
### code for Section 3.2
###############################
# code for grid
set.seed(0)
n=100
xs=expand.grid(seq(0,1,len=sqrt(n)),seq(0,1,len=sqrt(n)))   # regular grid design
nVals=100
theta1=theta2=seq(0,1.0,len=nVals)
dmat=rdist(xs)
nreps=500
p=c(.1,.5,.9)
precMatRatio=precMatOLS=precMatGLS=array(0,c(length(theta1),length(theta2),length(p)))
# issue of empirical vs population variance shouldn't matter because sigma_x cancels out of ratio of GLS and OLS precision; sigma2_g + tau2 also cancels out
for(pp in 1:3){
  for(i in 1:length(theta1)){
    # theta1 indexes the error-correlation scale; theta=0 is independence
    if(theta1[i]==0){
      rmat1=diag(rep(1,n))
      L1=rmat1
    } else{
      rmat1=f.matern.euc(dmat,theta1[i],2)
      L1=t(chol(rmat1))   # lower-triangular factor (assigned but unused here)
    }
    # error covariance: p*spatial + (1-p)*nugget
    Sigma=rmat1*p[pp]
    diag(Sigma)=diag(Sigma)+(1-p[pp])
    SigmaInv=solve(Sigma)
    for(j in 1:length(theta2)){
      # theta2 indexes the spatial scale of the exposure x
      if(theta2[j]==0){
        rmat2=diag(rep(1,n))
        L2=rmat2
      } else{
        rmat2=f.matern.euc(dmat,theta2[j],2)
        L2=t(chol(rmat2))   # used to simulate x below
      }
      # same exposure replicates for every (i,j,pp) cell
      set.seed(0)
      for(m in 1:nreps){
        x=L2%*%rnorm(n)   # simulate spatially correlated exposure
        fullx=cbind(rep(1,n),x)
        xtxInv=solve(t(fullx)%*%fullx)
        # den = GLS slope variance; num = sandwich (true) OLS slope variance
        den=(solve(t(fullx)%*%(SigmaInv%*%fullx)))[2,2]
        num=(xtxInv%*%(t(fullx)%*%(Sigma%*%fullx))%*%xtxInv)[2,2]
        precMatRatio[i,j,pp]=precMatRatio[i,j,pp]+num/den   # OLS var / GLS var
        precMatOLS[i,j,pp]=precMatOLS[i,j,pp]+1/num
        precMatGLS[i,j,pp]=precMatGLS[i,j,pp]+1/den
      }
    }
    print(c(pp,i,precMatRatio[i,4,pp]/nreps))   # progress + spot-check value
  }
  print(date())
}

# code for random uniform sampling or pcp
type='unif' #'pcp'
n=100
nVals=100
theta1=theta2=seq(0,1.0,len=nVals)
nreps=500
p=c(.1,.5,.9)
precMatRatio=precMatOLS=precMatGLS=array(0,c(length(theta1),length(theta2),length(p)))
# issue of empirical vs population variance shouldn't matter because sigma_x cancels out of ratio of GLS and OLS precision; sigma2_g + tau2 also cancels out
for(pp in 1:3){
  for(i in 1:length(theta1)){
    for(j in 1:length(theta2)){
      set.seed(0)   # same location/exposure replicates for every cell
      for(m in 1:nreps){
        # draw a fresh random design each replicate
        if(type=='unif'){
          xs=cbind(runif(n),runif(n))
        }
        if(type=='pcp'){
          xs=pcpGen()
        }
        dmat=rdist(xs)
        if(theta1[i]==0){
          rmat1=diag(rep(1,n))
          L1=rmat1
        } else{
          rmat1=f.matern.euc(dmat,theta1[i],2)
          L1=t(chol(rmat1))
        }
        Sigma=rmat1*p[pp]
        diag(Sigma)=diag(Sigma)+(1-p[pp])
        SigmaInv=solve(Sigma)
        if(theta2[j]==0){
          rmat2=diag(rep(1,n))
          L2=rmat2
        } else{
          rmat2=f.matern.euc(dmat,theta2[j],2)
          L2=t(chol(rmat2))
        }
        x=L2%*%rnorm(n)
        fullx=cbind(rep(1,n),x)
        xtxInv=solve(t(fullx)%*%fullx)
        den=(solve(t(fullx)%*%(SigmaInv%*%fullx)))[2,2]
        num=(xtxInv%*%(t(fullx)%*%(Sigma%*%fullx))%*%xtxInv)[2,2]
        precMatRatio[i,j,pp]=precMatRatio[i,j,pp]+num/den
        precMatOLS[i,j,pp]=precMatOLS[i,j,pp]+1/num
        precMatGLS[i,j,pp]=precMatGLS[i,j,pp]+1/den
      }
    }
    print(c(pp,i,precMatRatio[i,4,pp]/nreps))
  }
}

###############################
### code for Section 3.3
###############################
set.seed(0)
# code for gridded locations
n=100
xs=expand.grid(seq(0,1,len=sqrt(n)),seq(0,1,len=sqrt(n)))
# NOTE(review): the next line immediately overwrites the grid locations with
# uniform random locations, despite the "gridded locations" heading — likely
# a leftover edit; confirm against the published source.
xs=cbind(runif(n),runif(n))
nVals=100
theta1=theta2=seq(0,1.0,len=nVals)
dmat=rdist(xs)
nreps=500
p=c(.1,.5,.9)
precMatRatio=precMatOLS=precMatGLS=array(0,c(length(theta1),length(theta2),length(p)))
# issue of empirical vs population variance shouldn't matter because sigma_x cancels out of ratio of GLS and OLS precision; sigma2_g + tau2 also cancels out
for(pp in 1:3){
  for(i in 1:length(theta1)){
    if(theta1[i]==0){
      rmat1=diag(rep(1,n))
      L1=rmat1
    } else{
      rmat1=f.matern.euc(dmat,theta1[i],2)
      L1=t(chol(rmat1))
    }
    Sigma=rmat1*p[pp]
    diag(Sigma)=diag(Sigma)+(1-p[pp])
    SigmaInv=solve(Sigma)
    for(j in 1:length(theta2)){
      if(theta2[j]==0){
        rmat2=diag(rep(1,n))
        L2=rmat2
      } else{
        rmat2=f.matern.euc(dmat,theta2[j],2)
        L2=t(chol(rmat2))
      }
      set.seed(0)
      for(m in 1:nreps){
        x=L2%*%rnorm(n)
        sxx=sum(x*x)          # x'x (no intercept in this section)
        num=t(x)%*%(Sigma%*%x)
        # ratio x' Sigma x / x'x accumulated over exposure replicates
        precMatRatio[i,j,pp]=precMatRatio[i,j,pp]+num/sxx
      }
    }
    print(c(pp,i,precMatRatio[i,4,pp]/nreps))
  }
  print(date())
}
precMatRatio=precMatRatio/nreps

# code for uniform or pcp
type='unif' # 'unif', 'pcp'
n=100
nVals=100
theta1=theta2=seq(0,1.0,len=nVals)
nreps=500
p=c(.1,.5,.9)
precMatRatio=precMatOLS=precMatGLS=array(0,c(length(theta1),length(theta2),length(p)))
# issue of empirical vs population variance shouldn't matter because sigma_x cancels out of ratio of GLS and OLS precision; sigma2_g + tau2 also cancels out
for(pp in 1:3){
  for(i in 1:length(theta1)){
    for(j in 1:length(theta2)){
      set.seed(0)
      for(m in 1:nreps){
        if(type=='unif'){
          xs=cbind(runif(n),runif(n))
        }
        if(type=='pcp'){
          xs=pcpGen()
        }
        dmat=rdist(xs)
        if(theta1[i]==0){
          rmat1=diag(rep(1,n))
          L1=rmat1
        } else{
          rmat1=f.matern.euc(dmat,theta1[i],2)
          L1=t(chol(rmat1))
        }
        Sigma=rmat1*p[pp]
        diag(Sigma)=diag(Sigma)+(1-p[pp])
        SigmaInv=solve(Sigma)
        if(theta2[j]==0){
          rmat2=diag(rep(1,n))
          L2=rmat2
        } else{
          rmat2=f.matern.euc(dmat,theta2[j],2)
          L2=t(chol(rmat2))
        }
        x=L2%*%rnorm(n)
        sxx=sum(x*x)
        num=t(x)%*%(Sigma%*%x)
        precMatRatio[i,j,pp]=precMatRatio[i,j,pp]+num/sxx
      }
    }
    # NOTE(review): garbled in this extraction — trailing empty argument;
    # compare with the grid version's print(c(pp,i,precMatRatio[i,4,pp]/nreps)).
    print(c(pp,i,))
    print(date())
  }
}