#### Simple analysis for Lipid Data #### ## Lipid Data #### source("http://www.stat.washington.edu/tsr/s566/labs/y0y1polytopenew.R") #source("~/madrid/people/jamie/iv_minimal/y0y1polytope.R") # The p's are stored in the order # p(y0, x0 | z0), p(y0, x1 | z0), p(y1, x0 | z0), p(y1, x1 | z0), # p(y0, x0 | z1), p(y0, x1 | z1), p(y1, x0 | z1), p(y1, x1 | z1) #lipid data lipid.counts <- c(158,0,14,0,52,23,12,78) lipid.obs.freqs <- c(158/172,0,14/172,0,52/165,23/165,12/165,78/165) lipid.obs.freqs[1:4] sum(lipid.obs.freqs[1:4]) lipid.obs.freqs[5:8] sum(lipid.obs.freqs[5:8]) ## Check that observed distribution obeys IV inequalities check.iv.ineqs(lipid.obs.freqs) empirical.ace.bounds <- ace.bounds(lipid.obs.freqs) ######## BAYESIAN INFERENCE ################ ## Specification of prior for p(x,y|z) prior.z0 <- c(1,0,1,0) # Dirichlet prior # zeros because for Lipid data Z=0 => X=0 prior.z1 <- c(1,1,1,1) # Dirichlet prior # Alternative unit prior specification #prior.z0 <- c(1/2,0,1/2,0) #prior.z1 <- c(1/4,1/4,1/4,1/4) ## post.z0 <- prior.z0 + lipid.counts[1:4] post.z1 <- prior.z1 + lipid.counts[5:8] num.sims <- 10000 # Number of posterior simulations for each arm theta.sims.z0 <- t(replicate(num.sims, dirichlet(post.z0) )) colnames(theta.sims.z0) <- c("X0,Y0|Z0","X1,Y0|Z0","X0,Y1|Z0","X1,Y1|Z0") theta.sims.z0.lst <- lapply(apply(theta.sims.z0,1,"c",as.list),function(x) x[1:4] ) # list is constructed for convenience with mapply theta.sims.z1 <- t(replicate(num.sims, dirichlet(post.z1) )) colnames(theta.sims.z1) <- c("X0,Y0|Z1","X1,Y0|Z1","X0,Y1|Z1","X1,Y1|Z1") theta.sims <- cbind(theta.sims.z0, theta.sims.z1) summary(theta.sims) ## Remove sampled distributions that violate the IV inequalites: all.ivs.ok <- rep(NA,num.sims) for (i in 1:num.sims){ all.ivs.ok[i] <- check.iv.ineqs(theta.sims[i,],verbose=FALSE) } summary(all.ivs.ok) mean(all.ivs.ok) posterior.theta.sims.iv <- theta.sims[all.ivs.ok,] ### posterior.theta.sims.iv contains simulations from the posterior ### distribution of p(x,y|z) under our model n <- sum(all.ivs.ok) n # number of sims remaining after removing those # violating IV inequalities ace.bnds.lipid <- rep(1,n*2) dim(ace.bnds.lipid) <- c(n,2) for(i in 1:n){ ace.bnds.lipid[i,] <- ace.bounds(posterior.theta.sims.iv[i,]) } #### Plot upper and lower bounds do.tri.plot(ace.bnds.lipid[1:5000,],title.txt="Bounds on ACE for Lipid Data") #### dens.ace.lower <- density(ace.bnds.lipid[,1]) dens.ace.upper <- density(ace.bnds.lipid[,2]) par(mfrow=c(1,1)) plot(dens.ace.upper,xlab="ACE(X -> Y)", main="Posteriors on bounds for ACE(X->Y) for Lipid Data",xlim=c(-1,1),lty=1,col="green",type="l") points(dens.ace.lower,col="red",type="l") abline(v= empirical.ace.bounds, lty=3,lwd=1.5,col="blue") abline(h=0, lty=1,col="black") for(i in 1:10){ simp <- do.simplex(phi=30,theta=120,r=1.7,main=paste("Lipid Data Posterior Draw: ",i)) do.polytope(posterior.theta.sims.iv[i,],simp) do.ace.line(ace.bnds.lipid[i,1], simp) # lower bound do.ace.line(ace.bnds.lipid[i,2], simp) cat("Hit a key to see next plot:") scan("", what = character(), nmax = 1, quiet = TRUE)} # plots from below for(i in 1:30){ simp <- do.simplex(phi=-90,theta=90,r=1000,main="Lipid Data: %HE vs %HU") ace.bnds.lipid[i,] <- ace.bounds(posterior.theta.sims.iv[i,]) do.polytope(posterior.theta.sims.iv[i,],simp) do.ace.line(ace.bnds.lipid[i,1], simp) # lower bound do.ace.line(ace.bnds.lipid[i,2], simp) scan("", what = character(), nmax = 1, quiet = TRUE) } ####