if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("RBGL") #BiocManager::install("bioclite") #source("http://bioconductor.org/biocLite.R") ## It may ask you to update some dependent packages. For example, # Old packages: 'manipulate' # Update all/some/none? [a/s/n]: ## OR try this if the packages were not updated. # update.packages(lib.loc = .libPaths()[1], repos="http://cran.fhcrc.org/") # install.packages("pcalg", repos="http://cran.fhcrc.org/") library("pcalg") library("graph") biocLite("Rgraphviz") ## seems not to run on many systems library("Rgraphviz") plotcpdag <- "Rgraphviz" %in% print(.packages(lib.loc = .libPaths()[1])) ## You may need to run these as well. # install.packages(c("abind", "corpcor", "sfsmisc", "robustbase"), # repos="http://cran.fhcrc.org/") # library(abind) # library(corpcor) # library(sfsmisc) # library(robustbase) ## Load some utility functions (showAmat; showEdgeList) ## (Thanks to Markus Kalisch!) source("http://www.stat.washington.edu/tsr/s566/labs/pc-output-utilities.r") #data <- read.table("lab4DAG1.dat") data <- read.table("http://www.stat.washington.edu/tsr/s566/labs/lab4DAG1.dat") attr(data,"names") x <- data$x y <- data$y z <- data$z w <- data$w names <- attr(data,"names") cor(data) # graphical display pairs(data, lower.panel = NULL) #if anything looks close to zero we can test for this using: # cor.test( put-var1-here, put-var2-here ) cor.test(y, z) # zeros correspond to marginal independence tmp <- lm( x ~ y + z) summary(tmp)$coef tmp <- lm( x ~ w + z) summary(tmp)$coef tmp <- lm( y ~ w + x) summary(tmp)$coef tmp <- lm( x ~ w + y) summary(tmp)$coef # a coefficient of y close to zero indicates independence # (again assuming normality!) tmp <- lm( x ~ y + z + w) summary(tmp)$coef tmp <- lm( z ~ y + x + w) summary(tmp)$coef ##### Using the PC Algorithm to do the job for us: n <- nrow(data) p <- ncol(data) indepTest <- gaussCItest suffStat <- list(C=cor(data), n = n) ## estimate CPDAG alpha <- 0.05 pc.fit <- pc(suffStat, indepTest, p = p, alpha = alpha, verbose = TRUE) ### Read through the output - can you see what it has done? showAmat(pc.fit) showEdgeList(pc.fit,names) if (plotcpdag) { plot(pc.fit, main = "Estimated CPDAG",labels=c("x","y","z","w")) } #data <- read.table("lab4DAG2.dat") data <- read.table("http://www.stat.washington.edu/tsr/s566/labs/lab4DAG2.dat") attr(data,"names") x <- data$x y <- data$y z <- data$z w <- data$w names <- attr(data,"names") cor(data) # graphical display pairs(data, lower.panel = NULL) #if anything looks close to zero we can test for this using # cor.test( put-var1-here, put-var2-here ) cor.test(y, z) # zeros correspond to marginal independence (under normality) tmp <- lm( x ~ y + z) summary(tmp) # a coefficient of y close to zero indicates independence # (again assuming normality!) tmp <- lm( x ~ y + z + w) summary(tmp) ##### Using the PC Algorithm to do the job for us: n <- nrow(data) p <- ncol(data) indepTest <- gaussCItest suffStat <- list(C=cor(data), n = n) ## estimate CPDAG alpha <- 0.05 pc.fit <- pc(suffStat, indepTest, p = p, alpha = alpha, verbose = TRUE) showAmat(pc.fit) showEdgeList(pc.fit,names) if (plotcpdag) { plot(pc.fit, main = "Estimated CPDAG",labels=c("x","y","z","w")) ## Note undirected edges are represented here as <-> } #data <- read.table("lab4DAG2.dat") data <- read.table("http://www.stat.washington.edu/tsr/s566/labs/lab4DAG3.dat") attr(data,"names") x <- data$x y <- data$y z <- data$z w <- data$w names <- attr(data,"names") # Some linear regressions again summary(lm(z~w, data))$coef summary(lm(z~w + x, data))$coef summary(lm(z~w + y, data))$coef ##### Using the PC Algorithm to do the job for us: n <- nrow(data) p <- ncol(data) indepTest <- gaussCItest suffStat <- list(C=cor(data), n = n) ## estimate CPDAG alpha <- 0.05 pc.fit <- pc(suffStat, indepTest, p = p, alpha = alpha, verbose = TRUE) showAmat(pc.fit) showEdgeList(pc.fit,names) if (plotcpdag) { plot(pc.fit, main = "Estimated CPDAG",labels=c("x","y","z","w")) }