## ====================================================================== ## Pan-American Advanced Study Institute on ## Spatio-Temporal Statistics (June 2014) ## Peter F. Craigmile, pfc@stat.osu.edu ## ## Change of support - R practicum ## ====================================================================== ## load the geoR library library(geoR) ## ====================================================================== ## Read in the data ## ====================================================================== # Read in the data, storing as a data frame called ’ches’ ches <- read.table("chesbay.txt", header=TRUE) ## What are the name of the variables? names(ches) ## read in a dataset containing the boundaries of Chesapeake Bay. boundary <- read.table("boundary.txt", header=TRUE) ## ====================================================================== ## Carry out some spatial exploratory data analysis (EDA) ## ====================================================================== ## Show the boundaries in gray. plot(boundary, xlab="Easting", ylab="Northing", col="gray40", type="l") ## Plot the locations of the nitrogren samples over the top. points(ches$easting, ches$northing) ## Show block 1 rect(390, 4100, 410, 4150, border="blue", lwd=2) ## Show block 2 rect(390, 4150, 410, 4200, border="blue", lwd=2) ## Show the boundaries in gray. plot(boundary, xlab="Easting", ylab="Northing", col="gray40", type="l") ## plot the nitrogen values text(ches$easting, ches$northing, round(ches$nitrogen,1), cex=0.7) ## Produce a plot of the surface nitrogren levels by the easting and ## northing coordinates par(mfrow=c(1,2)) plot(ches$easting, ches$nitrogen, xlab="easting", ylab="nitrogen concentration") plot(ches$northing, ches$nitrogen, xlab="northing", ylab="nitrogen concentration") ## calculate the log nitrogen values log.nitro <- log(ches$nitrogen) ## Now produce scatterplots of this transformed variable plot(ches$easting, log.nitro, xlab="easting", ylab="log nitrogen concentration") plot(ches$northing, log.nitro, xlab="northing", ylab="log nitrogen concentration") ## Using OLS, we estimate a spatial trend in the northing direction trend.model <- lm(log.nitro ~ ches$northing) summary(trend.model) ## Calculate the residuals from the OLS fit trend.resids <- resid(trend.model) ## And summarize these residuals graphically. ## Is there any trend remaining? par(mfrow=c(2,2), cex=1.75, mar=c(4,4,1,1), mgp=c(2,0.5,0), bty="L") plot(boundary, xlab="Easting", ylab="Northing", col="gray40", type="l") text(ches$easting, ches$northing, round(trend.resids,1), cex=0.7) qqnorm(trend.resids, xlab="Normal quantiles", ylab="Sample quantiles of residuals", main="") qqline(trend.resids) plot(ches$easting, trend.resids, ylim=c(-0.4, 0.4), xlab="easting", ylab="residuals") abline(h=0, lty=2) plot(ches$northing, trend.resids, ylim=c(-0.4, 0.4), xlab="northing", ylab="residuals") abline(h=0, lty=2) ## ====================================================================== ## Estimate the variogram of the residuals ## ====================================================================== ## Create a geodata object for the residuals my.resids <- cbind(ches$easting, ches$northing, trend.resids) Gnitro <- as.geodata(my.resids, coords.col=1:2, data.col=3) Euclidean.dist.matrix <- function (sites) { ## ====================================================================== ## Purpose: The function calculate the Euclidean distances among sites. ## Assumes: 'sites' are matrices or data frames. ## ====================================================================== dd <- as.matrix(dist(sites, upper=TRUE, diag=TRUE)) dimnames(dd) <- NULL dd } ## calculate the Euclidean distances between the sites. dists <- Euclidean.dist.matrix(Gnitro$coords) ## Summarize the distances hist(dists) summary(as.numeric(dists)) par(mfrow=c(2,2)) ## Estimate the semivariogram using binning emp.var <- variog(Gnitro, max.dist=150) plot(emp.var) par(mfrow=c(1,2)) ## Using directional variogram to assess isotropy of the data emp.var4 <- variog4(Gnitro, estimator.type="modulus", max.dist=150) plot(emp.var4) ## ====================================================================== ## NOTE: ## You might also remove the outlier, and not rescale ## Thanks Paulo J. Ribeiro Jr! ## ====================================================================== tx <- 2.5 * ches$easting ty <- ches$northing ## Create a geodata object for the residuals, ## rescaling the easting direction by a factor of 2.5 Rmy.resids <- cbind(tx, ty, trend.resids) Rnitro <- as.geodata(Rmy.resids, coords.col=1:2, data.col=3) ## Using directional variogram to assess isotropy of the data emp.var4 <- variog4(Rnitro, estimator.type="modulus", max.dist=150) plot(emp.var4) ## Estimate the semivariogram using binning and the robust estimator emp.var.final <- variog(Rnitro, estimator="modulus", max.dist=150) plot(emp.var.final) ## Estimate the covariance parameters using maximum likelihood, ## assuming that the residuals are Gaussian. ml.fit <- likfit(Rnitro, cov.model="exp", fix.nugget=FALSE, nugget=1, ini.cov.pars=c(1,1)) summary(ml.fit) ## Estimate the covariance parameters using REML reml.fit <- likfit(Rnitro, cov.model="exp", fix.nugget=FALSE, lik.method="REML", nugget=1, ini.cov.pars=c(1,1)) summary(reml.fit) ## Show the fitted ML-based and REML-based variograms over ## the robust binned estimator. plot(emp.var.final) lines(ml.fit, lty=1, col="blue") lines(reml.fit, lty=2, col="red") ## We can also fit a REML-based model to the original data, ## including a linear trend. ## Fit we set up the new geo.data object my.data <- cbind(tx, ty, log.nitro) Onitro <- as.geodata(my.data, coords.col=1:2, data.col=3) ## You need to fill in your trend model here after the tilde ~ your.trend.model <- ~ ty ## The we estimate the covariance parameters using REML trend.reml.fit <- likfit(Onitro, cov.model="exp", fix.nugget=FALSE, trend=trend.spatial(your.trend.model), lik.method="REML", nugget=1, ini.cov.pars=c(1,1)) summary(trend.reml.fit) ## What locations shall we predict at? px <- seq(360, 440, length=30) py <- seq(4080, 4390, length=100) pred.sites <- expand.grid(px, py) # Show these prediction locations on a map. par(mfrow=c(1,1)) plot(boundary, xlab="Easting", ylab="Northing", col="gray40", type="l") points(pred.sites, pch=4, cex=0.5) ## Define your transformed x and y coodinates (fill in the blanks ## by transforming pred.sites[,1] and pred.sites[,2]). ptx <- 2.5 * pred.sites[,1] pty <- pred.sites[,2] ## To krige we have to remember to rescale the locations predicted.sites <- cbind(ptx, pty) ## Let us start by predicting the residual field krige <- krige.conv(Rnitro, loc=predicted.sites, krige = krige.control( cov.model=reml.fit$cov.model, cov.pars=reml.fit$cov.pars, nugget=reml.fit$nugget)) ## Now produce some pretty pictures of the predictions. par(mfrow=c(1,2)) ## Plot the axes plot(boundary, xlab="Easting", ylab="Northing", type="n", main="Prediction") ## Show the predicted values, along with a contour map to indicate the values image(px, py, matrix(krige$pred, length(px)), add=T) contour(px, py, matrix(krige$pred, length(px)), add=T, nlevels=10) ## Add the boundaries lines(boundary, col="gray40") ## And the locations of the nitrogren samples over the top. points(ches$easting, ches$northing) ## Show the boundaries in gray. plot(boundary, xlab="Easting", ylab="Northing", type="n", main="SE of Prediction") ## Show the SE of the predicted values, along with a contour map to ## indicate the values image(px, py, matrix(sqrt(krige$krige.var), length(px)), add=T) contour(px, py, matrix(krige$krige.var, length(px)), add=T, nlevels=10) ## Add the boundaries lines(boundary, col="gray40") ## Plot the locations of the nitrogren samples over the top. points(ches$easting, ches$northing) ## define the trend for the predictions using the columns ## of predicted.sites your.predicted.trend.model <- ~ predicted.sites[,2] ## Kriging, with a trend krige <- krige.conv(Onitro, loc=predicted.sites, krige = krige.control( type.krige = "OK", trend.d = trend.spatial(your.trend.model), trend.l = trend.spatial(your.predicted.trend.model), beta=trend.reml.fit$beta, cov.model=trend.reml.fit$cov.model, cov.pars=trend.reml.fit$cov.pars, nugget=trend.reml.fit$nugget)) ## ====================================================================== ## Now upscale ## ====================================================================== ## Set up a grid of 100 points uniformly covering the first block block.sample <- expand.grid(seq(390, 410, length=10) * 2.5, seq(4100, 4150, length=10)) ## Calculate the mean at each of the 100 points mu.block.elems <- trend.spatial( ~ block.sample[,2]) %*% trend.reml.fit$beta ## And average to estimate the block mean mu.block <- mean(mu.block.elems) ## Calculate the mean at the observed points mu.obs <- trend.spatial(your.trend.model) %*% trend.reml.fit$beta ## calculate the Euclidean distances between the rescaled sites. dists <- Euclidean.dist.matrix(Onitro$coords) ## Calculate the covariance (without the nugget) Sigma <- cov.spatial(dists, cov.model=trend.reml.fit$cov.model, cov.pars=trend.reml.fit$cov.pars) ## Add the nugget effect to the covariance diag(Sigma) <- diag(Sigma) + trend.reml.fit$nugget ## How many observations do we have? n <- nrow(Onitro$coords) ## Calculate the covariance between the block and each observation i ## and store in cB cB <- rep(NA, n) for (i in 1:n) { dist.between.i <- colSums(t(block.sample) - Onitro$coords[i,])^2 cB[i] <- mean(cov.spatial(cbind(dist.between.i), cov.model=trend.reml.fit$cov.model, cov.pars=trend.reml.fit$cov.pars)) } ## Now calculate the kriging mean for the block mu.block + t(cB) %*% solve(Sigma) %*% (Onitro$data - mu.obs) ## And the kriging variance for the block (trend.reml.fit$cov.pars[1] + trend.reml.fit$nugget) - t(cB) %*% solve(Sigma) %*% cB