## ======================================================================
## Pan-American Advanced Study Institute on
## Spatio-Temporal Statistics (June 2014)
## Peter F. Craigmile, pfc@stat.osu.edu
##
## Change of support - R practicum
## ======================================================================


## load the geoR library
library(geoR)


## ======================================================================
## Read in the data
## ======================================================================

# Read in the data, storing as a data frame called ’ches’
ches <- read.table("chesbay.txt", header=TRUE)

## What are the name of the variables?
names(ches)

## read in a dataset containing the boundaries of Chesapeake Bay.
boundary <- read.table("boundary.txt", header=TRUE)


## ======================================================================
## Carry out some spatial exploratory data analysis (EDA)
## ======================================================================

## Show the boundaries in gray.
plot(boundary, xlab="Easting", ylab="Northing", col="gray40", type="l")

## Plot the locations of the nitrogren samples over the top.
points(ches$easting, ches$northing)

## Show block 1
rect(390, 4100, 410, 4150, border="blue", lwd=2)

## Show block 2
rect(390, 4150, 410, 4200, border="blue", lwd=2)


## Show the boundaries in gray.
plot(boundary, xlab="Easting", ylab="Northing", col="gray40", type="l")

## plot the nitrogen values
text(ches$easting, ches$northing, round(ches$nitrogen,1), cex=0.7)


## Produce a plot of the surface nitrogren levels by the easting and
## northing coordinates
par(mfrow=c(1,2))

plot(ches$easting, ches$nitrogen, xlab="easting", ylab="nitrogen concentration")
plot(ches$northing, ches$nitrogen, xlab="northing", ylab="nitrogen concentration")


## calculate the log nitrogen values
log.nitro <- log(ches$nitrogen)


## Now produce scatterplots of this transformed variable
plot(ches$easting, log.nitro, xlab="easting", ylab="log nitrogen concentration")
plot(ches$northing, log.nitro, xlab="northing", ylab="log nitrogen concentration")


## Using OLS, we estimate a spatial trend in the northing direction
trend.model <- lm(log.nitro ~ ches$northing)
summary(trend.model)


## Calculate the residuals from the OLS fit
trend.resids <- resid(trend.model)


## And summarize these residuals graphically.
## Is there any trend remaining?
par(mfrow=c(2,2), cex=1.75, mar=c(4,4,1,1), mgp=c(2,0.5,0), bty="L")

plot(boundary, xlab="Easting", ylab="Northing", col="gray40", type="l")
text(ches$easting, ches$northing, round(trend.resids,1), cex=0.7)

qqnorm(trend.resids,
       xlab="Normal quantiles", ylab="Sample quantiles of residuals", main="")
qqline(trend.resids)

plot(ches$easting, trend.resids, ylim=c(-0.4, 0.4),
     xlab="easting", ylab="residuals")
abline(h=0, lty=2)

plot(ches$northing, trend.resids, ylim=c(-0.4, 0.4),
     xlab="northing", ylab="residuals")
abline(h=0, lty=2)


## ======================================================================
## Estimate the variogram of the residuals
## ======================================================================


## Create a geodata object for the residuals
my.resids <- cbind(ches$easting, ches$northing, trend.resids)
Gnitro <- as.geodata(my.resids, coords.col=1:2, data.col=3)


Euclidean.dist.matrix <- function (sites) {
  ## ======================================================================
  ## Purpose: The function calculate the Euclidean distances among sites.
  ## Assumes: 'sites' are matrices or data frames.
  ## ======================================================================

  dd <- as.matrix(dist(sites, upper=TRUE, diag=TRUE))
  dimnames(dd) <- NULL
  dd
}


## calculate the Euclidean distances between the sites.
dists <- Euclidean.dist.matrix(Gnitro$coords)

## Summarize the distances
hist(dists)
summary(as.numeric(dists))


par(mfrow=c(2,2))

## Estimate the semivariogram using binning
emp.var <- variog(Gnitro, max.dist=150)
plot(emp.var)


par(mfrow=c(1,2))

## Using directional variogram to assess isotropy of the data
emp.var4 <- variog4(Gnitro, estimator.type="modulus", max.dist=150)
plot(emp.var4)


## ======================================================================
## NOTE:
## You might also remove the outlier, and not rescale
## Thanks Paulo J. Ribeiro Jr!
## ======================================================================


tx <- 2.5 * ches$easting
ty <- ches$northing

## Create a geodata object for the residuals,
## rescaling the easting direction by a factor of 2.5
Rmy.resids <- cbind(tx, ty, trend.resids)
Rnitro <- as.geodata(Rmy.resids, coords.col=1:2, data.col=3)

## Using directional variogram to assess isotropy of the data
emp.var4 <- variog4(Rnitro, estimator.type="modulus", max.dist=150)
plot(emp.var4)

## Estimate the semivariogram using binning and the robust estimator
emp.var.final <- variog(Rnitro, estimator="modulus", max.dist=150)
plot(emp.var.final)


## Estimate the covariance parameters using maximum likelihood,
## assuming that the residuals are Gaussian.
ml.fit <- likfit(Rnitro, cov.model="exp", fix.nugget=FALSE,
                 nugget=1, ini.cov.pars=c(1,1))
summary(ml.fit)


## Estimate the covariance parameters using REML
reml.fit <- likfit(Rnitro, cov.model="exp", fix.nugget=FALSE,
                   lik.method="REML",
                   nugget=1, ini.cov.pars=c(1,1))
summary(reml.fit)


## Show the fitted ML-based and REML-based variograms over
## the robust binned estimator.
plot(emp.var.final)
lines(ml.fit, lty=1, col="blue")
lines(reml.fit, lty=2, col="red")


## We can also fit a REML-based model to the original data,
## including a linear trend.
## Fit we set up the new geo.data object
my.data <- cbind(tx, ty, log.nitro)
Onitro <- as.geodata(my.data, coords.col=1:2, data.col=3)

## You need to fill in your trend model here after the tilde ~
your.trend.model <- ~ ty

## The we estimate the covariance parameters using REML
trend.reml.fit <- likfit(Onitro, cov.model="exp", fix.nugget=FALSE,
                         trend=trend.spatial(your.trend.model),
                         lik.method="REML",
                         nugget=1, ini.cov.pars=c(1,1))
summary(trend.reml.fit)


## What locations shall we predict at?
px <- seq(360, 440, length=30)
py <- seq(4080, 4390, length=100)
pred.sites <- expand.grid(px, py)


# Show these prediction locations on a map.
par(mfrow=c(1,1))
plot(boundary, xlab="Easting", ylab="Northing", col="gray40", type="l")
points(pred.sites, pch=4, cex=0.5)

## Define your transformed x and y coodinates (fill in the blanks
## by transforming pred.sites[,1] and pred.sites[,2]).
ptx <- 2.5 * pred.sites[,1]
pty <- pred.sites[,2]
  
## To krige we have to remember to rescale the locations
predicted.sites <- cbind(ptx, pty)


## Let us start by predicting the residual field
krige <- krige.conv(Rnitro, loc=predicted.sites,
                    krige = krige.control(
                      cov.model=reml.fit$cov.model,
                      cov.pars=reml.fit$cov.pars,
                      nugget=reml.fit$nugget))


## Now produce some pretty pictures of the predictions.
par(mfrow=c(1,2))

## Plot the axes
plot(boundary, xlab="Easting", ylab="Northing", type="n", main="Prediction")

## Show the predicted values, along with a contour map to indicate the values
image(px, py, matrix(krige$pred, length(px)), add=T)
contour(px, py, matrix(krige$pred, length(px)), add=T, nlevels=10)

## Add the boundaries
lines(boundary, col="gray40")

## And the locations of the nitrogren samples over the top.
points(ches$easting, ches$northing)

## Show the boundaries in gray.
plot(boundary, xlab="Easting", ylab="Northing", type="n", main="SE of Prediction")

## Show the SE of the predicted values, along with a contour map to
## indicate the values
image(px, py, matrix(sqrt(krige$krige.var), length(px)), add=T)
contour(px, py, matrix(krige$krige.var, length(px)), add=T, nlevels=10)

## Add the boundaries
lines(boundary, col="gray40")

## Plot the locations of the nitrogren samples over the top.
points(ches$easting, ches$northing)


## define the trend for the predictions using the columns
## of predicted.sites
your.predicted.trend.model <- ~ predicted.sites[,2]

## Kriging, with a trend
krige <- krige.conv(Onitro, loc=predicted.sites,
                    krige = krige.control(
                      type.krige = "OK",
                      trend.d = trend.spatial(your.trend.model),
                      trend.l = trend.spatial(your.predicted.trend.model),
                      beta=trend.reml.fit$beta,
                      cov.model=trend.reml.fit$cov.model,
                      cov.pars=trend.reml.fit$cov.pars,
                      nugget=trend.reml.fit$nugget))


## ======================================================================
## Now upscale
## ======================================================================


## Set up a grid of 100 points uniformly covering the first block
block.sample <- expand.grid(seq(390, 410, length=10) * 2.5,
                            seq(4100, 4150, length=10))

## Calculate the mean at each of the 100 points
mu.block.elems <- trend.spatial( ~ block.sample[,2]) %*% trend.reml.fit$beta

## And average to estimate the block mean
mu.block <-  mean(mu.block.elems)

## Calculate the mean at the observed points
mu.obs <- trend.spatial(your.trend.model) %*% trend.reml.fit$beta


## calculate the Euclidean distances between the rescaled sites.
dists <- Euclidean.dist.matrix(Onitro$coords)

## Calculate the covariance (without the nugget)
Sigma <- cov.spatial(dists,
                     cov.model=trend.reml.fit$cov.model,
                     cov.pars=trend.reml.fit$cov.pars)

## Add the nugget effect to the covariance
diag(Sigma) <- diag(Sigma) + trend.reml.fit$nugget

## How many observations do we have?
n <- nrow(Onitro$coords)

## Calculate the covariance between the block and each observation i
## and store in cB
cB <- rep(NA, n)

for (i in 1:n) {

  dist.between.i <- colSums(t(block.sample) - Onitro$coords[i,])^2
  
  cB[i] <- mean(cov.spatial(cbind(dist.between.i),
                            cov.model=trend.reml.fit$cov.model,
                            cov.pars=trend.reml.fit$cov.pars))
}

## Now calculate the kriging mean for the block
mu.block + t(cB) %*% solve(Sigma) %*% (Onitro$data - mu.obs)

## And the kriging variance for the block
(trend.reml.fit$cov.pars[1] + trend.reml.fit$nugget) - t(cB) %*% solve(Sigma) %*% cB