rm(list=ls()) require("geoR") prices=read.table("house_prices.txt",header=TRUE) prices1= prices[1:500,] names(prices1) prices=na.exclude(prices1) names(prices) #standardizing covariates stdsqft=as.numeric(scale(prices$sqft,scale=TRUE)) stdage=as.numeric(scale(prices$age,scale=TRUE)) stddistfree=as.numeric(scale(prices$dist_freeway,scale=TRUE)) #using the standardizing variables prices$sqft=stdsqft prices$age=stdage prices$dist_freeway=stddistfree #transforming into a geoR object geoprices=as.geodata(prices1,coords.col=c(8,9),covar.col=2:7,data.col=1) plot(geoprices) plot(geoprices,scatter3d=TRUE) names(geoprices) #checking for duplicated coordinates dup.coords(geoprices) #jittering the duplicated locations coord=geoprices$coords coordjitter=jitter2d(geoprices$coords,max=0.001) dup.coords(coordjitter) cbind(geoprices$coords,coordjitter) geoprices$coords=coordjitter plot(geoprices) #removing some observed values for prediction predloc=c(2,10,85,91,102) prices2=prices[-predloc,] #standard regression reg=lm(prices2[,1]~prices2[,2]+prices2[,3]+prices2[,4]+prices2[,6]) fitreg=summary(reg) fitreg #residual analysis in geoR resid=matrix(c(prices2[,8],prices2[,9],fitreg$residuals),ncol=3,byrow=FALSE) georesid=as.geodata(resid,coords.col=c(1,2),data.col=3) plot(georesid) #variogram of the residuals # binned variogram vario.b <- variog(georesid, max.dist=1) # variogram cloud vario.c <- variog(georesid, max.dist=1, op="cloud") #binned variogram and stores the cloud vario.bc <- variog(georesid, max.dist=1, bin.cloud=TRUE) # smoothed variogram #vario.s <- variog(georesid, max.dist=1, op="sm", band=0.2) # plotting the variograms: par(mfrow=c(2,2)) plot(vario.c, main="variogram cloud") plot(vario.bc, bin.cloud=TRUE, main="clouds for binned variogram") plot(vario.b, main="binned variogram") #plot(vario.s, main="smoothed variogram") #OLS estimate of the variogram varioresid=variog(georesid,max.dist=0.08) #ols estimates of the variogram parameters olsvari=variofit(varioresid,ini=c(0.02,0.1)) summary(olsvari) par(mfrow=c(1,1)) plot(variog(georesid,max.dist=0.10)) lines(olsvari) #Directional variograms #Need to remove the colocated data # I am using the residual from the regression #plot(variog4(georesid)) #estimate of the omndirectional variogram #lines(olsvari,col=2,lwd=2) #Envelopes for an empirical variogram by simulating data for given model parameters. #Computes bootstrap paremeter estimates resid.mc.env <- variog.mc.env(georesid, obj.variog = varioresid) plot(varioresid,ylim=c(0,0.1)) lines(resid.mc.env) #fitting the parameters of a variogram "by eye" eyefit(varioresid) #fitting a spatial regression model meantrend=trend.spatial(~sqft+age+bedrooms+dist_freeway,geoprices) #a set of initial values #spatialreg1=likfit(geoprices,trend=meantrend,ini.cov.pars=c(0.15,0.1),nospatial=TRUE) load(file="spatialreg1.RData") #save("spatialreg1", file="spatialreg1.RData") summary(spatialreg1) #another set of initial values #spatialreg2=likfit(geoprices,trend=meantrend,ini.cov.pars=c(0.4,0.3),nospatial=TRUE) #summary(spatialreg2) #spatial interpolation for locations left out from inference names(prices) xp=prices[predloc,c(2:4,7)] locationpred=prices[predloc,8:9] #spatial interpolation for locations left out from inference names(prices) meantrend.loc <- trend.spatial(~sqft+age+bedrooms+dist_freeway, xp) kc <- krige.conv(geoprices, loc=locationpred, krige=krige.control(trend.d=meantrend, trend.l=meantrend.loc, obj.model=spatialreg1), output=output.control(n.pred=1000)) names(kc) dim(kc$simul) par(mfrow=c(3,2), mar=c(3,3,0.5,0.5)) for(i in 1:5){ hist(kc$simul[i,], main="", prob=T) lines(density(kc$simul[i,])) abline(v=prices[predloc,1][i], col=2) } meantrend.loc <- trend.spatial(~sqft+age+bedrooms+dist_freeway, xp) kc <- krige.conv(geoprices, loc=locationpred, krige=krige.control(trend.d=meantrend, trend.l=meantrend.loc, obj.model=spatialreg1), output=output.control(n.pred=1000)) names(kc) dim(kc$simul) par(mfrow=c(3,2), mar=c(3,3,0.5,0.5)) for(i in 1:5){ hist(kc$simul[i,], main="", prob=T) lines(density(kc$simul[i,])) abline(v=prices[predloc,1][i], col=2) } #Bayesian kriging kb <- krige.bayes(geoprices, loc=locationpred, model=model.control(trend.d=meantrend, trend.l=meantrend.loc), prior=prior.control(phi.prior="rec", phi.disc=seq(0, 0.8, l=11), tausq.rel.prior="rec", tausq.rel.disc=seq(0,1,l=11) ), output=output.control(n.pred=1000)) kb$post kb$pred kb$pred$simul