###################################
# R Demo 1: polynomial regression #
###################################

### Simulate y and x ###
set.seed(527)
x = c(seq(1, 10, 0.2), 12, 15)
ex = 5 + 1*x + 0.3*x^2 - 0.1*x^3       # True model
y = ex + rnorm(length(x), 0, 15)
length(x)
plot(x, y, main="", ylab="y", xlab="x")

# Fit one polynomial model and plot it
plot(x, y, main="", ylab="y", xlab="x")
mod <- lm(y ~ poly(x, degree = 2))
lines(x, mod$fitted, lwd=2)

# Fit several polynomial models and compare
plot(x, y, main="", ylab="y", xlab="x")
degreeRange <- c(1, 3, 7, 9)
for (i in degreeRange){
  mod <- lm(y ~ poly(x, degree = i))
  lines(x, mod$fitted, col=(i+1)/2, lty=i, lwd=2)
}
legend("bottomleft", lty=degreeRange, col=(degreeRange+1)/2, lwd=2,
       legend=paste("degree", degreeRange), bty="n")

# Add the true model
lines(x, ex, lty=1, lwd=3, col="red")

# Residual plots
par(mfrow=c(2,2))
for (i in degreeRange) {
  mod <- lm(y ~ poly(x, degree=i))
  plot(x, mod$residuals, ylab="residuals", main=paste("Degree =", i))
  abline(h=0, col="red", lty=2)
}

# Training and test sets
dat <- data.frame(cbind(y, x))
set.seed(20)
n.train  <- round(nrow(dat)/2)
scramble <- sample(1:nrow(dat))
id.train <- scramble[1:n.train]
train = dat[id.train, ]
test  = dat[-id.train, ]

given.degree = 5
mod <- lm(y ~ poly(x, degree = given.degree), data=train)
MSE.inSample  = mean((predict(mod) - train$y)^2)
pred = predict(mod, newdata=test)
MSE.outSample = mean((pred - test$y)^2)
cbind(pred, test$y)
c(given.degree, MSE.inSample, MSE.outSample)
# Now see how the in-sample and out-of-sample MSEs change if we use a degree other than 5.


##############################
# R Demo 2: Ridge Regression #
##############################

# Simulate data #
set.seed(7)
n = 60
x1 = rnorm(n, 1, 2)
x2 = rnorm(n, 1, 1)
x3 = rnorm(n, 2, 1)
x4 = rnorm(n, 2, 2)
x5 = rnorm(n, 1, 1)
x6 = rnorm(n, 1, 2)
x7 = rnorm(n, 2, 1)
error = rnorm(n, 0, 2)
y = 5 + 0.5*x1 + 0.8*x2 + 1*x3 + error   # True model
dat = data.frame(cbind(x1, x2, x3, x4, x5, x6, x7, y))
head(dat)

# Divide the data into training and test sets #
set.seed(20)
n.train  <- round(nrow(dat)/2)
scramble <- sample(1:nrow(dat))
id.train <- scramble[1:n.train]
train = dat[id.train, ]
test  = dat[-id.train, ]
dim(train)
dim(test)
head(train)

# Fit ridge regression with the lm.ridge function in library(MASS)
# install.packages("MASS")
library(MASS)
lambda.range <- seq(0, 50, by=1)
fit = lm.ridge(y ~ ., data=train, lambda = lambda.range)
# You can pass unstandardized data to the function: by default it normalizes x
# and centers y for you, and the algorithm does not penalize the intercept.
plot(fit)
abline(h=0, col="grey")

# Prediction given a lambda value
given.lambda = 10
mod = lm.ridge(y ~ ., data=train, lambda=given.lambda)
names(mod)
# Way 1: standardize the test x's with the training centers/scales, then add back ym
pred = scale(test[,1:7], center=mod$xm, scale=mod$scales) %*% mod$coef + mod$ym
# Way 2: use coef(mod), which is on the original scale and includes an intercept
pred2 <- as.matrix(cbind(1, test[,1:7])) %*% coef(mod)
# Wrong way: standardized coefficients applied to raw x, with no intercept
pred.wrong = as.matrix(test[,1:7]) %*% mod$coef
data.frame(true.y=test$y, pred=pred, pred.wrong=pred.wrong)

MSE = mean((test$y - pred)^2)
MSE
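# ------------------------------------------------------------------
# (Added sketch, not part of the original demo.) Rather than fixing
# given.lambda = 10, one option is to let generalized cross-validation
# suggest a lambda: lm.ridge() already stores a GCV value for every
# lambda in lambda.range, and select() in MASS prints the HKB, L-W, and
# GCV choices. The code below simply reuses the ridge fit from above.
select(fit)                                         # suggested lambda values
lambda.gcv <- lambda.range[which.min(fit$GCV)]      # lambda minimizing GCV
mod.gcv <- lm.ridge(y ~ ., data=train, lambda=lambda.gcv)
pred.gcv <- scale(test[,1:7], center=mod.gcv$xm, scale=mod.gcv$scales) %*%
  mod.gcv$coef + mod.gcv$ym
mean((test$y - pred.gcv)^2)                         # test MSE at the GCV-chosen lambda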
###################
# R Demo 3: Lasso #
###################

# Simulate data #
set.seed(7)
n = 100
x1 = rnorm(n, 1, 2)
x2 = rnorm(n, 1, 1)
x3 = rnorm(n, 2, 1)
x4 = rnorm(n, 2, 2)
x5 = rnorm(n, 1, 1)
x6 = rnorm(n, 1, 2)
x7 = rnorm(n, 2, 1)
error = rnorm(n, 0, 1)
y = 5 + 0.5*x1 + 0.8*x2 + 1*x3 + error   # True model
dat = data.frame(cbind(x1, x2, x3, x4, x5, x6, x7, y))
head(dat)

# The lasso vs. the shrinkage factor
# (lasso2 has been archived on CRAN; if install.packages() fails, install it from the CRAN archive)
install.packages("lasso2")
library("lasso2")
s.range <- seq(0.01, 1, 0.05)
mod <- l1ce(y ~ ., data=dat, bound=s.range)
?l1ce
coef(mod)
coefmat = coef(mod)[,-1]       # drop the intercept column

# Plot lasso coefficients vs. shrinkage factor
par(mar=c(5,5,4,2))
matplot(s.range, coefmat, type="l", xlim=c(0,1),
        ylab=expression(hat(beta)), xlab="Shrinkage Factor",
        lty=c(1:6,8), col=1:7)
legend("topleft", bty="n", legend=paste("x", 1:7), lty=c(1:6,8), col=1:7)
abline(0, 0, lty=1, col="grey")

# Prediction
mod = l1ce(y ~ ., data=dat, bound=0.5)
pred = predict(mod, newdata=dat)
pred
cbind(pred, dat$y)

# You can also fit the lasso using LARS: it produces the full trajectory of
# the betas versus the shrinkage factor
library(lars)
mod.lars <- lars(as.matrix(dat[,1:7]), dat$y)
plot(mod.lars)
names(mod.lars)

# Handy cross-validation
r <- cv.lars(as.matrix(dat[,1:7]), dat$y, K=10)

# The shrinkage factor of the best model
bestfraction <- r$index[which.min(r$cv)]
bestfraction

# Coefficients of the best model from 10-fold CV
coef.best = predict(mod.lars, as.matrix(dat[,1:7]), s=bestfraction,
                    type="coefficient", mode="fraction")
coef.best

# Prediction
pred.best <- predict(mod.lars, as.matrix(dat[,1:7]), s=bestfraction,
                     type="fit", mode="fraction")$fit
cbind(pred.best, dat$y)
mean((pred.best - dat$y)^2)
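# ------------------------------------------------------------------
# (Added sketch, not part of the original demo.) The MSE above is computed on
# the same data used to fit the model, so it is an in-sample number. Mirroring
# Demos 1 and 2, one way to get an out-of-sample MSE is to refit the LARS model
# on a training half and evaluate it on the held-out half. The seed and the
# 50/50 split below are arbitrary choices.
set.seed(20)
n.train  <- round(nrow(dat)/2)
scramble <- sample(1:nrow(dat))
id.train <- scramble[1:n.train]
train <- dat[id.train, ]
test  <- dat[-id.train, ]

mod.lars.tr <- lars(as.matrix(train[,1:7]), train$y)
cv.tr <- cv.lars(as.matrix(train[,1:7]), train$y, K=10)
best.s <- cv.tr$index[which.min(cv.tr$cv)]          # CV-chosen shrinkage factor

pred.test <- predict(mod.lars.tr, as.matrix(test[,1:7]),
                     s=best.s, type="fit", mode="fraction")$fit
mean((pred.test - test$y)^2)                        # out-of-sample MSE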