# Demonstration: effect of a single outlier on the sample mean and standard
# deviation, for samples of increasing size.
#
# We generate sample observations from a normal distribution with mean=0, sd=1,
# tag one outlier (x = 10) onto the end of each sample, and compare the
# summary statistics with and without it.

# Absolute percentage change from `orig` to `out`, relative to `orig`.
# Vectorized: `orig` and `out` are numeric vectors of equal length; the result
# has that same length.
pct_change <- function(orig, out) {
  abs(100 * (orig - out) / orig)
}

# Samples ----

# First, set random seed so everyone gets the same results.
# The three rnorm() calls below draw from one RNG stream, so their order
# must not change or the samples will differ.
set.seed(1)

# Generate a sample of size 10 (printed when run at top level).
x10 <- rnorm(10)
x10

# ... and tag on a final observation equal to 10 as an outlier.
x10out <- c(x10, 10)
x10out

# Generate a sample of size 100, and again tag on the outlier.
x100 <- rnorm(100)
x100out <- c(x100, 10)

# And a sample of size 1000, and add the outlier on the end.
x1000 <- rnorm(1000)
x1000out <- c(x1000, 10)

# Column / bar labels, one per sample size.
sample.ns <- c("10", "100", "1000")

# Means ----

# Compute means of the three samples, with and without the outlier,
# and the percentage change in the mean caused by adding the outlier.
mean.orig <- c(mean(x10), mean(x100), mean(x1000))
mean.out <- c(mean(x10out), mean(x100out), mean(x1000out))

# The samples are drawn with mean 0, so mean.orig is close to 0 and this
# ratio can be very large; read it as an illustration, not a stable quantity.
means.pctdiff <- pct_change(mean.orig, mean.out)

all.means <- rbind(mean.orig, mean.out, means.pctdiff)
colnames(all.means) <- sample.ns
round(all.means, 2)

barplot(
  means.pctdiff,
  names.arg = sample.ns,
  main = "Outlier effect on mean by sample size",
  sub = "single outlier x=10 added to N(0,1)",
  xlab = "sample size",
  ylab = "pct change in mean"
)

# Standard deviations ----

# Repeat for the standard deviations.
sd.orig <- c(sd(x10), sd(x100), sd(x1000))
sd.out <- c(sd(x10out), sd(x100out), sd(x1000out))
sds.pctdiff <- pct_change(sd.orig, sd.out)

all.sds <- rbind(sd.orig, sd.out, sds.pctdiff)
colnames(all.sds) <- sample.ns
round(all.sds, 2)

barplot(
  sds.pctdiff,
  names.arg = sample.ns,
  main = "Outlier effect on standard deviation by sample size",
  sub = "single outlier x=10 added to N(0,1)",
  xlab = "sample size",
  ylab = "pct change in sd"
)