# We generate sample observations from a normal distribution with mean=0, sd=1

# Fix the RNG seed so that every run reproduces exactly the same draws.
set.seed(1)

# Draw 10 standard-normal observations and display them.
x10 <- rnorm(n = 10)
x10

# Append one extreme value (10) to play the role of an outlier, then display
# the augmented sample.
x10out <- append(x10, 10)
x10out

# Same construction with 100 draws ...
x100 <- rnorm(n = 100)
x100out <- append(x100, 10)

# ... and with 1000 draws.
x1000 <- rnorm(n = 1000)
x1000out <- append(x1000, 10)

# Sample-size labels, kept as strings because they serve as column names and
# bar labels for the summaries that follow.
sample.ns <- c("10", "100", "1000")

# Sample mean at each sample size: first for the clean draws, then for the
# same draws with the outlier appended.
mean.orig <- vapply(list(x10, x100, x1000), mean, numeric(1))
mean.out <- vapply(list(x10out, x100out, x1000out), mean, numeric(1))

# Absolute percentage shift in the mean, measured relative to the
# outlier-free mean.
means.pctdiff <- abs(100 * (mean.orig - mean.out) / mean.orig)

# Summary table: one row per statistic, one column per sample size.
all.means <- matrix(
  c(mean.orig, mean.out, means.pctdiff),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(c("mean.orig", "mean.out", "means.pctdiff"), sample.ns)
)
round(all.means, 2)

# Bar chart of the percentage shift, one bar per sample size.
barplot(
  height = means.pctdiff,
  names.arg = colnames(all.means),
  xlab = "sample size",
  ylab = "pct change in mean",
  main = "Outlier effect on mean by sample size",
  sub = "single outlier x=10 added to N(0,1)"
)

# Same comparison for the standard deviation: clean samples first, then the
# samples with the outlier appended.
sd.orig <- vapply(list(x10, x100, x1000), sd, numeric(1))
sd.out <- vapply(list(x10out, x100out, x1000out), sd, numeric(1))

# Absolute percentage shift in the standard deviation, measured relative to
# the outlier-free value.
sds.pctdiff <- abs(100 * (sd.orig - sd.out) / sd.orig)

# Summary table: one row per statistic, one column per sample size.
all.sds <- matrix(
  c(sd.orig, sd.out, sds.pctdiff),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(c("sd.orig", "sd.out", "sds.pctdiff"), sample.ns)
)
round(all.sds, 2)

# Bar chart of the percentage shift, one bar per sample size.
barplot(
  height = sds.pctdiff,
  names.arg = sample.ns,
  xlab = "sample size",
  ylab = "pct change in sd",
  main = "Outlier effect on standard deviation by sample size",
  sub = "single outlier x=10 added to N(0,1)"
)