# R script for lecture 1 in STK4900/9900 # Slide 13: CLT ---------------------------------------------------------------- x <- seq(0,1,by = 0.01) k <- 10000 para <- matrix(c(1,10,0.1,1,1,0.1),3,2) ns <- c(5,20,50) par(mfrow = c(3,4),mai = c(0.6, 0.5, 0.3, 0.2)) for (i in 1:3){ a <- para[i,1] b <- para[i,2] plot(x,dbeta(x,para[i,1],para[i,2]), col = "steelblue", type = "l", lwd = 2, ylab = "Density", main = paste("a = ",para[i,1],", b = ",para[i,2], sep = "")) for (n in ns){ y <- dnorm(x, mean = a/(a+b), sd = sqrt((a*b/((a+b+1)*(a+b)^2))/n)) hist(rowMeans(matrix(rbeta(n = n*k,para[i,1],para[i,2]),k,n)), freq = FALSE, xlab = "Sample mean", ylab = "Density", main = paste("n =",n), xlim = c(0,1), ylim = c(0,max(y)), col = "gray", border = "gray") lines(x, y, col = "darkred", lwd = 1.5, lty = "solid") } } # Slide 18: CI with unknown sd ------------------------------------------------- data <- c(249, 254, 243, 268, 253, 269, 287, 241, 273, 306, 303, 280, 260, 256, 278, 344, 304, 283, 310) m <- mean(data) s <- sd(data) n <- length(data) c <- qt(p=0.975, df=n-1) x <- seq(-3,3,0.01) plot(x, dt(x,n-1), type="l") # lines(x, dnorm(x,0,1), col="red") lower <- m-c*(s/sqrt(n)) upper <- m+c*(s/sqrt(n)) paste("95% CI: (",round(lower,1),",",round(upper,1),")", sep="") # Slide 22: t-test ------------------------------------------------------------- tt <- (m-265)/(s/sqrt(n)) pval <- 1-pt(q=tt, df=n-1) # Slide 32: Bootstrap ---------------------------------------------------------- n_bs <- 1000 mean_bs <- vector("numeric",n_bs) #median_bs <- vector("numeric",n_bs) for (i in 1:n_bs) { data_bs <- sample(x=data, size=n, replace=TRUE) mean_bs[i] <- mean(data_bs) #median_bs[i] <- median(data_bs) } lower_bs <- quantile(mean_bs,0.025) upper_bs <- quantile(mean_bs,0.975) #lower_bs <- quantile(median_bs,0.025) #upper_bs <- quantile(median_bs,0.975) paste("95% CI: (",round(lower_bs,1),",",round(upper_bs,1),")", sep="")