# R script for lecture 4 in STK4900/9900 # Slide 2: Unadjusted linear regression ---------------------------------------- data.hers <- read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/data/hers.txt", header=T) ind <- (data.hers$diabetes==0) data.hers <- data.hers[ind,] boxplot(glucose~exercise, data=data.hers) lm.fit <- lm(glucose~exercise, data=data.hers) summary(lm.fit) # Slide 5: Confounding patterns ------------------------------------------------ # E <- C -> Y C <- runif(100)*10 Y <- rnorm(100, mean=C, sd=0.5) E <- vector("numeric",100) for (i in 1:100){ E[i] <- rbinom(n=1, size=1, prob=C[i]/10) } data <- data.frame(Y=Y, E=E, C=C) boxplot(Y~E, data=data) plot(C, Y, pch = 19, col = factor(E)) lm.fit.unadj <- lm(Y~E, data=data) summary(lm.fit.unadj) lm.fit.adj <- lm(Y~E+C, data=data) summary(lm.fit.adj) # Y <- E <- C -> Y C <- runif(100)*10 E <- vector("numeric",100) for (i in 1:100){ E[i] <- rbinom(n=1, size=1, prob=C[i]/10) } Y <- rnorm(100, mean=C-4*E, sd=0.5) data <- data.frame(Y=Y, E=E, C=C) boxplot(Y~E, data=data) plot(C, Y, pch = 19, col = factor(E)) lm.fit.unadj <- lm(Y~E, data=data) summary(lm.fit.unadj) lm.fit.adj <- lm(Y~E+C, data=data) summary(lm.fit.adj) # Slide 7: Adjusted linear regression ------------------------------------------ data.hers$BMI <- as.numeric(data.hers$BMI) data.hers$drinkany <- as.numeric(data.hers$drinkany) lm.fit <- lm(glucose~exercise+age+drinkany+BMI, data=data.hers) summary(lm.fit) # Slide 10-11: Confounder effect ----------------------------------------------- data.hers.nm <- data.hers[!is.na(data.hers$BMI),] fit.a <- lm(glucose~exercise, data=data.hers.nm) fit.a$coef fit.b <- lm(glucose~exercise+BMI, data=data.hers.nm) fit.b$coef r12 <- cor(data.hers.nm$exercise, data.hers.nm$BMI) s1 <- sd(data.hers.nm$exercise) s2 <- sd(data.hers.nm$BMI) fit.b$coef[2]+fit.b$coef[3]*r12*s2/s1 fit.a$coef # Slide 17-19: Interaction effect ---------------------------------------------- data.hers <- read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/data/hers.txt", header=T) data.hers$LDL1 <- as.numeric(data.hers$LDL1) ht.fit <- lm(LDL1~HT+statins+HT:statins, data=data.hers) summary(ht.fit) library(contrast) par1 <- list(HT=1, statins=1) par2 <- list(HT=0, statins=1) contrast(ht.fit, par1, par2) ht <- data.hers$HT st <- data.hers$statins data.hers$HTstat <- 1*(ht==0 & st==0)+2*(ht==1 & st==0)+3*(ht==0 & st==1)+4*(ht==1 & st==1) data.hers$HTstat <- factor(data.hers$HTstat) ht.fit.b <- lm(LDL1~HTstat, data=data.hers) summary(ht.fit.b) # Slide 22: Interaction effect ------------------------------------------------- data.hers$LDL <- as.numeric(data.hers$LDL) data.hers$BMI <- as.numeric(data.hers$BMI) data.hers$cBMI <- data.hers$BMI-mean(data.hers$BMI[!is.na(data.hers$BMI)]) stat.fit <- lm(LDL~statins+cBMI+statins:cBMI, data=data.hers) summary(stat.fit) # Slide 28: Two-way ANOVA ------------------------------------------------------ polymer <- read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/v11/polymer.txt",header=T) polymer$ftemp <- factor(polymer$temp) polymer$fcat <- factor(polymer$cat) fit <- lm(rate~ftemp+fcat+ftemp:fcat, data=polymer) summary(fit) anova(fit) # Slide 38: Confidence/prediction intervals ------------------------------------ data.hers <- read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/data/hers.sample.txt", header=T) fit.lm <- lm(sbp~age, data=data.hers) x <- 45:80 newage <- data.frame(age=x) est.sbp <- predict(fit.lm, newage, int="conf") pred.sbp <- predict(fit.lm, newage, int="pred") yint <- c(min(pred.sbp[,2]),max(pred.http://127.0.0.1:16613/graphics/plot_zoom_png?width=547&height=401sbp[,3])) plot(x,est.sbp[,1],type="l",ylim=yint, ylab="Systolic blood pressure") lines(x,est.sbp[,2],lty=2) lines(x,est.sbp[,3],lty=2) lines(x,pred.sbp[,2],lty=3) lines(x,pred.sbp[,3],lty=3) points(data.hers, col="grey") legend(42,186,c("Estimated","Confidence int.","Prediction int."),lty=1:3,bty="n")