# R script for lecture 5 in STK4900/9900 # Slide 5: CPR plots ----------------------------------------------------------- library(car) trees <- read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/data/trees.txt",header=T) fit.both <- lm(volume~diameter+height, data=trees) crPlots(fit.both, terms=~diameter+height) fit.sq <- lm(volume~diameter+I(diameter^2)+height, data=trees) crPlots(fit.sq, terms=~diameter+I(diameter^2)+height) # Slide 8-9: Residual plot - Constant variance --------------------------------- fit.sq <- lm(volume~diameter+I(diameter^2)+height, data=trees) plot(fit.sq$fitted.values, fit.sq$residuals, xlab="Fitted values", ylab="Residuals") plot(fit.sq, 1) plot(fit.sq, 3) # Slide 11: Residual plot - Normality ------------------------------------------ hist(fit.sq$residuals) boxplot(fit.sq$residuals) qqnorm(fit.sq$residuals) qqline(fit.sq$residuals, col="red") plot(fit.sq,2) # Slide 14: Residual plot - Normality ------------------------------------------ cigarettes <- read.table("http://www.uio.no/studier/emner/matnat/math/STK4900/data/sigarett.txt", header=T) fit.cig <- lm(co~nicot+tar, data=cigarettes) boxplot(dfbeta(fit.cig)) boxplot(dfbetas(fit.cig)) # Slide 20-22: GAMs ------------------------------------------------------------ library(gam) fit.gam.both <- gam(volume~s(diameter)+s(height), data=trees) par(mfrow=c(1,2)) plot(fit.gam.both, se=T) fit.gam.dia <- gam(volume~s(diameter)+height, data=trees) fit <- gam(volume~diameter+height, data=trees) anova(fit, fit.gam.dia, fit.gam.both) # Slide 40-43: Lasso ----------------------------------------------------------- library("glmnet") n <- 100 p <- 20 x <- matrix(rnorm(n=n*p, mean=0, sd=1),n,p) beta <- 4*(runif(5)-0.5) y <- x[,1:5]%*%beta+rnorm(n,0,1) cv.fit.lasso <- cv.glmnet(x, y, family="gaussian", alpha=1) lambda.min <- cv.fit.lasso$lambda.min beta.lasso <- coef(cv.fit.lasso, s=lambda.min) plot(cv.fit.lasso) fit.lasso <- glmnet(x, y, family="gaussian", alpha=1) plot(fit.lasso) data <- data.frame(y,x) fit.ls <- lm(y~., data=data) beta.ls <- fit.ls$coefficients summary(fit.ls) cbind(beta.lasso, beta.ls)