sat.data=read.table("/Users/pcaragea/teaching/Stat401/STAT401F07/Compute/sat.txt",header=TRUE) attach(sat.data) #full model full=lm(sat~I(log(takers))+income+years+public+expend+rank) anova(full) #to see a complete summary of the model: summary(full)#--->look for Rsquare, Adjusted R-squared, significan coeficients, etc. #If you want to compute AIC or BIC for this model: AIC(full,k=2)#to compute AIC: penalty 2 x p AIC(full,k=log(length(sat))) #to compute BIC: penalty p x log(n) #qqplot for residuals qqnorm(resid(full)) qqline(resid(full), col=2) #residual plots #interactive: plot(full) #or, to get one at a time: plot(full,which=1)#simple residual plot plot(full, which=2) #qqplot for residuals plot(full, which=3)#standardized residuals plot(full,which=4)#Cook's distance plot #stepwise regression: AIC is used if k=2, BIC if k=log(n) all.step=step(full,direction="both",k=2)#stepwise selection back.step=step(full,direction="back")#stepwise selection #choose best model: my.model=lm(sat~I(log(takers))+years+expend+rank) #check residual plotts plot(my.model,which=1)#simple residual plot plot(my.model, which=2) #qqplot for residuals plot(my.model, which=3)#standardized residuals plot(my.model,which=4)#Cook's distance plot #compute various diagnostics using ls.diag all.diagnostics=ls.diag(my.model) #look at Cook's distance: all.diagnostics$cook #are there any larger than 1? which(all.diagnostics$cook>1)#--->none for this example ######## one way of computing all-subset regression # need to download "leaps" package for doing all subset variables selection install.packages("leaps") library(leaps) ########### Y=sat X=cbind(log(takers),income,years,public,expend,rank) leaps(X, Y) leaps(X, Y, method="adjr2") ############### #to conduct all subset regression, use regsubsets allsubs=regsubsets(sat~I(log(takers))+income+years+public+expend+rank,data=sat.data,nbest=4) #you can look at a summary, or even save it for later use: measures=summary(allsubs) measures #too many things to look at, let's graph plot.regsubsets(allsubs)#orders models with respect to BIC plot.regsubsets(allsubs,scale="Cp")#orders models with respect to BIC plot.regsubsets(allsubs,scale="adjr2")#orders models with respect to BIC plot.regsubsets(allsubs,scale="r2")#orders models with respect to BIC #you can see the summaries for each model: cbind(measures$rsq,measures$cp,measures$bic) #best model: