# Fitting a linear model
#
# Interactive walkthrough: load the tips data, explore it with qplot(), fit a
# sequence of linear models for `tip`, and compare them by deviance / AIC.
# One statement per line so the script actually parses and runs.

# qplot() and geom_smooth() come from ggplot2; dropterm() is called below as
# MASS::dropterm() so MASS does not need to be attached.
library(ggplot2)

# Load and inspect the data ----
tips <- read.csv(file.choose()) # Choose the tips data
head(tips)
dim(tips)
summary(tips)
# Rename column 8 by position -- assumes the chosen file keeps the party-size
# variable in that column (TODO confirm against the CSV).
colnames(tips)[8] <- "partysize"
qplot(tip, data = tips, geom = "histogram", fill = I("black"), binwidth = 0.1)

# Main-effects model on all rows ----
tips.lm <- lm(tip ~ bill + partysize + sex + smoker + time + day, data = tips)
str(tips.lm)
coef(tips.lm)
anova(tips.lm)
qplot(
  bill, tip,
  data = tips, geom = c("point", "smooth"), method = "lm",
  facets = . ~ partysize
)
deviance(tips.lm)
# extractAIC() is the stats function; `extract.aic` does not exist in R.
extractAIC(tips.lm)

# Residuals and fitted values alongside the data the model was fitted to.
tips.lm.diag <- data.frame(
  resid = tips.lm$residuals,
  fitted = tips.lm$fitted.values,
  data = tips
)
qplot(resid, fitted, data = tips.lm.diag)

# Tip vs bill, conditioned on the categorical variables ----
qplot(
  bill, tip,
  data = tips, geom = c("point", "smooth"), method = "lm",
  facets = sex ~ smoker
)
p <- qplot(bill, tip, data = tips, geom = "point")
p <- p + geom_smooth(aes(x = bill, y = tip, colour = smoker), method = "lm")
print(p)
qplot(
  bill, tip,
  data = tips, geom = c("point", "smooth"), method = "lm",
  facets = time ~ day
)

# Compare models of increasing complexity by residual deviance ----
tips.lm <- lm(tip ~ bill, data = tips)
deviance(tips.lm)
tips.lm <- lm(tip ~ bill + partysize, data = tips)
deviance(tips.lm)
tips.lm <- lm(tip ~ bill * partysize * sex * smoker, data = tips)
deviance(tips.lm)
qplot(
  bill, tip,
  data = tips, geom = c("point", "smooth"), method = "lm",
  facets = smoker ~ partysize
)

# Restrict to rows with partysize greater than 1 and less than 5 ----
# which() yields the matching row numbers without hard-coding the row count
# and drops rows where the comparison is NA.
indx <- which(tips$partysize > 1 & tips$partysize < 5)
tips.lm <- lm(tip ~ bill, data = tips, subset = indx)
deviance(tips.lm)
tips.lm <- lm(tip ~ bill + partysize, data = tips, subset = indx)
anova(tips.lm)
deviance(tips.lm)
tips.lm <- lm(tip ~ bill + partysize + sex * smoker, data = tips, subset = indx)
anova(tips.lm)
deviance(tips.lm)
step(tips.lm)
MASS::dropterm(tips.lm, test = "Chisq")

# Final model: tip ~ bill on the subset ----
tips.lm <- lm(tip ~ bill, data = tips, subset = indx)
anova(tips.lm)
deviance(tips.lm)
coef(tips.lm)
# The model was fitted on the `indx` rows only, so pair its residuals with the
# same rows; the full `tips` frame has a different number of rows and
# data.frame() would fail.
tips.lm.diag <- data.frame(
  resid = tips.lm$residuals,
  fitted = tips.lm$fitted.values,
  data = tips[indx, ]
)
qplot(resid, fitted, data = tips.lm.diag)
qplot(
  resid,
  data = tips.lm.diag, geom = "histogram", fill = I("black"), binwidth = 0.5
)
# Select the rows in `data`: qplot() has no `subset` argument.
qplot(
  bill, tip,
  data = tips[indx, ], geom = c("point", "smooth"), method = "lm"
)

# Predict the tip for a new bill; the fitted model only uses `bill`.
new.data <- data.frame(bill = 20.00, partysize = 2, sex = "M", smoker = "No")
predict(tips.lm, newdata = new.data)
# Fit summaries for the current `tips.lm` ----
# Relies on `tips`, `indx` and `tips.lm` created earlier in the script.
# Squared correlation between tip and bill on the `indx` rows.
cor(tips$tip[indx], tips$bill[indx])^2
# Residual deviance per residual degree of freedom; df.residual() replaces the
# partially matched `$df.resid`.
deviance(tips.lm) / df.residual(tips.lm)
# The same quantity as a fraction of the variance of tip on the `indx` rows.
deviance(tips.lm) / df.residual(tips.lm) / var(tips$tip[indx])

# Your turn ----
data(mtcars)
?mtcars
head(mtcars)
cars.lm <- lm(mpg ~ hp + wt + cyl + disp, data = mtcars)
cars.lm
anova(cars.lm)
deviance(cars.lm)
step(cars.lm)
# Residuals and fitted values alongside the mtcars columns.
cars.lm.diag <- data.frame(
  resid = cars.lm$residuals,
  fitted = cars.lm$fitted.values,
  data = mtcars
)

# Now repeat mtcars with iplots ----
# Loaded here rather than at the top so the sections above do not need it.
library(iplots)
?iplot
iplot(fitted(cars.lm), residuals(cars.lm), xlab = "Fitted", ylab = "Residuals")
ihist(residuals(cars.lm))
iplot(mtcars$hp, mtcars$mpg)
iplot(mtcars$wt, mtcars$mpg)
iplot(mtcars$cyl, mtcars$mpg)
ibar(rownames(mtcars))