#An example from "Design of Experiments: Statistical #Principles of Research Design and Analysis" #2nd Edition by Robert O. Kuehl d=read.delim("http://www.public.iastate.edu/~dnett/S511/PlantDensity.txt") d head(d) tail(d) names(d)=c("x","y") head(d) plot(d[,1],d[,2],col=4,pch=16,xlab="Plant Density", ylab="Grain Yield") o=lm(y~x,data=d) model.matrix(o) summary(o) anova(o) #Let's add the best fitting simple linear regression #line to our plot. u=seq(0,60,by=.01) #overkill here but used later. lines(u,coef(o)[1]+coef(o)[2]*u,col=2) #The fit doesn't look very good. #Let's formally test for lack of fit. o2=lm(y~x+as.factor(x),data=d) #Could have used 'update' to obtain the same fit. #o2=update(o,.~.+as.factor(x)) model.matrix(o2) summary(o2) anova(o2) #It looks like a linear fit is inadequate. #Let's try a quadratic fit. o3=lm(y~x+I(x^2)+as.factor(x),data=d) model.matrix(o3) summary(o3) anova(o3) #It looks like a quadratic fit is adequate. #Let's estimate the coefficients for the best #quadratic fit. b=coef(lm(y~x+I(x^2),data=d)) #Let's add the best fitting quadratic curve #to our plot. lines(u,b[1]+b[2]*u+b[3]*u^2,col=3) #Let's add the treatment group means to our plot. trt.means=tapply(d\$y,d\$x,mean) points(unique(d\$x),trt.means,pch="X") #Note that we can break down the 4 degrees of #freedom for treatment into orthogonal polynomial #contrasts corresponding to linear, quadratic, #cubic, and quartic terms. o4=lm(y~x+I(x^2)+I(x^3)+I(x^4),data=d) anova(o4) anova(lm(y~as.factor(x),data=d)) #The quartic fit will pass through the treatment #means. b=coef(o4) lines(u,b[1]+b[2]*u+b[3]*u^2+b[4]*u^3+b[5]*u^4,col=1) #The following code shows how the tests in the ANOVA #table corresponding to linear, quadratic, cubic, and #quartic terms can alternatively be obtained by #testing whether certain contrasts of the cell #means are equal to 0. x1=unique(d\$x) x1 x2=x1^2 x3=x1^3 x4=x1^4 c1=resid(lm(x1 ~ 1)) c2=resid(lm(x2 ~ c1)) c3=resid(lm(x3 ~ c1 + c2)) c4=resid(lm(x4 ~ c1 + c2 + c3)) #Each vector is a contrast vector. sum(c1) sum(c2) sum(c3) sum(c4) #These contrast vectors are orthogonal. t(c1)%*%c2 t(c1)%*%c3 t(c1)%*%c4 t(c2)%*%c3 t(c2)%*%c4 t(c3)%*%c4 #Now let's verify that tests of c'beta=0 using #these contrast vectors provide the same tests #in the anova table. #Fit the cell means model. o=lm(y~0+as.factor(x),data=d) #Obtain estimated means and variance. b=coef(o) v=vcov(o) #Compute F-statistics. F1=(t(c1)%*%b)^2/(t(c1)%*%v%*%c1) F2=(t(c2)%*%b)^2/(t(c2)%*%v%*%c2) F3=(t(c3)%*%b)^2/(t(c3)%*%v%*%c3) F4=(t(c4)%*%b)^2/(t(c4)%*%v%*%c4) F1 F2 F3 F4 #Compute p-values. 1-pf(F1,1,df.residual(o)) 1-pf(F2,1,df.residual(o)) 1-pf(F3,1,df.residual(o)) 1-pf(F4,1,df.residual(o)) #Compare with previous results. anova(o4)