# This is SPLUS code for plotting # relative wind speed against distance # behind a shelter belt of trees. # This file is posted as shelter.ssc # The data are posted in the file # shelter.dat # There are two numbers on each line # in the following order: # Distance from the shelter belt (dist) # Relative wind speed (speed) # Enter the data into a data frame cw <- read.table("c:/documents and settings/kkoehler.IASTATE/my documents/courses/st415/shelter.dat", col.names=c("dist","speed")) # Sort the data by distance i <- sort.list(cw\$dist) cw <- cw[i,] cw # Fit a polynomial model cw\$dist2<-cw\$dist^2 cw\$dist3<-cw\$dist^3 cw\$dist4<-cw\$dist^4 cw.4 <- lm(speed~dist + dist2 + dist3 + dist4 ,data=cw) summary(cw.4) coef(cw.4) anova(cw.4) par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance(meters)", ylab="Relative Speed", main="Relative Wind Speed \n (4-th degree polynomial)") a <- seq(0, 34, .5) cw.4p<- predict(cw.4, data.frame(dist=a,dist2=a^2,dist3=a^3,dist4=a^4), se=T) cw4.upper <- cw.4p\$fit + qt(.975,cw.4p\$df)*cw.4p\$se.fit cw4.lower <- cw.4p\$fit - qt(.975,cw.4p\$df)*cw.4p\$se.fit lines(a, cw.4p\$fit,lty=1,lwd=3) lines(smooth.spline(a, cw4.upper ), lty=3,lwd=3) lines(smooth.spline(a, cw4.lower ), lty=3,lwd=3) # Examine residual plots scatter.smooth(cw\$dist, resid(cw.4), span=.25, degree=2) qqnorm(residuals(cw.4)) qqline(residuals(cw.4)) # 95 percent predicition intervals cw.4pi <- cbind(a,cw4.lower,cw.4p\$fit,cw4.upper) cw.4pi[c(3,5,11,21,41,61), ] # Fit a polynomial cw\$dist5 <- cw\$dist^5 cw.5 <- lm(speed~dist + dist2 + dist3 + dist4+ dist5,data=cw) summary(cw.5) coef(cw.5) anova(cw.5) par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance(meters)", ylab="Relative Speed", main="Relative Wind Speed \n (5-th degree polynomial)") a <- seq(0, 34, .5) cw.5p<- predict(cw.5, data.frame(dist=a,dist2=a^2,dist3=a^3, dist4=a^4,dist5=a^5), se=T) cw5.upper <- cw.5p\$fit + qt(.975,cw.5p\$df)*cw.5p\$se.fit cw5.lower <- cw.5p\$fit - qt(.975,cw.5p\$df)*cw.5p\$se.fit lines(a, cw.5p\$fit,lty=1,lwd=3) lines(smooth.spline(a, cw5.upper ), lty=3,lwd=3) lines(smooth.spline(a, cw5.lower ), lty=3,lwd=3) # Examine residual plots scatter.smooth(cw\$dist, resid(cw.5), span=.25, degree=2) qqnorm(residuals(cw.5)) qqline(residuals(cw.5)) # 95 percent predicition intervals cw.5pi <- cbind(a,cw5.lower,cw.5p\$fit,cw5.upper) cw.5pi[c(3,5,11,21,41,61), ] # Fit a polynomial cw\$dist6 <- cw\$dist^6 cw.6 <- lm(speed~dist + dist2 + dist3 + dist4+ dist5 + dist6, data=cw) summary(cw.6) coef(cw.6) anova(cw.6) par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance(meters)", ylab="Relative Speed", main="Relative Wind Speed \n (6-th degree polynomial)") a <- seq(0, 34, .5) cw.6p<- predict(cw.6, data.frame(dist=a,dist2=a^2,dist3=a^3, dist4=a^4,dist5=a^5,dist6=a^6), se=T) cw6.upper <- cw.6p\$fit + qt(.975,cw.6p\$df)*cw.6p\$se.fit cw6.lower <- cw.6p\$fit - qt(.975,cw.6p\$df)*cw.6p\$se.fit lines(a, cw.6p\$fit,lty=1,lwd=3) lines(smooth.spline(a, cw6.upper ), lty=3,lwd=3) lines(smooth.spline(a, cw6.lower ), lty=3,lwd=3) # Examine residual plots scatter.smooth(cw\$dist, resid(cw.6), span=.25, degree=2) qqnorm(residuals(cw.6)) qqline(residuals(cw.6)) # 95 percent predicition intervals cw.6pi <- cbind(a,cw6.lower,cw.6p\$fit,cw6.upper) cw.6pi[c(3,5,11,21,41,61), ] # Fit a polynomial model cw\$dist7 <- cw\$dist^7 cw.7 <- lm(speed~dist + dist2 + dist3 + dist4 + dist5 + dist6 + dist7,data=cw) summary(cw.7) coef(cw.7) anova(cw.7) par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance(meters)", ylab="Relative Speed", main="Relative Wind Speed \n (7-th degree polynomial)") a <- seq(0, 34, .5) cw.7p<- predict(cw.7, data.frame(dist=a,dist2=a^2,dist3=a^3, dist4=a^4,dist5=a^5,dist6=a^6,dist7=a^7), se=T) cw7.upper <- cw.7p\$fit + qt(.975,cw.7p\$df)*cw.7p\$se.fit cw7.lower <- cw.7p\$fit - qt(.975,cw.7p\$df)*cw.7p\$se.fit lines(a, cw.7p\$fit,lty=1,lwd=3) lines(smooth.spline(a, cw7.upper ), lty=3,lwd=3) lines(smooth.spline(a, cw7.lower ), lty=3,lwd=3) # Examine residuals scatter.smooth(cw\$dist, resid(cw.7), span=.25, degree=2) qqnorm(residuals(cw.7)) qqline(residuals(cw.7)) # 95 percent predicition intervals cw.7pi <- cbind(a,cw7.lower,cw.7p\$fit,cw7.upper) cw.7pi[c(3,5,11,21,41,61), ] # Fit a polynomial model cw\$dist8 <- cw\$dist^8 cw.8 <- lm(speed~dist + dist2 + dist3 + dist4 + dist5 + dist6 + dist7 + dist8,data=cw) summary(cw.8) coef(cw.8) anova(cw.8) par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance(meters)", ylab="Relative Speed", main="Relative Wind Speed \n (8-th degree polynomial)") a <- seq(0, 34, .5) cw.8p<- predict(cw.8, data.frame(dist=a,dist2=a^2,dist3=a^3, dist4=a^4,dist5=a^5,dist6=a^6,dist7=a^7,dist8=a^8), se=T) cw8.upper <- cw.8p\$fit + qt(.975,cw.8p\$df)*cw.8p\$se.fit cw8.lower <- cw.8p\$fit - qt(.975,cw.8p\$df)*cw.8p\$se.fit lines(a, cw.8p\$fit,lty=1,lwd=3) lines(smooth.spline(a, cw8.upper ), lty=3,lwd=3) lines(smooth.spline(a, cw8.lower ), lty=3,lwd=3) # Examine residuals scatter.smooth(cw\$dist, resid(cw.8), span=.25, degree=2) qqnorm(residuals(cw.8)) qqline(residuals(cw.8)) # 95 percent predicition intervals cw.8pi <- cbind(a,cw8.lower,cw.8p\$fit,cw8.upper) cw.8pi[c(3,5,11,21,41,61), ] # Fit a polynomial model cw\$dist9 <- cw\$dist^9 cw.9 <- lm(speed~dist + dist2 + dist3 + dist4 + dist5 + dist6 + dist7 + dist8 + dist9 ,data=cw) summary(cw.9) coef(cw.9) anova(cw.9) par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance(meters)", ylab="Relative Speed", main="Relative Wind Speed \n (9-th degree polynomial)") a <- seq(0, 34, .5) cw.9p<- predict(cw.9, data.frame(dist=a,dist2=a^2,dist3=a^3, dist4=a^4,dist5=a^5,dist6=a^6,dist7=a^7,dist8=a^8,dist9=a^9), se=T) cw9.upper <- cw.9p\$fit + qt(.975,cw.9p\$df)*cw.9p\$se.fit cw9.lower <- cw.9p\$fit - qt(.975,cw.9p\$df)*cw.9p\$se.fit lines(a, cw.9p\$fit,lty=1,lwd=3) lines(smooth.spline(a, cw9.upper ), lty=3,lwd=3) lines(smooth.spline(a, cw9.lower ), lty=3,lwd=3) # Examine residuals scatter.smooth(cw\$dist, resid(cw.9), span=.25, degree=2) qqnorm(residuals(cw.9)) qqline(residuals(cw.9)) # 95 percent predicition intervals cw.9pi <- cbind(a,cw9.lower,cw.9p\$fit,cw9.upper) cw.9pi[c(3,5,11,21,41,61), ] # Fit loess curves. First use a local straight line method. cw.100 <- loess(formula=speed~dist,data=cw,span=1.00,degree=1) cw.75 <- loess(formula=speed~dist,data=cw,span=.75,degree=1) cw.25 <- loess(formula=speed~dist,data=cw,span=.25,degree=1) cw.10 <- loess(formula=speed~dist,data=cw,span=.10,degree=1) cw.06 <- loess(formula=speed~dist,data=cw,span=.06,degree=1) par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance (meters)", ylab="Relative Speed", main="Relative Wind Speed \n Local Linear Loess Curves") cw.100p<- predict(cw.100, data.frame(dist=a), se=T) cw.75p<- predict(cw.75, data.frame(dist=a), se=T) cw.25p<- predict(cw.25, data.frame(dist=a), se=T) cw.10p<- predict(cw.10, data.frame(dist=a), se=T) cw.06p<- predict(cw.06, data.frame(dist=a), se=T) lines(a, cw.100p\$fit, lty=1,lwd=3) lines(a, cw.75p\$fit, lty=3,lwd=3) lines(a, cw.25p\$fit, lty=4,lwd=3) lines(a, cw.10p\$fit, lty=7,lwd=3) lines(a, cw.06p\$fit, lty=14,lwd=3) legend(20,60,c("span=1.00","span=0.75", "span=0.25","span=0.10","span=0.06"), lty=c(1,3,4,7,14),bty="n") # Compare the loess curves with different spans anova(cw.100,cw.75,cw.25,cw.10,cw.06) # Plot residuals scatter.smooth(cw\$dist, resid(cw.25), span=1, degree=2) scatter.smooth(cw\$dist, resid(cw.10), span=1, degree=2) scatter.smooth(cw\$dist, resid(cw.06), span=1, degree=2) qqnorm(residuals(cw.06)) qqline(residuals(cw.06)) # Consider a second degree loess smoother cw.100 <- loess(formula=speed~dist,data=cw,span=1.00,degree=2) cw.75 <- loess(formula=speed~dist,data=cw,span=.75,degree=2) cw.25 <- loess(formula=speed~dist,data=cw,span=.25,degree=2) cw.10 <- loess(formula=speed~dist,data=cw,span=.10,degree=2) cw.07 <- loess(formula=speed~dist,data=cw,span=.07,degree=2) par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance (meters)", ylab="Relative Speed", main="Relative Wind Speed \n Local Quadratic Loess Curves") cw.100p<- predict(cw.100, data.frame(dist=a), se=T) cw.75p<- predict(cw.75, data.frame(dist=a), se=T) cw.25p<- predict(cw.25, data.frame(dist=a), se=T) cw.10p<- predict(cw.10, data.frame(dist=a), se=T) cw.06p<- predict(cw.07, data.frame(dist=a), se=T) lines(a, cw.100p\$fit, lty=1,lwd=3) lines(a, cw.75p\$fit, lty=3,lwd=3) lines(a, cw.25p\$fit, lty=4,lwd=3) lines(a, cw.10p\$fit, lty=7,lwd=3) lines(a, cw.07p\$fit, lty=14,lwd=3) legend(20,60,c("span=1.00","span=0.75", "span=0.25","span=0.10","span=0.07"), lty=c(1,3,4,7,14),bty="n") anova(cw.100,cw.75,cw.25,cw.10,cw.07) # Plot residuals scatter.smooth(cw\$dist, residuals(cw.25), span=1, degree=2) scatter.smooth(cw\$dist, residuals(cw.10), span=1, degree=2) scatter.smooth(cw\$dist, residuals(cw.07), span=1, degree=2) qqnorm(residuals(cw.10)) qqline(residuals(cw.10)) # Make predictions using predict( ) and # compute pointwise 95% confidence intervals cwq10 <- predict(cw.10, a, se=T) cwq10.upper <- cwq10\$fit + qt(.975,cwq10\$df)*cwq10\$se.fit cwq10.lower <- cwq10\$fit - qt(.975,cwq10\$df)*cwq10\$se.fit cwq10pi <- cbind(a,cwq10.lower,cwq10\$fit,cwq10.upper) cwq10pi[c(3,5,11,21,41,61), ] # Plot the curve with pointwise prediction intervals # Avoid distances with missing values a2 <- seq(1,32,.5) cwq10 <- predict(cw.10, a2, se=T) cwq10.upper <- cwq10\$fit + qt(.975,cwq10\$df)*cwq10\$se.fit cwq10.lower <- cwq10\$fit - qt(.975,cwq10\$df)*cwq10\$se.fit par(fin=c(6.0,6.0),pch=18,mkh=.001) plot(cw\$dist, cw\$speed, type="n", xlab="Distance (meters)", ylab="Relative Speed", main="Local Quadratic Loess Curve") lines(smooth.spline(a2, cwq10\$fit), lty=1,lwd=3) lines(smooth.spline(a2, cwq10.upper ), lty=3,lwd=3) lines(smooth.spline(a2, cwq10.lower ), lty=3,lwd=3) # Use kernel smoothers. par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Gaussian Kernel Smoothers") lines(ksmooth(cw\$dist, cw\$speed, kernel="normal", bandwidth=0.5), lty=1,lwd=3) lines(ksmooth(cw\$dist, cw\$speed, kernel="normal", bandwidth=2), lty=3,lwd=3) lines(ksmooth(cw\$dist, cw\$speed, kernel="normal", bandwidth=5), lty=4,lwd=3) legend(18,50,c("bandwidth=0.5", "bandwidth=2", "bandwidth=5"), lty=c(1,3,4),bty="n") # Make predictions cw.ks05<-ksmooth(cw\$dist, cw\$speed, kernel="normal", bandwidth=0.5) cw.app<-approx(as.vector(cw.ks05\$x),as.vector(cw.ks05\$y),as.vector(a)) cw.app\$y[c(3,5,11,21,41,61)] # Fit cubic splines with 3, 5, and 11 knots # determined by cross-validation library(splines) par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Cubic Splines") lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, knots=c(5,15,25)))), lty=7,lwd=3) lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, df=4))), lty=1,lwd=3) lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, df=8))), lty=3,lwd=3) lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, df=16))), lty=4,lwd=3) # Use approximate F-test to compare the # fit of two cubic splines with different # numbers of knots anova(lm(cw\$speed ~ ns(cw\$dist, knots=c(5,15,25))), lm(cw\$speed ~ ns(cw\$dist, df=4))) anova(lm(cw\$speed ~ ns(cw\$dist, df=4)), lm(cw\$speed ~ ns(cw\$dist, df=8))) anova(lm(cw\$speed ~ ns(cw\$dist, df=8)), lm(cw\$speed ~ ns(cw\$dist, df=16))) anova(lm(cw\$speed ~ ns(cw\$dist, df=4)), lm(cw\$speed ~ ns(cw\$dist, df=16))) # A cubic spline with three knots seems to be # adequate. Display the curve par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Cubic Splines") lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, df=4))), lty=1,lwd=3) # Fit a smooth spline with the degree of fit # determined by cross-validation par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Smooth Splines") lines(smooth.spline(cw\$dist, cw\$speed), lty=1,lwd=3) # Make predictions cw.ss <- smooth.spline(cw\$dist, cw\$speed) cw.ssp <- predict(cw.ss, a, se=T) names(cw.ss) res <- (cw\$speed - cw.ss\$y)/(1-cw.ss\$lev) sigma <- sqrt(var(res)) upper <- cw.ss\$y + 2.0*sigma*sqrt(cw.ss\$lev) lower <- cw.ss\$y - 2.0*sigma*sqrt(cw.ss\$lev) # Fit a smooth spline with the degree of fit # determined by cross-validation par(fin=c(6.0,6.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="n", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Smooth Splines") lines(smooth.spline(cw.ss\$x, cw.ss\$y ), lty=1,lwd=3) lines(smooth.spline(cw.ss\$x, upper ), lty=3,lwd=3) lines(smooth.spline(cw.ss\$x, lower ), lty=3,lwd=3)