# This is SPLUS code for plotting # relative wind speed against distance # behind a shelter belt of trees. # This file is posted as shelter.ssc # The data are posted in the file # shelter.dat # There are two numbers on each line # in the following order: # Distance from the shelter belt (dist) # Relative wind speed (speed) # Enter the data into a data frame cw <- read.table("c:/mydocuments/stat415/shelter.dat", col.names=c("dist","speed")) # Sort the data by distance i <- sort.list(cw\$dist) cw <- cw[i,] cw # Fit a polynomial model cw.8 <- lm(speed~dist + dist^2 + dist^3 + dist^4 + dist^5 + dist^6 + dist^7 + dist^8 ,data=cw) summary(cw.8) coef(cw.8) par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance(meters)", ylab="Relative Speed", main="Relative Wind Speed \n (8-th degree polynomial)") a <- seq(0, 34, .5) lines(a, predict(cw.8, data.frame(dist=a), type="response"),lty=1,lwd=3) # Make predictions and examine residual plots cw.se8 <- predict(cw.8, data.frame(dist=c(1,10,30)), type="response",se.fit=T) cw.se8 pointwise(cw.se8, coverage=.95) scatter.smooth(cw\$dist, resid(cw.8), span=.25, degree=2) qqnorm(residuals(cw.8)) qqline(residuals(cw.8)) # Fit a polynomial model cw.7 <- lm(speed~dist + dist^2 + dist^3 + dist^4 + dist^5 + dist^6 + dist^7,data=cw) summary(cw.7) par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5) plot(cw\$dist, cw\$speed, type="p", xlab="Distance(meters)", ylab="Relative Speed", main="Relative Wind Speed \n (7-th degree polynomial)") a <- seq(0, 34, .5) lines(a, predict(cw.7, data.frame(dist=a), type="response"),lty=1,lwd=3) # Examine residuals scatter.smooth(cw\$dist, resid(cw.7), span=.25, degree=2) qqnorm(residuals(cw.7)) qqline(residuals(cw.7)) # Fit loess curves. First use a local straight line method. cw.100 <- loess(formula=speed~dist,data=cw,span=1.00,degree=1) cw.75 <- loess(formula=speed~dist,data=cw,span=.75,degree=1) cw.25 <- loess(formula=speed~dist,data=cw,span=.25,degree=1) cw.10 <- loess(formula=speed~dist,data=cw,span=.10,degree=1) cw.06 <- loess(formula=speed~dist,data=cw,span=.06,degree=1) par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5,) plot(cw\$dist, cw\$speed, type="p", xlab="Distance (meters)", ylab="Relative Speed", main="Relative Wind Speed \n Local Linear Loess Curves") lines(cw\$dist, cw.100\$fitted.values, lty=1,lwd=3) lines(cw\$dist, cw.75\$fitted.values, lty=3,lwd=3) lines(cw\$dist, cw.25\$fitted.values, lty=4,lwd=3) lines(cw\$dist, cw.10\$fitted.values, lty=7,lwd=3) lines(cw\$dist, cw.06\$fitted.values, lty=14,lwd=3) legend(20,60,c("span=1.00","span=0.75", "span=0.25","span=0.10","span=0.06"), lty=c(1,3,4,7,14),bty="n") # Compare the loess curves with different spans anova(cw.100,cw.75,cw.25,cw.10,cw.06) # Plot residuals scatter.smooth(cw\$dist, resid(cw.25), span=1, degree=2) scatter.smooth(cw\$dist, resid(cw.10), span=1, degree=2) scatter.smooth(cw\$dist, resid(cw.06), span=1, degree=2) qqnorm(residuals(cw.06)) qqline(residuals(cw.06)) # Consider a second degree loess smoother cw.100 <- loess(formula=speed~dist,data=cw,span=1.00,degree=2) cw.75 <- loess(formula=speed~dist,data=cw,span=.75,degree=2) cw.25 <- loess(formula=speed~dist,data=cw,span=.25,degree=2) cw.10 <- loess(formula=speed~dist,data=cw,span=.10,degree=2) cw.07 <- loess(formula=speed~dist,data=cw,span=.07,degree=2) par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5,) plot(cw\$dist, cw\$speed, type="p", xlab="Distance (meters)", ylab="Relative Speed", main="Relative Wind Speed \n Local Quadratic Loess Curves") lines(cw\$dist, cw.100\$fitted.values, lty=1,lwd=3) lines(cw\$dist, cw.75\$fitted.values, lty=3,lwd=3) lines(cw\$dist, cw.25\$fitted.values, lty=4,lwd=3) lines(cw\$dist, cw.10\$fitted.values, lty=7,lwd=3) lines(cw\$dist, cw.07\$fitted.values, lty=14,lwd=3) legend(16,50,c("span=1.0", "span=0.75", "span=0.25","span=0.10","span=0.07"), lty=c(1,3,4,7,14),bty="n") anova(cw.100,cw.75,cw.25,cw.10,cw.07) # Plot residuals scatter.smooth(cw\$dist, residuals(cw.10), span=1, degree=2) qqnorm(residuals(cw.10)) qqline(residuals(cw.10)) # Make predictions using predict( ) and # compute pointwise 95% confidence intervals cw.se <- predict(cw.10, c(1,10,30), se.fit=T) pointwise(cw.se, coverage=.95) # Use kernel smoothers. par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cw\$dist, cw\$speed, type="p", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Gaussian Kernel Smoothers") lines(ksmooth(cw\$dist, cw\$speed, kernel="normal", bandwidth=0.5), lty=1,lwd=3) lines(ksmooth(cw\$dist, cw\$speed, kernel="normal", bandwidth=2), lty=3,lwd=3) lines(ksmooth(cw\$dist, cw\$speed, kernel="normal", bandwidth=5), lty=4,lwd=3) legend(18,50,c("bandwidth=0.5", "bandwidth=2", "bandwidth=5"), lty=c(1,3,4),bty="n") # Make predictions cw.ks05<-ksmooth(cw\$dist, cw\$speed, kernel="normal", bandwidth=0.5) cw.app<-approx(as.vector(cw.ks05\$x),as.vector(cw.ks05\$y),as.vector(c(1,10,30))) cw.app # Plot resisduals cw.ks05.res <- cw\$speed-cw.ks05\$y scatter.smooth(cw\$dist, cw.ks05.res, span=.25, degree=2) qqnorm(cw.ks05.res) qqline(cw.ks05.res) # Use the super smoother in S-plus # It is based on a symmetric k-nearest # neighbor linear least squares procedure # Cross-validation is used to choose a # k for each x. Larger values of the bass # parameter encourage more smoothing. par(fin=c(7.0,7.0),pch=18,mkh=.001,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cw\$dist, cw\$speed, type="p", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Super Smoother") lines(supsmu(cw\$dist, cw\$speed, bass=0), lty=1,lwd=3) lines(supsmu(cw\$dist, cw\$speed, bass=1), lty=3,lwd=3) lines(supsmu(cw\$dist, cw\$speed, bass=3), lty=4,lwd=3) legend(5,1.31,c("bass=0", "base=1", "bass=3"), lty=c(1,3,4),bty="n") # Make predictions at specific distances cw.ss <- supsmu(cw\$dist, cw\$speed, bass=0) approx(as.vector(cw.ss\$x), as.vector(cw.ss\$y), as.vector(c(1,10,30))) # Plot residuals cw.ss.res <- cw\$speed-cw.ss\$y scatter.smooth(cw\$dist, cw.ss.res, span=.5, degree=2) qqnorm(cw.ss.res) qqline(cw.ss.res) # Fit cubic splines with 3, 5, and 11 knots # determined by cross-validation par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cw\$dist, cw\$speed, type="p", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Cubic Splines") lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, knots=c(5,15,25)))), lty=7,lwd=3) lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, df=4))), lty=1,lwd=3) lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, df=8))), lty=3,lwd=3) lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, df=16))), lty=4,lwd=3) # Use approximate F-test to compare the # fit of two cubic splines with different # numbers of knots anova(lm(cw\$speed ~ ns(cw\$dist, knots=c(5,15,25))), lm(cw\$speed ~ ns(cw\$dist, df=4))) anova(lm(cw\$speed ~ ns(cw\$dist, df=4)), lm(cw\$speed ~ ns(cw\$dist, df=8))) anova(lm(cw\$speed ~ ns(cw\$dist, df=8)), lm(cw\$speed ~ ns(cw\$dist, df=16))) anova(lm(cw\$speed ~ ns(cw\$dist, df=4)), lm(cw\$speed ~ ns(cw\$dist, df=16))) # A cubic spline with three knots seems to be # adequate. Display the curve par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cw\$dist, cw\$speed, type="p", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Cubic Splines") lines(cw\$dist, fitted(lm(cw\$speed ~ ns(cw\$dist, df=4))), lty=1,lwd=3) # Fit a smooth spline with the degree of fit # determined by cross-validation par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cw\$dist, cw\$speed, type="p", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Smooth Splines") lines(smooth.spline(cw.dist, cw\$speed), lty=1,lwd=3) # Make predictions cw.ss <- smooth.spline(cw\$dist, cw\$speed) predict(cw.ss, c(1,3,5,7,9,11,13,15,17,19,21,23,25,27,29)) names(cw.ss) res <- (cw\$speed - cw.ss\$y)/(1-cw.ss\$lev) sigma <- sqrt(var(res)) upper <- cw.ss\$y + 2.0*sigma*sqrt(cw.ss\$lev) lower <- cw.ss\$y - 2.0*sigma*sqrt(cw.ss\$lev) # Fit a smooth spline with the degree of fit # determined by cross-validation par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cw\$dist, cw\$speed, type="n", xlab="Distance", ylab="Wind Speed", main="Shelter Belt Wind Speed \n Smooth Splines") lines(smooth.spline(cw.ss\$x, cw.ss\$y ), lty=1,lwd=3) lines(smooth.spline(cw.ss\$x, upper ), lty=3,lwd=3) lines(smooth.spline(cw.ss\$x, lower ), lty=3,lwd=3)