# This is SPLUS code for plotting # log(C-peptide concentration) # against age. # This file stored as cpeptide.ssc # The data stored in the file # cpeptide.dat # There are four numbers on each line # in the following order: # Subject identification code # Age at diagnosis (years) # Base deficit (measure of acidity) # C-peptide concentration (pmol/ml) # Enter the data into a data frame # Compute the natural log of the # C-peptide concentration. cpep <- read.table("c:/mydocuments/courses/st415/cpeptide.dat", header=T) cpep$Y <- log(cpep$Cpeptide) cpep$Y <- round(cpep$Y,digits=3) # Sort the data by age i <- sort.list(cpep$age) cpep <- cpep[i,] cpep # Code for plotting weight against time # Specify plotting symbol and size of graph in inches. # fin=c(w,h) specifies a plot that is w inches wide # and h inches high, not including labels # pch=18 requests a filled rectangle as a plotting # symbol # mkh=b requests plotting symbols that are b # inches high # mex=a sets the spacing between lines printed in # the margins # plt plt=c(.2,.8,.2,.8) defines the fraction of # figure region to use for plotting. This # can provide more space for to label margins par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations") # The following three lines are for adding # an axis for C-peptide concentration on # the original scale (pmol/ml). # pretty(): Returns a vector of ordered and equally # spaced values that span the range of the input. Y.exp <- pretty(range(exp(cpep$Y))) axis(side=4, at=log(Y.exp) , lab=Y.exp, srt=90) mtext("Concentration (pmol/ml)", side=4, line=3) # Fit a straight line model cpep.lin <- lm(Y~age,data=cpep) par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations") a <- seq(1, 16, .5) lines(a, predict(cpep.lin, data.frame(age=a), type="response"),lty=1,lwd=3) # Fit a quadratic model cpep.q <- lm(Y~age+age^2,data=cpep) par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations") a <- seq(1, 16, .5) lines(a, predict(cpep.q, data.frame(age=a), type="response"),lty=1,lwd=3) # Fit a cubic model cpep.3 <- lm(Y~age+age^2+age^3,data=cpep) par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations") a <- seq(1, 16, .5) lines(a, predict(cpep.3, data.frame(age=a), type="response"),lty=1,lwd=3) # Fit cubic splines with 3, 7, and 15 knots # determined by cross-validation par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n Cubic Splines") lines(cpep$age, fitted(lm(cpep$Y ~ ns(cpep$age, df=4))), lty=1,lwd=3) lines(cpep$age, fitted(lm(cpep$Y ~ ns(cpep$age, df=8))), lty=3,lwd=3) lines(cpep$age, fitted(lm(cpep$Y ~ ns(cpep$age, df=16))), lty=4,lwd=3) # Use approximate F-test to compare the # fit of two cubic splines with different # numbers of knots anova(lm(cpep$Y ~ ns(cpep$age, df=4)), lm(cpep$Y ~ ns(cpep$age, df=8))) anova(lm(cpep$Y ~ ns(cpep$age, df=3)), lm(cpep$Y ~ ns(cpep$age, df=4))) # A cubic spline with three knots seems to be # adequate. Display the curve par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n Cubic Spline with Two Knots") lines(cpep$age, fitted(lm(cpep$Y ~ ns(cpep$age, df=3))), lty=1,lwd=3) # Fit a smooth spline with the degree of fit # determined by cross-validation par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n Smooth Spline") lines(smooth.spline(cpep$age, cpep$Y ), lty=1,lwd=3) # Make predictions cpep.ss <- smooth.spline(cpep$age, cpep$Y ) predict(cpep.ss, c(2,4,6,8,10,12,14,16)) res <- (cpep.ss$yin - cpep.ss$y)/(1-cpep.ss$lev) sigma <- sqrt(var(res)) upper <- cpep.ss$y + 2.0*sigma*sqrt(cpep.ss$lev) lower <- cpep.ss$y - 2.0*sigma*sqrt(cpep.ss$lev) # Plot the curve with approximate pointwise # confidence limits par(fin=c(7.0,7.0),pch=18,mkh=.001,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep.ss$x, cpep.ss$y, type="n", xlim=c(0,16), ylim=c(1.0,1.8), xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n Smooth Spline") lines(smooth.spline(cpep.ss$x, cpep.ss$y ), lty=1,lwd=3) lines(smooth.spline(cpep.ss$x, upper ), lty=3,lwd=3) lines(smooth.spline(cpep.ss$x, lower ), lty=1,lwd=3) # Fit loess curves par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n Loess Curves") lines(cpep$age, loess(formula=Y~age,data=cpep,span=1,degree=1)$fitted.values, lty=1,lwd=3) lines(cpep$age, loess(formula=Y~age,data=cpep,span=.75,degree=1)$fitted.values, lty=3,lwd=3) lines(cpep$age, loess(formula=Y~age,data=cpep,span=.25,degree=1)$fitted.values, lty=4,lwd=3) legend(5,1.31,c("span=1.0", "span=0.75", "span=0.25"), lty=c(1,3,4),bty="n") # Compare the loess curves with different spans cpep.lo100 <- loess(formula=Y~age,data=cpep,span=1.00,degree=1) cpep.lo75 <- loess(formula=Y~age,data=cpep,span=.75,degree=1) cpep.lo25 <- loess(formula=Y~age,data=cpep,span=.25,degree=1) anova(cpep.lo100,cpep.lo75,cpep.lo25) # Plot residuals scatter.smooth(fitted(cpep.lo100), residuals(cpep.lo100), span=1, degree=1) scatter.smooth(fitted(cpep.lo75), residuals(cpep.lo75), span=1, degree=1) scatter.smooth(fitted(cpep.lo25), residuals(cpep.lo25), span=1, degree=1) qqnorm(residuals(cpep.lo75)) qqline(residuals(cpep.lo75)) # Consider a second degree polynomial smoother par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n Loess Curves") lines(cpep$age, loess(formula=Y~age,data=cpep, span=.75,degree=1)$fitted.values, lty=1,lwd=3) lines(cpep$age, loess(formula=Y~age,data=cpep, span=.75,degree=2)$fitted.values, lty=3,lwd=3) legend(5,1.31,c("degree=1.0", "degree=2"), lty=c(1,3),bty="n") cpep.lo751 <- loess(formula=Y~age, data=cpep,span=.75,degree=1) cpep.lo752 <- loess(formula=Y~age, data=cpep,span=.75,degree=2) anova(cpep.lo751,cpep.lo752) # Plot residuals scatter.smooth(fitted(cpep.lo752), residuals(cpep.lo752), span=1, degree=1) qqnorm(residuals(cpep.lo752)) qqline(residuals(cpep.lo752)) # Make predictions using the predict( ) and # compute pointwise 95% confidence intervals cpep.se <- predict(cpep.lo752, seq(1,15,1), se.fit=T) cpep.locl <- pointwise(cpep.se, coverage=.95) cpep.locl$x <- seq(1,15,1) plot(cpep.lo752, confidence=15,coverage=0.95, ylim=c(1.0,1.8)) # Plot the curve with approximate pointwise # confidence limits par(fin=c(7.0,7.0),pch=18,mkh=.001,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="n", xlim=c(0,16), ylim=c(1.0,1.8), xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n Local Quadratic Loess Smoother") lines(smooth.spline(cpep.locl$x, cpep.locl$fit), lty=1,lwd=3) lines(smooth.spline(cpep.locl$x, cpep.locl$upper ), lty=3,lwd=3) lines(smooth.spline(cpep.locl$x, cpep.locl$lower ), lty=3,lwd=3) # Use the super smoother in S-plus # It is based on a symmetric k-nearest # neighbor linear least squares procedure # Cross-validation is used to choose a # k for each x. Larger values of the bass # parameter encourage more smoothing. par(fin=c(7.0,7.0),pch=18,mkh=.001,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n using the super smoother") lines(supsmu(cpep$age, cpep$Y, bass=0), lty=1,lwd=3) lines(supsmu(cpep$age, cpep$Y, bass=3), lty=3,lwd=3) lines(supsmu(cpep$age, cpep$Y, bass=7), lty=4,lwd=3) legend(5,1.31,c("bass=0", "base=3", "bass=7"), lty=c(1,3,4),bty="n") cpep.ss <- supsmu(cpep$age,cpep$Y,bass=3) approx(cpep.ss, c(2,4,6,8,10,12,14,16)) cpep.ss.fit <- approx(cpep.ss, cpep$age) cpep.ss.res <- cpep$Y-cpep.ss.fit$y # Use kernel smoothers. A bandwidth of # one corresponds to the IQR for the # explanatory variable par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n Kernal(box) Smoother") lines(ksmooth(cpep$age, cpep$Y, kernel="box", bandwidth=5, n.points=11), lty=1,lwd=3) lines(ksmooth(cpep$age, cpep$Y, kernel="box", bandwidth=5, n.points=7), lty=3,lwd=3) lines(ksmooth(cpep$age, cpep$Y, kernel="box", bandwidth=5, n.points=3), lty=4,lwd=3) legend(5,1.31,c("points=11", "points=7", "points=3"), lty=c(1,3,4),bty="n") par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(cpep$age, cpep$Y, type="p", xlab="Age (years)", ylab="log(concentration)", main="C-peptide Concentrations \n Kernel (normal) Smoother") lines(ksmooth(cpep$age, cpep$Y, kernel="normal", bandwidth=.5), lty=1,lwd=3) lines(ksmooth(cpep$age, cpep$Y, kernel="normal", bandwidth=2), lty=3,lwd=3) lines(ksmooth(cpep$age, cpep$Y, kernel="normal", bandwidth=5), lty=4,lwd=3) legend(5,1.31,c("bw=0.5", "bw=1", "bw=5"), lty=c(1,3,4),bty="n")