# This is SPLUS code for fitting a nonlinear # model to the weight loss data in # Venables and Ripley. # This file stored as wtloss.ssc # First access the MASS library library(MASS) # Enter the data stored in the file # wtloss.dat # There are three numbers on each line in the # following order: # Identification code # Days since beginning of the study # Weight (in kg) wtloss <- read.table("c:/courses/st511/snew/wtloss.dat") # Code for plotting weight against time. # Unix users should insert the motif( ) # command here to open a graphics window. # Specify plotting symbol and size of graph in inches. # fin=c(w,h) specifies a plot that is w inches wide # and h inches high, not including labels # pch=18 requests a filled rectangle as a plotting # symbol # mkh=b requests plotting symbols that are b # inches high # mex=a sets the spacing between lines printed in # the margins # plt plt=c(.2,.8,.2,.8) defines the fraction of # figure region to use for plotting. This # can provide more space for labels par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(wtloss$Days, wtloss$Weight, type="p", ylab="Weight (kg)", main="Weight Loss") # Fit the exponential decay curve. # The initial values of 90, 95, and 190 are # specified by the user. Since no formulas # for derivatives are specified, The Gauss-Newton # optimization procedure is applied with numerical # approximations to the first partial derviatives wtloss.fm <- nls(formula = Weight ~ b0 + b1*exp(-Days/b2), data = wtloss, start = c(b0=90, b1=95, b2=190), trace = T) # The first line of output comes from the initial # value and the last three lines of output show # the value of the sum of squared errors and # the values of the parameter estimates for # successive iterations of the minimization # procedure. This output was requested by # the trace=T option. Other information is # obtained in the following way: summary(wtloss.fm) # Print the residual sum of squares. This # will not work if you do not attach the # MASS library deviance(wtloss.fm) # Print the estimated covariance matrix of # the estimated parameters. This will not # work if you do not attach the MASS library. vcov(wtloss.fm) # Other information in the object created # by the nls( ) function names(wtloss.fm) # Plot the curve par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) plot(wtloss$Days, wtloss$Weight, type="p", xlab="Days", ylab="Weight (kg)", main="Weight Loss Data") lines(wtloss$Days, wtloss.fm$fitted.values, lty=1,lwd=3) # Check residual plots. The scatter.smooth # passes a smooth curve through the points # on the residual plot qqnorm(residuals(wtloss.fm)) qqline(residuals(wtloss.fm)) scatter.smooth(wtloss.fm$fitted.values, wtloss.fm$residuals, span=.75, degree=1, main="Residuals") # Now create a function to generate starting # values for parameter estimates in the # negative exponential decay model. negexp.sv <- function(x, y ) { mx<-mean(x) x1<-x-mx x2<-((x-mx)^2)/2 b <- as.vector(lm(y~x1+x2)$coef) b2<- -b[2]/b[3] b <- as.vector(lm(y~exp(-x/b2))$coef,ncol=1) parms <- cbind(b[1],b[2],b2) parms } b.start <- negexp.sv(wtloss$Days, wtloss$Weight) wtloss.ss <- nls( formula = Weight ~ b0 + b1*exp(-Days/b2), data = wtloss, start = c(b0=b.start[1], b1=b.start[2], b2=b.start[3]), trace = T) # You can supply formulas for first partial # derivatives to the nls() function. Sometimes # this makes the minimization of the sum # of squared residuals more stable. # Derivatives can only be provided as an # attribute of the model. We will create a # function to calculate the mean vector and # a D matrix of values of first partial # derivatives evaluated at the current values # of the parameter estimates. The D matrix is # included as a "gradient" attribute. Note # that the gradient matrix must have column # names matching those of the corresponding # parameters. We will also compute a matrix # of second partial derivatives. # Use the symbolic differentiation function, # deriv3( ), to create a function to specify # the model and compute derivatives. You must # add the MASS library to use deriv3( ). expn1 <- deriv3( expr = y ~ b0 + b1 * exp(-x/b2), namevec = c("b0", "b1", "b2"), function.arg = function(b0, b1, b2, x) NULL) # List the contents of expn1 expn1 # Now fit the model wtloss.gr <- nls( formula = Weight ~ expn1(b0, b1, b2, Days), data = wtloss, start = c(b0=b.start[1], b1=b.start[2], b2=b.start[3]), trace = T) # Print final parameter estimates wtloss.gr$parameters # Print large sample estimate of the # covariance matrix covb<-vcov(wtloss.gr) covb # Print large sample estimates of # standard errors for the parameter estimates sqrt(diag(covb)) # Compute 95% confidence intervals for the # parameters using a profile likelihood # method rbind(b0 = confint(wtloss.gr,parm="b0"), b1 = confint(wtloss.gr,parm="b1"), b2 = confint(wtloss.gr,parm="b2")) # Compute confidence intervals for parameters # using standard large sample normal theory. # We will have to create our own function. # The intervals( ) function cannot be # applied to objects created by nls( ). est.b <- function(obj, level=0.95) { b <- coef(obj) n <- length(obj$fitted.values) - length(obj$parameters) stderr <- sqrt(diag(vcov(obj))) low <- b - qt(1 - (1 - level)/2, n)*stderr high <- b + qt(1 - (1 - level)/2, n)*stderr a <- cbind(b, stderr, low, high) a } est.b(wtloss.gr) # Construct confidence interval for the time needed # to achieve specific predicted weights(110,100,90) # Compute the estimated times and their standard # errors. First use the deriv3() function to get # values of the first partial derviatives of # the mean function needed to apply the delta method. time.pds <- deriv3(~ -b2*log((y0-b0)/b1), c("b0","b1","b2"), function(y0, b0, b1, b2) NULL) # list the function time.pds # Compute the estimates of the times and the large sample # covariance matrix and standard errors est.time <- function(y0, obj, level=.95) { b <- coef(obj) tmp <- time.pds(y0, b["b0"], b["b1"], b["b2"]) x0 <- as.vector(tmp) lam <- attr(tmp, "gradient") np <- length(b) v <- (lam%*%vcov(obj)*lam)%*%matrix(1,np,1) n <- length(obj$fitted.values) - np low <- x0 - qt(1 - (1 - level)/2, n)*sqrt(v) high <- x0 + qt(1 - (1 - level)/2, n)*sqrt(v) a <- cbind(x0, sqrt(v), low, high) dimnames(a) <- list(paste(y0, "kg: "), c("x0", "SE", "low", "high")) a } est.time(c(110, 100, 90), wtloss.gr) # Calculate confidence intervals by inverting # large sample F-tests. Reparameterize the # model in terms of the desired quantity. Then # use deriv3( ) to construct a function that # provides the model formula and partial # derivatives. This method may fail for some # values of some quantities. Try obtaining a # confidence interval for days until weight # reaches 85 lb in the following example. expn2 <- deriv3(~b0 + b1*((w0-b0)/b1)^(x/d0), c("b0","b1","d0"), function(b0,b1,d0,x,w0) NULL) # Define a function to calculate initial values. wtloss.init <- function(obj,w0) { p <- coef(obj) d0 <- -log( (w0 - p["b0"]) / p["b1"]) * p["b2"] c(p[c("b0","b1")],d0 = as.vector(d0)) } # Construct the confidence intervals result <- NULL w0s <- c(110,100,90) for (w0 in w0s) { fm <- nls(Weight ~ expn2(b0, b1, d0, Days, w0), data = wtloss, start = wtloss.init(wtloss.gr,w0)) result <- rbind(result, c(coef(fm)["d0"], confint(fm,"d0"))) } dimnames(result) <- list(paste(w0s,"kg:"), c("time","low","high")) result