# This code is used to explore the # Stormer viscometer data. It is stored # in the file # # stormer.ssc # # Enter the data into a data frame. # The data are stored in the file # # stormer.dat stormer <- read.table("stormer.dat") stormer # Attach the MASS library library(MASS) # Use a linear approximation to obtain # starting values for least squares # estimation in the non-linear model fm0 <- lm(Wt*Time ~ Viscosity + Time - 1, data=stormer) b0 <- coef(fm0) names(b0) <- c("b1","b2") # Fit the non-linear model storm.fm <- nls( formula = Time ~ b1*Viscosity/(Wt-b2), data = stormer, start = b0, trace = T) # Create a bivariate confidence region # for the the (b1,b2) parameters. # First set up a grid of (b1,b2) values bc <- coef(storm.fm) se <- sqrt(diag(vcov(storm.fm))) dv <- deviance(storm.fm) gsize<-51 b1 <- bc[1] + seq(-3*se[1], 3*se[1], length = gsize) b2 <- bc[2] + seq(-3*se[2], 3*se[2], length = gsize) bv <- expand.grid(b1, b2) # Create a function to evaluate sums of squares ssq <- function(b) sum((stormer\$Time - b[1] * stormer\$Viscosity/ (stormer\$Wt-b[2]))^2) # Evalute the sum of squared residuals and # approximate F-ratios for all of the # (b1,b2) values on the grid dbeta <- apply(bv, 1, ssq) n<-length(stormer\$Viscosity) df1<-length(bc) df2<-n-df1 fstat <- matrix( ((dbeta - dv)/df1) / (dv/df2), gsize, gsize) # Create the plot. UNIX users should use # the motif() function to open a graphics # window par(fin=c(7.0,7.0), mex=1.5,lwd=3) plot(b1, b2, type="n", main="95% Confidence Region") contour(b1, b2, fstat, levels=c(1,2,5,7,10,15,20), labex=0.75, lty=2, add=T) contour(b1, b2, fstat, levels=qf(0.95,2,21), labex=0, lty=1, add=T) text(31.6,0.3,"95% CR", adj=0, cex=0.75) points(bc[1], bc[2], pch=3, mkh=0.15) # remove b1,b2, and bv rm(b1,b2,bv,fstat) #==================================== # Bootstrapping (V&R, Section 8.4) #==================================== #==================================== # Bootstrap I: # Treat the regressors as random and # resample cases (y,x1,x2). #==================================== storm.boot1 <- bootstrap(stormer, coef(nls(Time~b1*Viscosity/(Wt-b2), data=stormer,start=bc)),B=1000) summary(storm.boot1) # Produce histograms of the bootstrapped values of # the regression coefficients # The following code is used to draw non-parametric # estimated densities, normal densities, and histogram # on the same graphic window. # truehist(): Plot a Histogram (prob=T by default) # In case of hist(), probability=F by default. # width.SJ(): Bandwidth Selection by Pilot Estimation of # Derivatives. Uses the method of Sheather # & Jones (1991) to select the bandwidth # of a Gaussian kernel density estimator. # density() : Estimate Probability Density Function. # Returns x and y coordinates of a # non-parametric estimate of the probability # density of the data. Options include the choice # of the window to use and the number of points # at which to estimate the density. # n = the number of equally spaced points at # which to estimate the density. b1.boot <- storm.boot1\$rep[,1] b2.boot <- storm.boot1\$rep[,2] par(fin=c(7.0,7.0),mex=1.5) mm <- range(b1.boot) min.int <- floor(mm[1]) max.int <- ceiling(mm[2]) truehist(b1.boot,xlim=c(min.int,max.int),density=.0001) width.SJ(b1.boot) b1.boot.dns1 <- density(b1.boot,n=200,width=.6) b1.boot.dns2 <- list( x = b1.boot.dns1\$x, y = dnorm(b1.boot.dns1\$x,mean(b1.boot), sqrt(var(b1.boot)))) # Draw non-parametric density lines(b1.boot.dns1,lty=3,lwd=2) # draw normal densities lines(b1.boot.dns2,lty=1,lwd=2) legend(27.,-0.30,c("Nonparametric","Normal approx."), lty=c(3,1),bty="n",lwd=2) # Display the distribution of the estimate # of the second parameter par(fin=c(7.0,7.0),mex=1.5) mm <- range(b2.boot) min.int <- floor(mm[1]) max.int <- ceiling(mm[2]) truehist(b2.boot,xlim=c(min.int,max.int),density=.001) width.SJ(b2.boot) b2.boot.dns1 <- density(b2.boot,n=200,width=.8) b2.boot.dns2 <- list( x = b2.boot.dns1\$x, y = dnorm(b2.boot.dns1\$x,mean(b2.boot), sqrt(var(b2.boot)))) lines(b2.boot.dns1,lty=3,lwd=2) lines(b2.boot.dns2,lty=1,lwd=2) legend(0.5,-0.30,c("Nonparametric","Normal approx."), lty=c(3,1),bty="n",lwd=2) #======================================= # Bootstrap II: # Treat the regressors as fixed and # resample from the residuals #======================================= # Center the residuals at zero and # divide residuals by linear approximations # to a multiple of the standard errors. These # centered and scaled residuals approximately # have the same first two moments as the # random errors, but they are not quite # uncorrelated. rs <- scale(resid(storm.fm),center=T,scale=F) grad.f <- deriv3( expr = Y ~ (b1*X1)/(X2-b2), namevec = c("b1", "b2"), function.arg = function(b1, b2, X1, X2, Y) NULL) g2 <- grad.f(b[1], b[2], stormer\$Viscosity, stormer\$Wt, stormer\$Tim) D <- attr(g2, "gradient") h <- 1-diag(D%*%solve(t(D)%*%D)%*%t(D)) rs <- rs/sqrt(h) dfe <- length(rs)-length(coef(storm.fm)) vr <- var(rs) rs <- rs%*%sqrt(deviance(storm.fm)/dfe/vr) storm.bf2 <- function(rs) { assign("Tim", fitted(storm.fm) + rs, frame = 1) nls(formula = Tim ~ (b1*Viscosity)/(Wt-b2), data = stormer, start = coef(storm.fm))\$parameters } summary(storm.fm)\$parameters # Compute 1000 bootrapped values of the # regression parameters storm.boot2 <- bootstrap(data=rs, statistic=storm.bf2(rs),B=1000) b1.boot <- storm.boot2\$rep[,1] b2.boot <- storm.boot2\$rep[,2] # The BCA intervals may not be correctly # computed by the following summary(storm.boot2) # The following code is used to draw non-parametric # estimated densities, normal densities, and histogram # on the same graphic window. par(fin=c(7.0,7.0),mex=1.3) mm <- range(b1.boot) min.int <- floor(mm[1]) max.int <- ceiling(mm[2]) truehist(b1.boot,xlim=c(min.int,max.int),density=.0001) b1.boot.dns1 <- density(b1.boot,n=200, width=width.SJ(b1.boot)) b1.boot.dns2 <- list( x = b1.boot.dns1\$x, y = dnorm(b1.boot.dns1\$x,mean(b1.boot), sqrt(var(b1.boot)))) # Draw non-parametric density lines(b1.boot.dns1,lty=3,lwd=2) # draw normal densities lines(b1.boot.dns2,lty=1,lwd=2) legend(27,-0.22,c("Nonparametric","Normal approx."), lty=c(3,1),bty="n",lwd=2) # Display the distribution for the other # parameter estimate par(fin=c(7.0,7.0),mex=1.3) mm <- range(b2.boot) min.int <- floor(mm[1]) max.int <- ceiling(mm[2]) truehist(b2.boot,xlim=c(min.int,max.int),density=.00001) b2.boot.dns1 <- density(b2.boot,n=200, width=width.SJ(b2.boot)) b2.boot.dns2 <- list( x = b2.boot.dns1\$x, y = dnorm(b2.boot.dns1\$x,mean(b2.boot), sqrt(var(b2.boot)))) lines(b2.boot.dns1,lty=3,lwd=2) lines(b2.boot.dns2,lty=1,lwd=2) legend(0.5,-0.25,c("Nonparametric","Normal approx."), lty=c(3,1),bty="n",lwd=2)