# This code is used to explore the # Stormer viscometer data. It is stored # in the file # # stormer.ssc # # Enter the data into a data frame. # The data are stored in the file # # stormer.dat stormer <- read.table("c:/MyDocuments/courses/st415/stormer.dat") stormer # Use a linear approximation to obtain # starting values for least squares # estimation in the non-linear model fm0 <- lm(Wt*Time ~ Viscosity + Time - 1, data=stormer) b0 <- coef(fm0) names(b0) <- c("b1","b2") # Fit the non-linear model storm.fm <- nls( formula = Time ~ b1*Viscosity/(Wt-b2), data = stormer, start = b0, trace = T) summary(storm.fm)$parameters bc <- coef(storm.fm) names(bc) <- c("b1","b2") #==================================== # Bootstrap Estimatiom #==================================== #==================================== # Bootstrap I: # Treat the regressors as random and # resample cases (y,x1,x2). #==================================== storm.boot1 <- bootstrap(stormer, coef(nls(Time~b1*Viscosity/(Wt-b2), data=stormer,start=bc)),B=1000) summary(storm.boot1) # Produce histograms of the bootstrapped values of # the regression coefficients # The following code is used to draw non-parametric # estimated densities, normal densities, and histogram # on the same graphic window. # hist(): Plot a Histogram. Use prob=T to put on a # probability scale. Use density=.0001 to # get unshaded bars # # density() : Estimate a probability density function. # Returns x and y coordinates of a # non-parametric estimate of the probability # density of the data. Options include the choice # of the window to use and the number of points # at which to estimate the density. # n = the number of equally spaced points at # which to estimate the density. b1.boot <- storm.boot1$rep[,1] b2.boot <- storm.boot1$rep[,2] par(fin=c(7.0,7.0),mex=1.5) hist(b1.boot,nclass=40,density=.0001,prob=T) b1.boot.dns1 <- density(b1.boot,n=200,width=.6) b1.boot.dns2 <- list( x = b1.boot.dns1$x, y = dnorm(b1.boot.dns1$x,mean(b1.boot), sqrt(var(b1.boot)))) # Draw non-parametric density lines(b1.boot.dns1,lty=3,lwd=2) # draw normal densities lines(b1.boot.dns2,lty=1,lwd=2) legend(27.,-0.30,c("Nonparametric","Normal approx."), lty=c(3,1),bty="n",lwd=2) # Display the distribution of the estimate # of the second parameter par(fin=c(7.0,7.0),mex=1.5) hist(b2.boot,nclass=40,density=.00000001,prob=T) b2.boot.dns1 <- density(b2.boot,n=200,width=.6) b2.boot.dns2 <- list( x = b2.boot.dns1$x, y = dnorm(b2.boot.dns1$x,mean(b2.boot), sqrt(var(b2.boot)))) lines(b2.boot.dns1,lty=3,lwd=2) lines(b2.boot.dns2,lty=1,lwd=2) legend(0.5,-0.30,c("Nonparametric","Normal approx."), lty=c(3,1),bty="n",lwd=2) #======================================= # Bootstrap II: # Treat the regressors as fixed and # resample from the residuals #======================================= # Center the residuals at zero and # divide residuals by linear approximations # to a multiple of the standard errors. These # centered and scaled residuals approximately # have the same first two moments as the # random errors, but they are not quite # uncorrelated. library(MASS) rs <- scale(resid(storm.fm),center=T,scale=F) grad.f <- deriv3( expr = Y ~ (b1*X1)/(X2-b2), namevec = c("b1", "b2"), function.arg = function(b1, b2, X1, X2, Y) NULL) g2 <- grad.f(b[1], b[2], stormer$Viscosity, stormer$Wt, stormer$Tim) D <- attr(g2, "gradient") h <- 1-diag(D%*%solve(t(D)%*%D)%*%t(D)) rs <- rs/sqrt(h) dfe <- length(rs)-length(coef(storm.fm)) vr <- var(rs) rs <- rs%*%sqrt(deviance(storm.fm)/dfe/vr) storm.bf2 <- function(rs) { assign("Tim", fitted(storm.fm) + rs, frame = 1) nls(formula = Tim ~ (b1*Viscosity)/(Wt-b2), data = stormer, start = coef(storm.fm))$parameters } summary(storm.fm)$parameters # Compute 1000 bootrapped values of the # regression parameters storm.boot2 <- bootstrap(data=rs, statistic=storm.bf2(rs),B=1000) b1.boot <- storm.boot2$rep[,1] b2.boot <- storm.boot2$rep[,2] # The BCA intervals may not be correctly # computed by the following summary(storm.boot2) # The following code is used to draw non-parametric # estimated densities, normal densities, and histogram # on the same graphic window. par(fin=c(7.0,7.0),mex=1.3) hist(b1.boot,nclass=40,density=.0000001,prob=T) b1.boot.dns1 <- density(b1.boot,n=200, width=.6) b1.boot.dns2 <- list( x = b1.boot.dns1$x, y = dnorm(b1.boot.dns1$x,mean(b1.boot), sqrt(var(b1.boot)))) # Draw non-parametric density lines(b1.boot.dns1,lty=3,lwd=2) # draw normal densities lines(b1.boot.dns2,lty=1,lwd=2) legend(27,-0.22,c("Nonparametric","Normal approx."), lty=c(3,1),bty="n",lwd=2) # Display the distribution for the other # parameter estimate par(fin=c(7.0,7.0),mex=1.3) hist(b2.boot,nclass=40,density=.00000001,prob=T) b2.boot.dns1 <- density(b2.boot,n=200, width=.6) b2.boot.dns2 <- list( x = b2.boot.dns1$x, y = dnorm(b2.boot.dns1$x,mean(b2.boot), sqrt(var(b2.boot)))) lines(b2.boot.dns1,lty=3,lwd=2) lines(b2.boot.dns2,lty=1,lwd=2) legend(0.5,-0.25,c("Nonparametric","Normal approx."), lty=c(3,1),bty="n",lwd=2)