#Install the packages bootstrap and boot. #Load the bootstrap package. library(bootstrap) #This package is only loaded so that the #law school data sets can be accessed. #The entire population of 82 law schools. law82 #The random sample of 15 law schools. law #Some example bootstrap samples. set.seed(511) law[sample(1:15,replace=T),] law[sample(1:15,replace=T),] law[sample(1:15,replace=T),] #Load the boot package. library(boot) #Write a function that will compute the #statistic for any bootstrap sample denoted #by i=indices of resampled observations. theta.hat=function(d,i) { cor(d[i,1],d[i,2]) } #Perform bootstrapping using the boot function. set.seed(9373564) o=boot(data=law,statistic=theta.hat,R=5000) o #Examine the sample statistic theta.hat. #This is the sample correlation coefficient. o\$t0 #Examine the distribution of any bootstrap #replications of theta.hat. These are the #sample correlation coefficients computed #for each of the 5000 bootstrap samples. hist(o\$t,xlab="theta.hat.star", main="Histogram of the 5000 Bootstrap Replications", nclass=50,col=4) box() #We can examine characteristics of the #bootstrap distribution for theta.hat. quantile(o\$t,c(.95,.75,.5,.25,.05)) min(o\$t) max(o\$t) mean(o\$t) sd(o\$t) #How does this compare to the true distribution #theta.hat? If this were a real problem, we #wouldn't know for sure. However, in this case. #we know the true population and have previously #used simulation to get a good approximation of #the true distribution of theta.hat. Comparing #back to the simulation results, we see that #the bootstrap distribution seems shifted to the #right relative to the true distribution. #This is perhaps not surprising in this case, #given that our sample estimate theta.hat is an #overestimate of the population parameter theta. o\$t0 cor(law82[,2],law82[,3]) #However, note that the standard error estimate #is pretty good (0.1348 compared to 0.1302). #The bias of theta.hat is estimated to be mean(o\$t)-o\$t0 #The bias corrected estimate is o\$t0-(mean(o\$t)-o\$t0) #In this case, our biased corrected estimate #is further from the true theta than the #uncorrected estimate. o\$t0-(mean(o\$t)-o\$t0) cor(law82[,2],law82[,3]) o\$t0 #However, note that our previous simulation #did demonstrate that the bias of theta.hat #is negative. #The mean of 100,000 replications of #theta.hat was 0.7464. Thus, the true bias #is approxiated by 0.7464-cor(law82[,2],law82[,3]) #Thus, the bootstrap estimate of bias was in #the right direction even if correcting for it #was harmful in this case. #Let's find an approximate 95% confidence #interval for theta using the percentile #bootstrap approach. boot.ci(o,conf=.95,type="perc") quantile(o\$t,c(0.025,0.975)) #The above results don't exactly agree #because different algorithms are used #for finding the quantiles. These differences #are not important. abline(v=quantile(o\$t,c(0.025,0.975)),col=2,lwd=2) #Now let's find the BCa interval using the law data. BCa.int=boot.ci(o,conf=.95,type="bca") BCa.int attributes(BCa.int) BCa.int\$bca sort(o\$t)[c(27,28,4693,4694)] abline(v=BCa.int\$bca[1,4:5],col=3,lwd=2)