#Install the packages bootstrap and boot. #Load the bootstrap package. library(bootstrap) #This package is only loaded so that the #cholostyramine dataset can be accessed. head(cholost) #z is the percentage that represents compliance. #y is the decrease in total blood plasma #cholesterol level from the beginning of the #experiment to the end. #That we can refer to z and y rather than #cholost\$z and cholost\$y... attach(cholost) plot(cholost,xlab="Compliance", ylab="Improvement", pch=19,col=4) #Load the SemiPar package. library(SemiPar) #Fit a penalized linear spline to the data. #Use the REML approach to determine the #smoothing parameter. o=spm(y~f(z,basis="trunc.poly",degree=1)) #Add the fitted spline to the scatterplot. lines(o,shade=F,se=F) title("Penalized Linear Spline Fit") #Was the mean improvement significantly #greater at compliance equal 50% than at #compliance equal 25%? comp=data.frame(z=c(25,50)) comp pred=predict(o,newdata=comp) pred pred[2]-pred[1] #Load the boot package. library(boot) #Write a function that will compute the #statistic for any bootstrap sample denoted #by i=indices of resampled observations. compstar=data.frame(zstar=c(25,50)) compstar theta.hat=function(d,i) { ystar<<-d\$y[i] zstar<<-d\$z[i] o=spm(ystar~f(zstar,basis="trunc.poly",degree=1)) pred=predict(o,newdata=compstar) pred[2]-pred[1] } #Perform bootstrapping using the boot function. set.seed(90125) oboot=boot(data=cholost,statistic=theta.hat,R=5000) oboot #Examine the sample statistic theta.hat. #This is the difference between predictions #at compliance=50% and compliance=25%. oboot\$t0 #Examine the distribution of many bootstrap #replications of theta.hat. These are the #difference between predictions at 50% and 25% #for each of the 5000 bootstrap samples. hist(oboot\$t,xlab="theta.hat.star", main="Histogram of the 5000 Bootstrap Replications", nclass=50,col=4) box() #Let's find an approximate 95% confidence #interval for theta using the percentile #bootstrap approach. boot.ci(oboot,conf=.95,type="perc") abline(v=quantile(oboot\$t,c(0.025,0.975)),col=2,lwd=2) #Now let's find the BCa interval. BCa.int=boot.ci(oboot,conf=.95,type="bca") BCa.int abline(v=BCa.int\$bca[1,4:5],col=3,lwd=2)