# This is S-plus code for creating # bootstrapped confidence intervals # for a correlation coefficient. It is # stored in the file # # lawschl.ssc # # Any line preceded with a pound sign # is a comment that is ignored by the # program. The law school data are # read from the file # # lawschl.dat # Enter the law school data into a data frame laws <- read.table("lawschl.dat", col.names=c("School","LSAT","GPA")) laws # Plot the data par(fin=c(7.0,7.0),pch=16,mkh=.15,mex=1.5) plot(laws\$LSAT,laws\$GPA, type="p", xlab="LSAT score",ylab="GPA", main="Law School Data") # Compute the sample correlation matrix rr<-cor(laws\$LSAT,laws\$GPA) cat("Estimated correlation: ", round(rr,5), fill=T) # First test for zero correlation n <- length(laws\$LSAT); tt<- sqrt(n-2)*rr/sqrt(1 - rr*rr) pval <- 1 - pt(tt,n-2) pval <- round(pval,digits=5) cat("t-test for zero correlation: ", round(tt,4), fill=T) cat("p-values for the t-test for zero correlation: ", pval, fill=T) # Use Fisher's z-transformation to construct # approximate confidence intervals. # First set the level of confidence at 1-alpha. alpha <- .10 z <- 0.5*log((1+rr)/(1-rr)) zl <- z - qnorm(1-alpha/2)/sqrt(n-3) zu <- z + qnorm(1-alpha/2)/sqrt(n-3) rl <- round((exp(2*zl)-1)/(exp(2*zl)+1),digits=4) ru <- round((exp(2*zu)-1)/(exp(2*zu)+1),digits=4) per <- (1-alpha)*100; cat( per,"% confidence interval: (", rl,", ",ru,")",fill=T) # Compute bootstrap confidence intervals. # Use B=5000 bootstrap samples. nboot <- 5000 rboot <- bootstrap(data=laws, statistic=cor(GPA,LSAT),B=nboot) # limits.emp(): Calculates the empirical percentiles # for the bootstrapped parameter # estimates in a resamp object. # The quantile function is used to # calculate the empirical percentiles. # usage: # limits.emp(x, probs=c(0.025, 0.05, 0.95, 0.975)) limits.emp(rboot, probs=c(0.05,0.95)) # Do another set of 5000 bootstrapped values rboot <- bootstrap(data=laws, statistic=cor(GPA,LSAT),B=nboot) limits.emp(rboot, probs=c(0.05,0.95)) # limits.bca(): Calculates BCa (bootstrap # bias-correct, adjusted) # confidence limits. # usage: # limits.bca(boot.obj, # probs=c(0.025, 0.05, 0.95, 0.975), # details=F, z0=NULL, # acceleration=NULL, # group.size=NULL, # frame.eval.jack=sys.parent(1)) limits.bca(rboot,probs=c(0.05,0.95),detail=T) # Both sets of confidence intervals could # have been obtained from the summary( ) # function summary(rboot) # Make a histogram of the bootstrapped # correlations. hist(rboot\$rep,nclass=50, xlab=" ", main="5000 Bootstrap Correlations", density=.0001)