# THIS CODE ASSUMES THAT THE INPUT FILE IS DIRECTLY READ FROM THE CLASS WEB PAGE # # sigma2 ~ Inv-chisq(nu.p,sigma2.p) # y_i|sigma2~IID N(mu,sigma2), i = 1, ..., n # # written with trick to avoid S+/R memory-management problems # due to Brian Ripley (author of Modern Applied Statistics with S-PLUS) # # Inputs: # # y = data vector, of length n = sample size # mu = known mean in Gaussian likelihood # nu.p = prior effective sample size # sigma2.p = prior estimate of sigma2 # sigma2.0 = initial value for sigma2 in Hastings iterations # nu.star = scaling factor for Hastings proposal distribution (affects the acceptance rate R; to increase R, increase # nu.star). nu.star must be > 2 in this implementation; values near 20-25 lead to good mixing # n.burnin = length of burn-in period # n.monitor = length of monitoring period # n.thin = thinning constant (only every n.thin-th iteration in the monitoring period will be written to disk) # # seed = random number seed (for generating repeatable sequences of Hastings iterations); must be an integer # from 0 to 1000. # output.file.prefix = character string naming where you want the MCMC data set to go; # for example, output.file.prefix = "NB10" would write the MCMC data set to the file "NB10.d" # # Outputs: # # Acceptance rate R returned when iterations are finished # A file with the name that you have specified on the output.file.prefix parameter on the main routine is written. This file # will be written in the same directory where R/S+ has been started. This file contains one row for each monitored iteration # and three columns: the monitored iteration number (from 1 to # n.monitor / n.thin), # the simulated draw from the posterior for sigma2 for that iteration, # and the corresponding simulated draw from the predictive distribution for a new y.star. # WARNING: If the output file exists before the function is invoked, it will be over-written # # Preliminary calculations y<-c(0,1,3,5) n<-c(5,5,5,5) dose<-c(-0.863,-0.296,-0.053,0.727) dose.glm <- glm(y/n ~dose,family=binomial,weights=n) summary(dose.glm) # INTERMEDIATE FUNCTIONS # Proposal distribution simulation PD.sim <- function( nu, sigma2 ){return( nu * sigma2 / rchisq( 1, nu ) ) } # Acceptance probability calculation alpha <- function( sigma2.old, sigma2.new, y, mu, nu.p, sigma2.p,nu.star ) { return(min(1,exp(log.post(sigma2.new,y,mu,nu.p,sigma2.p)+ log.PD(sigma2.old,nu.star,sigma2.new) - log.PD( sigma2.new,nu.star,sigma2.old) - log.post(sigma2.old,y,mu,nu.p,sigma2.p)))) } # log(posterior) calculation log.post <- function( sigma2,y,mu,nu.p,sigma2.p){return(log.lik( sigma2, y, mu ) + log.prior( sigma2, nu.p,sigma2.p ) ) } # log(likelihood) calculation log.lik <- function( sigma2, y, mu ) {n <- length( y ) return( ( - n / 2 ) * log( sigma2 ) - sum( ( y - mu )^2 ) /( 2 * sigma2 ) ) } # Trick due to Ripley to overcome R/S+ memory-management problems # # Ripley idea: put everything inside an explicit loop into a function that returns nothing, reading from and writing to # disk as needed to maintain communication. This will keep R/S+ from accumulating dynamic memory as it goes around the loop loop <- function( y, mu, nu.p, sigma2.p, nu.star,n.burnin,n.thin,output.file.prefix, i ) { loop.result <- scan( "loop.result" ) sigma2.old <- loop.result[1] R <- loop.result[2] sigma2.star <- PD.sim( nu.star, ( nu.star - 2 ) * sigma2.old /nu.star ) u <- runif( 1 ) b <- ( u <= alpha( sigma2.old, sigma2.star, y, mu, nu.p,sigma2.p, nu.star ) ) sigma2.new <- sigma2.star * b + sigma2.old * ( 1 - b ) y.new <- rnorm( 1, mu, sqrt( sigma2.new ) ) if ( i > n.burnin ) R <- R + b if ( ( i > n.burnin ) & ( ( i - n.burnin ) %% n.thin == 0 ) ) write( c(( i - n.burnin )/n.thin, signif( c( sigma2.new,y.new ), digits = 5)), paste(output.file.prefix,".d",sep = ""), ncol=3,append=(i>n.burnin+n.thin)) sigma2.old <- sigma2.new write( c( sigma2.old, R ),"loop.result",append = F ) return( NULL ) } # log(prior) calculation log.prior <- function(sigma2,nu.p,sigma2.p){return((-1-nu.p/2)*log(sigma2)-nu.p*sigma2.p/(2*sigma2)) } # log(proposal distribution) calculation log.PD <- function(sigma2,nu.star,sigma2.star){ return((nu.star/2)*log(sigma2.star)-(1+nu.star/2)*log(sigma2)-(nu.star-2)*sigma2.star/(2*sigma2)) } # MAIN ROUTINE hastings.gaussian.variance <- function( y,mu,nu.p,sigma2.p,sigma2.0,nu.star,n.burnin,n.monitor,n.thin,seed, output.file.prefix){ sigma2.old <- sigma2.0 R <- 0 write( c( sigma2.old, R ), "loop.result", append = F ) set.seed( seed ) for (i in 1:(n.burnin+n.monitor)){null<-loop(y,mu,nu.p,sigma2.p,nu.star,n.burnin,n.thin,output.file.prefix,i ) } loop.result <- scan("loop.result") R <- loop.result[2] return(R/n.monitor) } # LOAD DATA AND RUN MAIN ROUTINE y <- matrix(scan("http://www.stat.iastate.edu/stat544/nb10.dat"),ncol=1,byrow=TRUE) n <- ncol(y) output.file.prefix <- "NB10" n.draws <- 1000 # set the number of draws acc.rate <- hastings.gaussian.variance(y, 404.59, 10, 40, 5, 20, 1,n.draws, 1, 78, output.file.prefix) # Shows acceptance rate and calculate 95% credible set for sigma acc.rate out.file<-paste(output.file.prefix,".d",sep = "") x <- matrix(scan(out.file),nrow=n.draws,byrow=TRUE) quantile(x[,2],probs=c(0.025,0.975))