# This is SPLUS code for fitting a nonlinear # model to the mixture data from Myers (1994) # Technometrics, 343-356. Maximum likelihood # estimation is used. # This file stored as myers.ssc # First access the MASS library library(MASS, first=T) # Enter the data stored in the file # myers.dat # There are five numbers on each line in the # following order: # mixture: Identification code # x1: percent nitroglycerine (NG) # x2: percent triacetin (TA) # x3: percent 2-nitrodiphenylamine (2N) # y: specific gravity of the mixture myers <- read.table("c:/courses/st511/snew/myers.dat", col.names=c("mixture","x1","x2","x3","y")) # Create a scatterplot matrix with smooth # curves. Unix users should first use motif( ) # to open a graphics window points.lines <- function(x, y){ points(x, y) lines(loess.smooth(x, y, 0.90))} par(din=c(7,7), pch=18, mkh=.15, cex=1.2, lwd=3) pairs(myers[ ,-1], panel=points.lines) # Use maximum likelihood estimation to model # specific gravity of the mixture as the # inverse of a linear combination of the # percentages of NG, TA, and 2N plus a # random error with a normal distribution. # Initial values for coefficients are obtained # from a regression of the inverse of the # specific gravity of the mixture on a # linear combination of the percentages of # NG, TA, and 2N. myers$d <- 1/myers$y; myers.lm <- lm(d~x1+x2+x3-1,data=myers) b <- as.matrix(coef(myers.lm),ncol=1) b # Use the sum of squared errors to get a # starting value for the variance estimate n <- length(myers$y) ss <- var(myers$y-1/fitted(myers.lm))* (n-1.)/(n-length(b)) ss # Compute maximum likelihood estimates for # a model where the conditional responses # are independent and normally distributed # with homogeneous variances. Use deriv3( ) # to create a function to specify the # log-likelihood and compute first and # second partial derivatives. You must # access the MASS library to use deriv3( ). # Note that S-PLUS stores the value of # pi in an object called pi. Furthermore, # ms( ) is a minimization function so we # must give it the negative of the log- # likelihood. lmyers <- deriv3( ~ 0.50*(log(2*pi)+log(ss)+ ((y-1/(b1*x1+b2*x2+b3*x3))^2)/ss), c("b1", "b2", "b3", "ss"), function(y, x1, x2, x3, b1, b2, b3, ss) NULL) # List the contents of this function lmyers # We wil trace the iterative process that miinimizes # the negative of the log-likelihood with the # following function that provides the value # of the negative of the log-likelihood, parameter # estimates and the gradient. tr.ms <- function(info, theta, grad, scale, flags, fit.pars) { cat(signif(info[3]),":",signif(theta),"\n", ":",signif(grad[1:length(theta)]),"\n") invisible(list()) } # Now maximize the likelihood myers.ms <- ms( ~ lmyers(y, x1, x2, x3, b1, b2, b3, ss), data = myers, start = c(b1=b[1], b2=b[2], b3=b[3], ss=ss), trace = tr.ms, control=list(maxiter=50,tolerance=10^(-10), rel.tolerance=10^(-10),miniscale=1000)) # Check the gradient myers.ms$gradient # Check residual plots. The scatter.smooth # passes a smooth curve through the points # on the residual plot myers.ms$fitted <- (as.matrix(myers[ ,c("x1","x2","x3")])%*% as.vector(myers.ms$parameters[c("b1","b2","b3")]))^(-1) myers.ms$residuals <- myers$y-myers.ms$fitted scatter.smooth(myers.ms$fitted, myers.ms$residuals, span=1, degree=1, xlab="Fitted values", ylab="Residuals", main="Residuals") qqnorm(myers.ms$residuals) qqline(myers.ms$residuals) # Compute confidence intervals for parameters # using standard large sample normal theory. # We will have to create our own function. # The intervals( ) function cannot be # applied to objects created by ms( ). conf.ms <- function(obj, level=0.95) { b <- coef(obj) thessian<-obj$hessian+t(obj$hessian)-diag(diag(obj$hessian)) vcov <- solve(thessian) stderr <- sqrt(diag(vcov)) low <- b - qnorm(1 - (1 - level)/2)*stderr high <- b + qnorm(1 - (1 - level)/2)*stderr a <- cbind(b, stderr, low, high) a } conf.ms(myers.ms) # In this case, least squares estimation # gives essentially the same estimates myers$d <- 1/myers$y; myers.lm <- lm(d~x1+x2+x3-1,data=myers) b <- as.matrix(coef(myers.lm),ncol=1) myers.nls <- nls( formula = y ~ 1/(b1*x1+b2*x2+b3*x3), data = myers, start = c(b1=b[1], b2=b[2], b3=b[3]), trace = T) summary(myers.nls) # The first case may be an outlier. Delete # the first case and refit the model. myers.1 <- myers[-1, ] myers.1$d <- 1/myers.1$y; myers.lm <- lm(d~x1+x2+x3-1,data=myers.1) b <- as.matrix(coef(myers.lm),ncol=1) b n <- length(myers.1$y) ss <- var(myers.1$y-1/fitted(myers.lm))* (n-1.)/(n-length(b)) ss myers.ms <- ms( ~ lmyers(y, x1, x2, x3, b1, b2, b3, ss), data = myers.1, start = c(b1=b[1], b2=b[2], b3=b[3], ss=ss), trace = tr.ms, control=list(maxiter=50,tolerance=10^(-10))) # Check the gradient myers.ms$gradient # Check residual plots. The scatter.smooth # passes a smooth curve through the points # on the residual plot myers.ms$fitted <- (as.matrix(myers.1[ ,c("x1","x2","x3")])%*% as.vector(myers.ms$parameters[c("b1","b2","b3")]))^(-1) myers.ms$residuals <- myers.1$y-myers.ms$fitted scatter.smooth(myers.ms$fitted, myers.ms$residuals, span=1, degree=1, xlab="Fitted values", ylab="Residuals", main="Residuals") qqnorm(myers.ms$residuals) qqline(myers.ms$residuals) # Compute confidence intervals for parameters # using standard large sample normal theory. # We will have to create our own function. # The intervals( ) function cannot be # applied to objects created by ms( ). conf.ms <- function(obj, level=0.95) { b <- coef(obj) thessian<-obj$hessian+t(obj$hessian)-diag(diag(obj$hessian)) vcov <- solve(thessian) stderr <- sqrt(diag(vcov)) low <- b - qnorm(1 - (1 - level)/2)*stderr high <- b + qnorm(1 - (1 - level)/2)*stderr a <- cbind(b, stderr, low, high) a } conf.ms(myers.ms)