data meat; input time ph; logtime = log(time); if time > 5 then length = 'long'; else length = 'short'; cards; 1 7.02 1 6.93 2 6.42 2 6.51 4 6.07 4 5.99 6 5.59 6 5.80 8 5.51 8 5.36 ; run; *making a scatterplot of the data; proc gplot data=meat; plot ph*logtime; run; * low resolution plot in the output window; proc plot data=meat; plot ph*logtime; run; *making an ANOVA table, obtaining estimates of the parameters (intercept and slope) and individual t-tests for each parameter; *notice there is NO class statement; *also obtaining an estimate of the ph at time 5 * (i.e. logtime = ln(5) = 1.6094 ). *the output statement creates a SAS data set with the residuals and predicted values included; proc glm data=meat; model ph=logtime; estimate 'ph at 5 hr' intercept 1 logtime 1.6094; output out=resids r=resid p=yhat; run; *if desired, a plot of the residuals vs predicted values is made; proc gplot data=resids; plot resid*yhat; run; /* now overlay predicted values and observed data */ proc gplot data = resids; plot ph*logtime yhat*logtime / overlay; symbol1 i=none v=dot; symbol2 i=join v=none; title 'One way to draw regr. line'; proc gplot data = resids; plot ph*logtime yhat*logtime /overlay; symbol1 i=none v=dot; symbol2 i=rl v=none; title 'Second way'; run; /* previous stuff only produces predicted values for obs in data set */ /* Here's how to get and plot predictions for other X's */ /* read those time values into a new data set, meat2 */ /* the . means that the pH value is missing in these observations */ data meat2; input time ph; logtime = log(time); datalines; 1 . 1.5 . 2 . 2.5 . 3 . 3.5 . 4 . 4.5 . 5 . ; /* create a new data set called ALL by concatenating the two data sets */ data all; set meat meat2; /* Fit regression to all values - those with missing Y's are ignored */ /* the estimate statement gives the predicted value and the s.e. (prediction */ /* and again on the data in all. The output statement produces a data set */ /* called resid2, with lots of additional variables. */ /* you can use or omit keywords and variable names as needed */ proc glm data = all; model ph=logtime; estimate 'ph at 5 hr' intercept 1 logtime 1.6094; estimate 'ph at 2 hr' intercept 1 logtime 0.6931; output out=resid2 residual=resid /* residuals */ predicted=yhat /* predicted values */ stdp = seline /* s.e. of predicted mean (the line) */ stdi = seobs /* s.e. on predicted observation */ lclm = lciline /* lower 95% c.i. limit for pred. mean */ uclm = uciline /* upper 95% c.i. limit for pred. mean */ lcl = lciobs /* lower 95% c.i. limit for pred. obs */ ucl = uciobs /* upper 95% c.i. limit for pred. obs */ ; /* and here, finally, is the end of the output */ title 'regression on all data'; /* all the information from the output command goes into a data set */ /* print it to see the stuff you need */ proc print data = resid2; title 'output from all obs'; /* can also plot stuff in many ways */ /* the following plots three pieces of information together: */ /* the predicted line and the upper and lower bounds of the */ /* 95% c.i. for predicted observations */ /* the where statement restricts the plot to those obs. where ph is missing */ /* this restricts the plot to those points in the meat2 data set */ /* (i.e. the additional regularly spaced time points) */ proc gplot; where ph = .; plot yhat*logtime lciline*logtime uciline*logtime uciobs*logtime lciobs*logtime /overlay; symbol1 i=join v=none; symbol2 i=join v=none; symbol3 i=join v=none; symbol4 i=join v=none; symbol5 i=join v=none; run;