/* ** NOTE: In SAS version 8, the phreg procedure directly incorporates the ** computation of a robust variance; this macro is no longer necessary. ** ** proc phreg data=mydata covsandwich(aggregate); ** model time * status(0) = x1 x2 x3; ** id group; ** ** Is same as ** %phlev(data=mydata, time=time, event=status, xvars=x1 x2 x3, ** id=group, collapse='Y'); ** with the former giving nicer printout, more options, etc. */ /* ***************************************************************************; Macro name: %phlev Function: Returns the leverage residuals matrix L for a proportional hazards model. The returned residuals data set is n by p, where n is the number of (non-missing) subjects used in the PHREG fit, and p is the number of fitted covariates. The leverage residuals are an approximation to the jackknife; the i-th observation of the data set contains the approximate change in beta if observation i were dropped from the model. These residuals will normally be plotted as an aid to finding influential or "high leverage" observations Some data sets, particularily those with recurring events, may contain multiple observations for a single individual. The macro has the option of "collapsing" the matrix of leverage residuals so that the output has only one observation per unique individual. The procedure can also estimate the robust covariance estimate L'L. When there are multiple, possibly correlated, observations per subject this option combined with the collapsing mentioned above, gives an estimate of variance that is corrected for the correlation. Developers: E. Bergstralh, J. Kosanke and T. Therneau Section of Biostatistics Mayo Clinic Rochester, Mn 55905 Date: September 8, 1993 Call statement: %phlev (data=,time=,event=,xvars=, strata=,id=,collapse=,outlev=,outvar=, plot=,scaled=,ref=) Parameters: Required: data=input SAS data set name time=time variable for survival event=event variable for survival, 1=event, 0=censored xvars=list of covariates for Cox model Optional: strata=stratification variable(one only) for stratifed Cox models. id=name of id variable to be included in the output dataset. It must be numeric. collapse= Y,N,T,F. If yes(Y,T), then the residual matrix is collapsed (summed) based on unique values of the id variable. Default is not to collapse(N). outlev= name of the data set containing the leverage residuals. The variables names will be the same as the covariate names in the Cox model. The data set will also contain the ID variable, if one was specified. Default data set name is "phlev". outvar = name of the output data set containing the robust variance estimate. There is one observation and one variable for each covariate. Default data set name is "phvar". plot = Y,N,T,F. If yes(Y,T), a plot of residuals for each observation or ID value(if collapse=Y) is produced. A separate plot is created for each covariate. Default is N. scale = Y,N,T,F. If yes(Y,T), the scaled score residuals are plotted. If N(default), the raw residuals are used. ref = Y,N,T,F. If yes(Y,T), reference lines are drawn on the plots at +-se(beta). Default is N. Output: The macro prints the PHREG output used to fit the Cox model and a summary table including the Cox model betas, SE and chi-square along with the robust SE and its associated chi-square. It also includes the global Wald chi-square test based on the leverage residuals. The summary table is available as a SAS data set named _b_se. References: The leverage residuals are discussed in Cain and Lange, Biometrics 40:, 493-9 (1984). The robust variance estimate L'L is derived in Lin and Wei, JASA 84: 725-8 (1989). Extension of this to correlated data, using the collapsed leverage matrix is developed in Wei, Lin, and Weissfeld, JASA 84: 1065-83, amoung others. An overview of the methods is found in Mayo Clinic Tech Report 58. ***************************************************************************; */ %macro phlev(data=,time=,event=,xvars=, strata=, id=, collapse=N, outlev=phlev, outvar=phvar, plot=N, scaled=N, ref=N); %if %upcase(&collapse)=T %then %let collapse=Y; %if %upcase(&collapse)=F %then %let collapse=N; %if %upcase(&plot)=T %then %let plot=Y; %if %upcase(&plot)=F %then %let plot=N; %if %upcase(&scaled)=T %then %let scaled=Y; %if %upcase(&scaled)=F %then %let scaled=N; %if %upcase(&ref)=T %then %let ref=Y; %if %upcase(&ref)=F %then %let ref=N; footnote"phlev macro: event=&event time=&time strata=&strata id=&id collapse= &collapse"; footnote2"Xvars= &xvars"; %let nxs=0; %**count the number of x vars; %do i=1 %to 50; %if %scan(&xvars,&i)= %then %goto done; %let nxs=%eval(&nxs+1); %end; %done: %put &nxs; %macro xlist(prefix); %**set-up var list code; %do j=1 %to &nxs; &prefix&j %end; %mend xlist; data _setup; set &data; **delete obs with missing values; keep &id &time &event &strata &xvars; xmiss=0; %do i=1 %to &nxs; xx=%scan(&xvars,&i); if xx=. then xmiss=1; %end; if &event=. or &time=. or xmiss=1 then delete; ** run phreg and grab output datasets; proc phreg data=_setup covout outest=_est ; model &time*&event(0)= &xvars ; %if &strata ^= %then %do; strata &strata; %end; output out=_sch ressco=s1-s&nxs; id &id; data _sch5 ; set _sch(keep=&id &strata &time &event s1-s&nxs); keep &strata &id &time &xvars &event; label &time= &event= ; %do i=1 %to &nxs; **rename scor_i vars to xvars; %scan(&xvars,&i) =s&i; %end; *proc print data=_sch5 ; *title3'Crude score residuals'; run; %if %upcase(&collapse)=Y %then %do; proc sort data=_sch5 ; by &id; **collapse the dataset..sum within id; proc means noprint; by &id; var &xvars; output out=_sch5 sum=&xvars; *title3"Crude score residuals..collapsed(summed) over &id"; *proc print data=_sch5 ; run; %end; proc iml; **Invoke IML to get L..scaled residuals; use _sch5 ; setin _sch5 ; read all var{ &xvars } into r; %if &id ^= %then %do; read all var{&id } into id_time; %end; use _est; setin _est; read all var{&xvars } into covb where(_type_="COV "); read var{&xvars } into beta where(_type_="PARMS"); L=r*covb; **Scaled score residuals; LTL=L`*L; **Robust var matrix; se_cox=sqrt(vecdiag(covb)); **SE from Cox model; se_scr=sqrt(vecdiag(LTL )); **SE based on L'*L; chisqcox=(beta` / se_cox)##2; **Chi-square Cox model; chisqrob=(beta` / se_scr)##2; **Chi-square based on L'*L; wald_x2=beta*ginv(LTL)*beta`; **Global Wald chi-square..robust; eigv=eigval(LTL); df=sum(eigv > .000000000001); **Wald degrees freedom; p_wald= 1 - probchi(wald_x2,df); **Wald p-value; idl= %if &id ^= %then %do; id_time || %end; L; b_se= beta` || se_cox || se_scr || chisqcox || chisqrob; wald= wald_x2 || df || p_wald; bl_1x5=j(1,5,.); bl_px3=j(&nxs,3,.); table= (b_se || bl_px3) // ( bl_1x5 || wald ); %let sp=%str( ); varn ={ %if &id^= %then %do; "&id" %end; %do i=1 %to &nxs; %let t= "%scan(&xvars,&i)" &sp ; &t %end; }; xnames={ %do i=1 %to &nxs; %let t= "%scan(&xvars,&i)" &sp ; &t %end; }; xname ={ %do i=1 %to &nxs; %let t= "%scan(&xvars,&i)" &sp ; &t %end; " " }; sen={ "b_cox" "se_cox" "se_robst" "chi_cox" "chi_rob" "chi_wald" "df" "p_wald" }; create &outlev from idl[colname=varn ]; append from idl; create &outvar from ltl[rowname=xnames colname=xnames]; append from ltl[rowname=xnames]; create _b_se from table[ rowname=xname colname=sen ]; append from table[rowname=xname ]; *show datasets; *show contents; quit; ** quit IML; title4"Comparison of Cox model Beta, SE and chi-square to robust estimates"; title5"Wald chi-square is based on the robust estimates"; %if %upcase(&collapse)=Y %then %do; title6"Robust SE is based on the collapsed(summed within &id) L matrix"; %end; data _b_se; set _b_se end=eof; if eof=0 then p=1-probchi(chi_rob,1); else do; p=p_wald; xname='wald'; end; label xname='Variable' b_cox='Parameter/Estimate' se_cox='SE' se_robst='Robust/SE' chi_cox='Chi-Square' chi_rob='Robust/Chi-Square' chi_wald='Wald/Chi-Square'; %if %upcase(&scaled)=Y %then %do i=1 %to &nxs; if _n_=&i then do; call symput("pvref&i",se_robst); call symput("nvref&i",-1*se_robst); end; %end; proc print data=_b_se label split='/'; id xname; var b_cox se_cox se_robst chi_cox chi_rob chi_wald df p; format chi_cox chi_rob chi_wald 7.3 p 6.4; run; footnote; symbol; title4; %if %upcase(&plot)=Y %then %do; %if %upcase(&scaled)=N %then %do; %if &id= %then %do; data _sch5; set _sch5; obs=_n_; %let id=obs; %end; proc plot data=_sch5; plot (&xvars)*&id; title4 'Plot of Raw Residuals'; %end; %if %upcase(&scaled)=Y %then %do; %if &id= %then %do; data &outlev; set &outlev; obs=_n_; %let id=obs; %end; proc plot data=&outlev; %do i=1 %to &nxs; %let xvble=%scan(&xvars,&i); plot &xvble*&id %if %upcase(&ref)=Y %then %do; /vref=&&pvref&i &&nvref&i %end; ; %end; title4 'Plot of Scaled Residuals'; %end; %end; proc datasets nolist; delete _setup _est _sch _sch5 ; run; quit; %mend phlev ; /**** sample code **** %LET XX= RX1 RX2 RX3 RX4 NUMBER1 NUMBER2 NUMBER3 NUMBER4 SIZE1 SIZE2 SIZE3 SIZE4; %PHLEV(DATA=Bladder,TIME=time,EVENT=stat, XVARS= &XX, STRATA=enum, ID=id, COLLAPSE=Y); *********************/