# R code to estimate hazard functions # for the VA lung cancer trial # of 137 male patients with inoperable # lung cancer. This code is posted as # vakm.smooth.R # Variables # Treatment: 1=standard, 2=test (chemotherapy) # Celltype: 1=squamous, 2=smallcell, # 3=adeno, 4=large # Survival in days # Status: 1=dead, 0=censored # Karnofsky score # Months from Diagnosis # Age in years # Prior therapy: 0=no, 10=yes # Enter the data into a data frame. va <- read.table("c:/documents and settings/kkoehler.IASTATE/My Documents/courses/st565/data/va.dat", header=F, col.names=c("rx", "cellt", "time", "status", "karno", "months", "age", "prior_rx")) va$prior_rx <- va$prior_rx/10; va library(survival) vafit <- survfit(Surv(time, status) ~ rx, data = va, conf.int=.95, conf.type="log-log", type="kaplan-meier", se.fit=T) vafit #options #Specify the type of survival curve estimator. # Possible values are # "kaplan-meier", "fleming-harrington" or "fh2" # if a formula is given and # "aalen" or "kaplan-meier" # if the first argument is a coxph object (only the # first two characters are necessary). The default # is "aalen" when a coxph object is given, and # it is "kaplan-meier" otherwise. # conf.type: specifies the confidence interval type. # Possible values are: # "none" for no confidence intervals # "plain" for standard intervals, # "curve +- k*se(curve)", where k is determined from conf.int, # "log" for intervals based on the cumulative hazard or # log(survival) (the default), # "log-log" for intervals based on the log hazard or # log(-log(survival)). # The last type will never extend past 0 or 1. plot(vafit, lty = 2:3, lwd=4, cex=3) legend(400, .8, c("Standard", "Chemotherapy"), lty = 2:3) summary(vafit) survdiff(Surv(time, status) ~ rx, data=va, rho=0) # Fit the Fleming-Harrington estimator # for the survivor function fit <- survfit(Surv(time, status) ~ rx, data = va, conf.int=.95, conf.type="log-log", se.fit=T, type="fleming-harrington") plot(fit, lty = 2:3, main="Fleming-Harrinton Estimator", xlab="Time(days)", ylab="Survival Function") legend(200, .9, c("Standard", "Chemotherapy"), lty = 2:3) logs <- -1.*log(fit$surv) m1<- fit$strata[1] m2<- fit$strata[2] # Check for fit of the Weibull model plot(log(fit$time[1:m1]), log(logs[1:m1]), main="VA Cancer Study", ylab="Log(-Log(surv))", xlab="Log(time)",pch=1,cex=1.5, xlim=c(0,7), ylim=c(-4,2)) points(log(fit$time[(m1+1):(m1+m2)]), log(logs[(m1+1):(m1+m2)]), pch=16, cex=1.5) legend(0.1,2, c("Standard", "Chemotherapy"), marks=c(1,16),cex=0.9) # Smoothed estimates for hazard functions # First delete the censored cases e1<-fit$n.event[1:m1] n1 <- m1-length(e1[e1<1]) e2<-fit$n.event[(m1+1):(m1+m2)] n2 <- m2-length(e2[e2<1]) time <- fit$time[fit$n.event>0] event <- fit$n.event[fit$n.event>0] risk <- fit$n.risk[fit$n.event>0] par(fin=c(7.0,7.0),pch=18,mkh=.1,mex=1.5, plt=c(.2,.8,.2,.8)) den <- event[1:(n1+n2-1)]/risk[1:(n1+n2-1)]/ (time[2:(n1+n2)]-time[1:(n1+n2-1)]) plot(time[1:(n1+n2-1)], den, type="n", xlab="Time(days)", ylab="Hazard", cex=1.5, main="Smoothed Hazard Estimation \n Gaussian Kernel Smoothers ") lines(ksmooth(time[1:(n1-2)], den[1:(n1-2)], kernel="normal", bandwidth=150), lty=1,lwd=3) lines(ksmooth(time[(n1+1):(n1+n2-2)], den[(n1+1):(n1+n2-2)], kernel="normal", bandwidth=300), lty=3,lwd=3) legend(50,0.035,c("Standard", "Chemotherapy" ), lty=c(1,3))