Survival R Commands

Plotting the Survival Distribution with \(\alpha=1.5\) and \(\lambda=0.03\)

curve(pweibull(x, shape=1.5, scale=1/0.03,lower.tail=F), from=0, to=80,
      ylim=c(0,1), ylab='Survival probability', xlab='Time')

Plotting the Hazard Function

weibHaz <- function(x, shape, scale) dweibull(x, shape=shape, scale=scale)/pweibull(x, shape=shape, scale=scale, lower.tail=F)

When \(\alpha=1.5\) and \(\lambda=0.03\)

curve(weibHaz(x, shape=1.5, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="red")

When \(\alpha=0.75\) and \(\lambda=0.03\)

curve(weibHaz(x, shape=1.5, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="red")
curve(weibHaz(x, shape=0.75, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="blue",add=T)

When \(\alpha=1\) and \(\lambda=0.03\)

curve(weibHaz(x, shape=1.5, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="red")
curve(weibHaz(x, shape=0.75, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="blue",add=T)
curve(weibHaz(x, shape=1, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', add=T)

Simulating values from the Weibull distribution

tt.weib <- rweibull(1000, shape=1.5, scale=1/0.03)
mean(tt.weib)

## [1] 30.51191

median(tt.weib)

## [1] 26.56338

The Theoretical Mean and Median

gamma(1 + 1/1.5)/0.03 # mean

## [1] 30.09151

(log(2)^(1/1.5))/0.03 # median

## [1] 26.10733

Plotting the Survival Function using the Hazard Function

library(survival)

## Warning: package 'survival' was built under R version 3.5.2

tm <- 1:110 # subattributes(survexp.us)
hazMale <- survexp.us[,"male","2004"] # 2004 males
hazFemale <- survexp.us[,"female","2004"]

tm.diff <- diff(c(0,tm))
survMale <- exp(-cumsum(hazMale*tm.diff)*365.24)
plot(tm,survMale,type="l",col="blue",lwd=2, ylab="Survival Probability", xlab="Age in Years")

survFemale <- exp(-cumsum(hazFemale*tm.diff)*365.24)
points(tm,survFemale,type="l",col="red",lwd=2,add=T)

## Warning in plot.xy(xy.coords(x, y), type = type, ...): "add" is not a
## graphical parameter

Computing the Mean usng the Survival Function

tm <- c(0,1:110)
tm.diff <- diff(tm)
survMale <- exp(-cumsum(hazMale*tm.diff)*365.24)
survFemale <- exp(-cumsum(hazFemale*tm.diff)*365.24)

sum(survMale*tm.diff)

## [1] 74.38014

sum(survFemale*tm.diff)

## [1] 79.44648

Nonparametric Methods

library(survival)
tt=c(7,6,6,5,2,4)
status=c(0,1,0,0,1,1)
Surv(tt,status)

## [1] 7+ 6  6+ 5+ 2  4

result.km=survfit(Surv(tt,status)~1,conf.type="log-log")
summary(result.km)

## Call: survfit(formula = Surv(tt, status) ~ 1, conf.type = "log-log")
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     2      6       1    0.833   0.152       0.2731        0.975
##     4      5       1    0.667   0.192       0.1946        0.904
##     6      3       1    0.444   0.222       0.0662        0.785

plot(result.km)

Nelson-Aalen Estimator

result.fh=survfit(Surv(tt,status)~1,type="fh",conf.type="log-log")
summary(result.fh)

## Call: survfit(formula = Surv(tt, status) ~ 1, type = "fh", conf.type = "log-log")
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     2      6       1    0.846   0.141        0.306        0.977
##     4      5       1    0.693   0.180        0.229        0.913
##     6      3       1    0.497   0.210        0.101        0.807

plot(result.fh)

GastricXelox Example

We now consider data from an actual clinical trial. The data set “gastricXelox” is a Phase II (single sample) clinical trial of the chemotherapeutic agent Xelox administered to patients with advanced gastric cancer prior to surgery (Wang et al. The primary outcome of interest is “progression-free survival.” This quantity is defined as the time from entry into a clinical trial until progression or death, whichever comes first. The survival data set was extracted from the paper, and the survival times rounded to the nearest week.

library(asaur)
data(gastricXelox)
timeMonths <- gastricXelox$timeWeeks*7/30.25
delta <- gastricXelox$delta
result.km <- survfit(Surv(timeMonths, delta) ~ 1, conf.type="log-log")
plot(result.km, conf.int=T, mark="|", xlab="Time in months", ylab="Survival probability")
title("Progression-free Survival in Gastric Cancer Patients")

Comparing Two Groups of Survival Times

library(survival)
tt <- c(6, 7, 10, 15, 19, 25)
status <- c(1, 0, 1, 1, 0, 1)
treat <- c(0, 0, 1, 0, 1, 1)
survdiff(Surv(tt, status) ~ treat)

## Call:
## survdiff(formula = Surv(tt, status) ~ treat)
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## treat=0 3        2     1.08     0.776      1.27
## treat=1 3        2     2.92     0.288      1.27
## 
##  Chisq= 1.3  on 1 degrees of freedom, p= 0.3

survdiff(Surv(tt, status) ~ treat, rho=0)

## Call:
## survdiff(formula = Surv(tt, status) ~ treat, rho = 0)
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## treat=0 3        2     1.08     0.776      1.27
## treat=1 3        2     2.92     0.288      1.27
## 
##  Chisq= 1.3  on 1 degrees of freedom, p= 0.3

survdiff(Surv(tt, status) ~ treat, rho=1)

## Call:
## survdiff(formula = Surv(tt, status) ~ treat, rho = 1)
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## treat=0 3     1.62    0.917     0.547      1.07
## treat=1 3     1.25    1.958     0.256      1.07
## 
##  Chisq= 1.1  on 1 degrees of freedom, p= 0.3

Stratified Data for Survival Times

let us consider the data set “pharmacoSmoking” in the “asaur” package, where the primary goal is to compare the time to relapse (defined in this study as return to smoking) between two treatment groups. We may compare the two groups using a log-rank test as follows:

library(asaur)
attach (pharmacoSmoking)
survdiff(Surv(ttr, relapse) ~ grp)

## Call:
## survdiff(formula = Surv(ttr, relapse) ~ grp)
## 
##                  N Observed Expected (O-E)^2/E (O-E)^2/V
## grp=combination 61       37     49.9      3.36      8.03
## grp=patchOnly   64       52     39.1      4.29      8.03
## 
##  Chisq= 8  on 1 degrees of freedom, p= 0.005

result.ps <- survdiff(Surv(ttr, relapse) ~ grp)
plot(survfit(Surv(ttr, relapse) ~ grp) , xlab ="Time", ylab ="Survival probability",col=c("blue","red"), lwd=2)
legend("topright", legend=c("combination", "Patch Only"), col=c("blue","red"), lwd=2)

If we are concerned that the group comparison may differ by age, wemay define a categorical variable, “ageGroup2”, that divides the subjects into those 49 and under and those 50 and above.We may summarize this variable as follows:

table(ageGroup2)

## ageGroup2
## 21-49   50+ 
##    66    59

survdiff(Surv(ttr, relapse) ~ grp + strata(ageGroup2))

## Call:
## survdiff(formula = Surv(ttr, relapse) ~ grp + strata(ageGroup2))
## 
##                  N Observed Expected (O-E)^2/E (O-E)^2/V
## grp=combination 61       37     49.1      2.99      7.03
## grp=patchOnly   64       52     39.9      3.68      7.03
## 
##  Chisq= 7  on 1 degrees of freedom, p= 0.008

The chi-square test in this case differs only slightly from the unadjusted value, indicating that it was not necessary to stratify on this variable.

Example

The data set “pancreatic” in the “asaur” package consists of pancreatic cancer data froma Phase II clinical trial where the primary outcome of interest is progression-free survival (PFS). This quantity is defined as the time from entry into a clinical trial until progression or death, whichever comes first. The data consist of, for each patient, the stage, classified as “LAPC” (locally advanced pancreatic cancer) or “MPC” (metastatic pancreatic cancer), the date of entry into the clinical trial, the date of death (all of the patients in this study died), and the date of progression, if that was observed before death. The first six observations are shown in this output,

library(asaur)
head(pancreatic)

##   stage    onstudy progression      death
## 1     M 12/16/2005    2/2/2006 10/19/2006
## 2     M   1/6/2006   2/26/2006  4/19/2006
## 3    LA   2/3/2006    8/2/2006  1/19/2007
## 4     M  3/30/2006           .  5/11/2006
## 5    LA  4/27/2006   3/11/2007  5/29/2007
## 6     M   5/7/2006   6/25/2006 10/11/2006

Patient #4, for example, died with no recorded progression (shown using the missing value indicator “NA” or “.”), so that person’s PFS is time to death. For the five other patients in this list the PFS is time to the date of progression. Following is code to compute PFS for all 41 patients:

attach(pancreatic) 

# convert the text dates in to R dates
Progression.d <- as.Date(as.character(progression))
# OnStudy.d <- as.Date(as.character(onstudy))
# Death.d <- as.Date(as.character(death))

# compute progression-free survival

#progressionOnly <- Progression.d - OnStudy.d
#overallSurvival <- Death.d - OnStudy.d
#pfs <- progressionOnly
#pfs [is.na(pfs)] <- overallSurvival[is.na(pfs)]