curve(pweibull(x, shape=1.5, scale=1/0.03,lower.tail=F), from=0, to=80,
ylim=c(0,1), ylab='Survival probability', xlab='Time')
weibHaz <- function(x, shape, scale) dweibull(x, shape=shape, scale=scale)/pweibull(x, shape=shape, scale=scale, lower.tail=F)
curve(weibHaz(x, shape=1.5, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="red")
curve(weibHaz(x, shape=1.5, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="red")
curve(weibHaz(x, shape=0.75, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="blue",add=T)
curve(weibHaz(x, shape=1.5, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="red")
curve(weibHaz(x, shape=0.75, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', col="blue",add=T)
curve(weibHaz(x, shape=1, scale=1/0.03), from=0, to=80, ylab='Hazard', xlab='Time', add=T)
tt.weib <- rweibull(1000, shape=1.5, scale=1/0.03)
mean(tt.weib)
## [1] 30.51191
median(tt.weib)
## [1] 26.56338
gamma(1 + 1/1.5)/0.03 # mean
## [1] 30.09151
(log(2)^(1/1.5))/0.03 # median
## [1] 26.10733
library(survival)
## Warning: package 'survival' was built under R version 3.5.2
tm <- 1:110 # subattributes(survexp.us)
hazMale <- survexp.us[,"male","2004"] # 2004 males
hazFemale <- survexp.us[,"female","2004"]
tm.diff <- diff(c(0,tm))
survMale <- exp(-cumsum(hazMale*tm.diff)*365.24)
plot(tm,survMale,type="l",col="blue",lwd=2, ylab="Survival Probability", xlab="Age in Years")
survFemale <- exp(-cumsum(hazFemale*tm.diff)*365.24)
points(tm,survFemale,type="l",col="red",lwd=2,add=T)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "add" is not a
## graphical parameter
tm <- c(0,1:110)
tm.diff <- diff(tm)
survMale <- exp(-cumsum(hazMale*tm.diff)*365.24)
survFemale <- exp(-cumsum(hazFemale*tm.diff)*365.24)
sum(survMale*tm.diff)
## [1] 74.38014
sum(survFemale*tm.diff)
## [1] 79.44648
library(survival)
tt=c(7,6,6,5,2,4)
status=c(0,1,0,0,1,1)
Surv(tt,status)
## [1] 7+ 6 6+ 5+ 2 4
result.km=survfit(Surv(tt,status)~1,conf.type="log-log")
summary(result.km)
## Call: survfit(formula = Surv(tt, status) ~ 1, conf.type = "log-log")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 2 6 1 0.833 0.152 0.2731 0.975
## 4 5 1 0.667 0.192 0.1946 0.904
## 6 3 1 0.444 0.222 0.0662 0.785
plot(result.km)
result.fh=survfit(Surv(tt,status)~1,type="fh",conf.type="log-log")
summary(result.fh)
## Call: survfit(formula = Surv(tt, status) ~ 1, type = "fh", conf.type = "log-log")
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 2 6 1 0.846 0.141 0.306 0.977
## 4 5 1 0.693 0.180 0.229 0.913
## 6 3 1 0.497 0.210 0.101 0.807
plot(result.fh)
We now consider data from an actual clinical trial. The data set “gastricXelox” is a Phase II (single sample) clinical trial of the chemotherapeutic agent Xelox administered to patients with advanced gastric cancer prior to surgery (Wang et al. The primary outcome of interest is “progression-free survival.” This quantity is defined as the time from entry into a clinical trial until progression or death, whichever comes first. The survival data set was extracted from the paper, and the survival times rounded to the nearest week.
library(asaur)
data(gastricXelox)
timeMonths <- gastricXelox$timeWeeks*7/30.25
delta <- gastricXelox$delta
result.km <- survfit(Surv(timeMonths, delta) ~ 1, conf.type="log-log")
plot(result.km, conf.int=T, mark="|", xlab="Time in months", ylab="Survival probability")
title("Progression-free Survival in Gastric Cancer Patients")
library(survival)
tt <- c(6, 7, 10, 15, 19, 25)
status <- c(1, 0, 1, 1, 0, 1)
treat <- c(0, 0, 1, 0, 1, 1)
survdiff(Surv(tt, status) ~ treat)
## Call:
## survdiff(formula = Surv(tt, status) ~ treat)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## treat=0 3 2 1.08 0.776 1.27
## treat=1 3 2 2.92 0.288 1.27
##
## Chisq= 1.3 on 1 degrees of freedom, p= 0.3
survdiff(Surv(tt, status) ~ treat, rho=0)
## Call:
## survdiff(formula = Surv(tt, status) ~ treat, rho = 0)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## treat=0 3 2 1.08 0.776 1.27
## treat=1 3 2 2.92 0.288 1.27
##
## Chisq= 1.3 on 1 degrees of freedom, p= 0.3
survdiff(Surv(tt, status) ~ treat, rho=1)
## Call:
## survdiff(formula = Surv(tt, status) ~ treat, rho = 1)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## treat=0 3 1.62 0.917 0.547 1.07
## treat=1 3 1.25 1.958 0.256 1.07
##
## Chisq= 1.1 on 1 degrees of freedom, p= 0.3
let us consider the data set “pharmacoSmoking” in the “asaur” package, where the primary goal is to compare the time to relapse (defined in this study as return to smoking) between two treatment groups. We may compare the two groups using a log-rank test as follows:
library(asaur)
attach (pharmacoSmoking)
survdiff(Surv(ttr, relapse) ~ grp)
## Call:
## survdiff(formula = Surv(ttr, relapse) ~ grp)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## grp=combination 61 37 49.9 3.36 8.03
## grp=patchOnly 64 52 39.1 4.29 8.03
##
## Chisq= 8 on 1 degrees of freedom, p= 0.005
result.ps <- survdiff(Surv(ttr, relapse) ~ grp)
plot(survfit(Surv(ttr, relapse) ~ grp) , xlab ="Time", ylab ="Survival probability",col=c("blue","red"), lwd=2)
legend("topright", legend=c("combination", "Patch Only"), col=c("blue","red"), lwd=2)
If we are concerned that the group comparison may differ by age, wemay define a categorical variable, “ageGroup2”, that divides the subjects into those 49 and under and those 50 and above.We may summarize this variable as follows:
table(ageGroup2)
## ageGroup2
## 21-49 50+
## 66 59
survdiff(Surv(ttr, relapse) ~ grp + strata(ageGroup2))
## Call:
## survdiff(formula = Surv(ttr, relapse) ~ grp + strata(ageGroup2))
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## grp=combination 61 37 49.1 2.99 7.03
## grp=patchOnly 64 52 39.9 3.68 7.03
##
## Chisq= 7 on 1 degrees of freedom, p= 0.008
The chi-square test in this case differs only slightly from the unadjusted value, indicating that it was not necessary to stratify on this variable.
The data set “pancreatic” in the “asaur” package consists of pancreatic cancer data froma Phase II clinical trial where the primary outcome of interest is progression-free survival (PFS). This quantity is defined as the time from entry into a clinical trial until progression or death, whichever comes first. The data consist of, for each patient, the stage, classified as “LAPC” (locally advanced pancreatic cancer) or “MPC” (metastatic pancreatic cancer), the date of entry into the clinical trial, the date of death (all of the patients in this study died), and the date of progression, if that was observed before death. The first six observations are shown in this output,
library(asaur)
head(pancreatic)
## stage onstudy progression death
## 1 M 12/16/2005 2/2/2006 10/19/2006
## 2 M 1/6/2006 2/26/2006 4/19/2006
## 3 LA 2/3/2006 8/2/2006 1/19/2007
## 4 M 3/30/2006 . 5/11/2006
## 5 LA 4/27/2006 3/11/2007 5/29/2007
## 6 M 5/7/2006 6/25/2006 10/11/2006
Patient #4, for example, died with no recorded progression (shown using the missing value indicator “NA” or “.”), so that person’s PFS is time to death. For the five other patients in this list the PFS is time to the date of progression. Following is code to compute PFS for all 41 patients:
attach(pancreatic)
# convert the text dates in to R dates
Progression.d <- as.Date(as.character(progression))
# OnStudy.d <- as.Date(as.character(onstudy))
# Death.d <- as.Date(as.character(death))
# compute progression-free survival
#progressionOnly <- Progression.d - OnStudy.d
#overallSurvival <- Death.d - OnStudy.d
#pfs <- progressionOnly
#pfs [is.na(pfs)] <- overallSurvival[is.na(pfs)]