-
Notifications
You must be signed in to change notification settings - Fork 3
/
survival_separation_5y.R
167 lines (132 loc) · 5.18 KB
/
survival_separation_5y.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
require(survival)
require(survRM2)
## Computes L1 based measures of separation between survival curves
## note: higher ==> more separation
##
## Inputs:
## data = data.frame with samples units on rows and two colums with names
## 'Survival' that is survivial time
## 'Death' that is a 0-1 variable indicating death
##
## Outputs: a list with multiple separation measures
##
##
survival.curves.separation <- function(data, cluster, tau){
ans <- list()
tmp <- table(cluster)
ClusterLabel <- as.numeric(names(tmp))
ClusterSize <- as.numeric(tmp)
K <- length(ClusterLabel)
n <- sum(ClusterSize)
## Set a time grid
time_grid <- min(data$Survival) : min(max(data$Survival), tau)
time_scale <- 1/diff(range(time_grid))
## Create estimated survival curve matrix: [time x clusters]
H <- matrix(NA, nrow=length(time_grid), ncol=K)
colnames(H) <- paste('Cluster_', ClusterLabel, sep='')
## Estimate the KM curve clusterwise on a common support
for(k in 1:K){
## Compute Kaplan-Meier estimator on the kth cluster
km <- survfit(Surv(Survival, Death)~1, data = data[cluster==ClusterLabel[k] , ] )
## Construct the KM estimator function
KMfun <- stepfun(x=km$time[-1], y=km$surv)
H[,k] <- KMfun(time_grid)
}
## construct matrix of pairwise L1 distances
D <- matrix(0, ncol=K, nrow=K)
for (i in 1:K){
for(j in 1:K)
if(i!=j){
D[i,j] <- D[j,i] <- sum( abs( H[ , i] - H[ , j] ))
}
}
## Some scaling is given so that these numbers are somewhow interpretable
## for the same number of clusters independently of the time interval
D <- D * time_scale
## Metric 1: min pairwise L1 distance
iut <- which(upper.tri(D, diag=FALSE))
ans$L1min <- min(D[iut])
## Metric 2: compute the summed L1 distance of each
## cluster to the nearest one
diag(D) <- NA
ans$L1sum <- sum( D[iut] ) / length(iut)
return(ans)
}
## Computes the discrepancy between survival curves in temrs of RMST
## (restricted mean survival time).
##
## For all these measures: higher ==> more separation
##
## Inputs:
## data = data.frame with samples units on rows and two colums with names
## 'Survival' that is survivial time
## 'Death' that is a 0-1 variable indicating death
## tau truncation time, default 5 years, but if the min is less
## this is set to minimum across groups
##
## Outputs: a list with multiple separation measures
##
##
rmst.separation <- function(data, cluster, tau) {
ans <- list()
tmp <- table(cluster)
ClusterLabel <- as.numeric(names(tmp))
ClusterSize <- as.numeric(tmp)
K <- length(ClusterLabel)
n <- sum(ClusterSize)
## Compute the minimum of the largest observed time in each of the two groups
max.time <- rep(0, K)
for (k in 1:K) {
max.time[k] <- max(data$Survival[cluster == ClusterLabel[k]])
}
TAU <- min(max.time, tau)
## Names
## * RMST = restricted mean survival time
## * LER = life expectancy ratio
##
## LER: Life Expectancy Ratio Matrix
## LER[i,j] = LER[j,i] = max{RMST[i] / RMST[j], RMST[j] / RMST[i]}
## note that here we don't have a baseline group so we define the ratio
## always using in the denominator the group that have smaller RMST
##
## LED: Life Expectancy Difference
## LED[i,j] = LED[j,i] = abs(RMST[i] - RMST[j])
## note that here we don't have a baseline group so we define tha abs difference
##
LER <- LED <- matrix(0, ncol = K, nrow = K)
for (i in 1:K) {
for (j in 1:K)
if (i != j) {
## First select data from the two groups
idx <- { cluster == ClusterLabel[i] | cluster == ClusterLabel[j] }
x <- data[idx,]
## Create a 0-1 vector, with gr==1 if cluster==ClusterLabel[i]
gr0 <- cluster[idx]
gr <- ifelse(gr0 == ClusterLabel[i], 1, 0)
u <- rmst2(time = x$Survival, status = x$Death, arm = gr, tau = TAU)
rmst_i <- u$RMST.arm1$rmst[1]
rmst_j <- u$RMST.arm0$rmst[1]
LER[i,j] <- LER[j, i] <- max(rmst_i / rmst_j, rmst_j / rmst_i )
LED[i, j] <- LED[j, i] <- abs(rmst_i - rmst_j)
}
}
## index of the upper triangle
iut <- which(upper.tri(LER, diag = FALSE))
## metric: min of pairwise LER discrepancy
ans$LERmin <- min(LER[iut])
## metric: scaled summed pairwise LER discrepancy
ans$LERsum <- sum(LER[iut]) / length(iut)
## metric: min of pairwise LED discrepancy
ans$LEDmin <- min(LED[iut])
## metric: scaled summed pairwise LED discrepancy
ans$LEDsum <- sum(LED[iut]) / length(iut)
return(ans)
}
###
survPval = function(SurvDat,CLg,nYears=5){
fit <- survfit(Surv(Survival, Death) ~ CLg,data = SurvDat,subset = Survival < (365 * nYears))
suv <- survminer::ggsurvplot(fit, risk.table = TRUE, risk.table.height = 0.5,
xlim = c(0,5000), break.time.by = 500, pval = TRUE)
pVal <- survminer:::surv_pvalue(fit,method = "survdiff",data=SurvDat)
return(list(fit=fit,suv = suv,pVal=pVal))
}