-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01_monilinia.post_survival_CV.R
120 lines (81 loc) · 4.63 KB
/
01_monilinia.post_survival_CV.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
## ******************************************************************* ##
## ******************* MONILINIA SURVIVAL ANALYSIS ******************* ##
## ******************************************************************* ##
## **************
## load observed data
source("00_1_monilinia.post_data-retrieve.R")
## **************
## ********************* models to test
cov.string = c("wd7", "weight_g.scale", "temp", "rain7", "perc_rot", "cum_deg_day.scale", "t7")
n_cov = length(cov.string)
id_cov = unlist( lapply(1:n_cov, function(i) combn(1:n_cov, i, simplify=FALSE) ), recursive=FALSE )
formula_cov = sapply(id_cov, function(i) paste("Surv(time, status) ~",paste(cov.string[i],collapse="+")) )
## ****************
## **************** CREATE CROSS-VALIDATION FOLDS
k.cv = 6
cv.folds = createFolds(as.factor(mon.survival$status), k = k.cv) # create index for crossvalidation
df.train = lapply(X = 1:k.cv, FUN = function(X) mon.survival[-cv.folds[[X]], ])
df.test = lapply(X = 1:k.cv, FUN = function(X) mon.survival[ cv.folds[[X]], ])
## **************** ##
# **************** parallel calibration function
funct_cv.gompertz = function(x, k.cv){
# x = 1 ## test
## ******************************************************************* ##
## AIC - BIC
## ******************************************************************* ##
mod.flxs = flexsurvreg( as.formula(formula_cov[x]),
data = mon.survival,
dist = "gompertz");
npar = mod.flxs$npars
ndata = dim(mon.survival)[1]
aic = mod.flxs$AIC
aic.C = aic + (2*npar*(npar+1)/(ndata-npar-1))
loglike = mod.flxs$loglik
bic = -(2*loglike) + (npar*log(ndata))
print(as.formula(formula_cov[x]))
## ******************************************************************* ##
## Cross-Validation
## ******************************************************************* ##
Dxy = NULL # Somer's rank correlation for censored response variable
Concordance = NULL # c index
for (i in 1:k.cv) {
# i=1 ## test
train.fit = flexsurvreg( as.formula(formula_cov[x]) ,
data = df.train[[i]],
dist = "gompertz")
lpnew.calc = summary(train.fit, newdata = df.test[[i]], type = "median", tidy = TRUE) %>% mutate(epx.lp = exp(est))
lpnew = lpnew.calc$est
Surv.rsp.new = Surv( df.test[[i]]$time, df.test[[i]]$status)
# ConcordanceUno[i] = UnoC(Surv.rsp, Surv.rsp.new, lpnew); print(ConcordanceUno[i])
rcorr = rcorr.cens(lpnew, Surv.rsp.new)
Dxy[i] = rcorr["Dxy"]; # print(Dxy[i])
Concordance[i] = rcorr["C Index"]; # print(Concordance[i])
}
db.res = data.frame(model = model,
mod.formula = formula_cov[x],
n.par = npar,
aic = aic, bic = bic, loglike = loglike,
D.xy_somers = mean(Dxy, na.rm=T),
D.xy_sd = sd(Dxy, na.rm=T),
C.index = mean(Concordance, na.rm=T),
k.crossval = k.cv )
return(db.res)
}
## **************** ##
res.cv = mclapply(X = 1:length(formula_cov), funct_cv.gompertz, mc.cores = 4, k.cv = k.cv )
results.crossvalidation.db = mclapply(X = 1:length(formula_cov), funct_cv.gompertz, mc.cores = 4, k.cv = k.cv )
save(results.crossvalidation.db, file = paste0("results.crossvalidation.db_", Sys.Date(),"_k",k.cv,"_median.RData") )
# load("results.crossvalidation.db_............_median.RData")
results.crossvalidation.db = results.crossvalidation_gompertz.db
utopia = c(bic = min(results.crossvalidation.db$bic) , D.xy_somers = min(results.crossvalidation.db$D.xy_somers) )
# euclidean distance between the Pareto solutions and the utopian point
euclidean = sqrt( (utopia[1] - results.crossvalidation.db$bic)^2 + (utopia[2] - results.crossvalidation.db$D.xy_somers)^2 )
results.crossvalidation.db.utopia = results.crossvalidation.db %>%
mutate( obj_euclidean_dist = euclidean )
par_opt_utopia = results.crossvalidation.db.utopia[ with(results.crossvalidation.db.utopia, which(obj_euclidean_dist==min(obj_euclidean_dist) ) ),]
library(rPref)
a = psel(results.crossvalidation.db, low(bic) * high(D.xy_somers) )
ggplot(results.crossvalidation.db) +
geom_point(aes(bic, D.xy_somers)) +
xlim(c(3200,4000))+
theme_bw()