-
Notifications
You must be signed in to change notification settings - Fork 1
/
mgc_survival_functions.R
executable file
·139 lines (112 loc) · 7.42 KB
/
mgc_survival_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
################################
# Name: MacIntosh Cornwell
# Email: [email protected]
################################
## Survival Analysis Functions
## Load in Libraries
packagelist = c("ggplot2", "reshape2", "DESeq2", "grid", "gridExtra", "scales", "ggrepel", "tools", "Hmisc", "survminer", "plyr")
junk <- lapply(packagelist, function(xxx) suppressMessages(
require(xxx, character.only = TRUE,quietly=TRUE,warn.conflicts = FALSE)))
## Input to functions needs to be survival data:
# THERE REALLY ISNT ANY WAY TO ADD COVARIATES FOR THIS - YOU SHOULD JUST CREATE YOUR COHORTS SEPARATELY, AND ADD IT TO THE COHORT COL
## EX) (COH1_MALE, COH2_MALE, COH1_FEMALE, COH2_FEMALE, ETC.)
# table with rownames - then (1) Time to event, (2) event status, (3) the cohort that it is in.
# time breakparam will manually split by the value you give, otherwise - will break into increments of 10
create_survival_plot <- function(survivaldata, timebreakparam = NULL, ylimitparam = NULL, confintervalparam = FALSE) {
## Read in the table and grab all the original labels (need for formula later)
## Rename with specific names - necessary for all of the formulas you need to do.
modeltable <- survivaldata
colnames(modeltable) <- c("Time_to_event", "event_status", "Cohort")
## Set a proper time break param
if (is.null(timebreakparam)) {
maxtimeparam <- max(modeltable[,"Time_to_event"])
timebreakparam <- ceiling(max(modeltable[,"Time_to_event"])/10)
} else {
maxtimeparam <- round_any(max(modeltable[,"Time_to_event"]), 100, f = ceiling)
}
## Create the sruvival object
survobject <- surv_fit(Surv(Time_to_event, event_status) ~ Cohort, data = modeltable)
## Save out the survival p value for output
outsurvpvalue <- surv_pvalue(survobject)
## Create the table of data
res <- summary(survobject, times = seq(0,timebreakparam*ceiling(maxtimeparam/timebreakparam), timebreakparam))
outsurvtable <- as.data.frame(res[c("strata", "time", "n.risk", "n.event", "surv", "std.err", "lower", "upper", "cumhaz", "std.chaz")])
## KM curves with a log-rank p value do NOT have any intrinsic effect sizes, so we have to resort to another analysis (coxph) to get that info
# TOSH - DO YOU NEED TO ADD A REF PARAM HERE???
# Suppressing warnings here due to inf coefficient warning 2021/02/02
suppressWarnings(outcoxphobject <- coxph(Surv(Time_to_event, event_status) ~ Cohort, data = modeltable))
## SURVIVAL CURVES CANT WORK WITH CONTINUOUS VARIABLES, BUT COXPH CAN
## Added a ylimparam for smaller changes that I need to expand to plot - this requires a param for pval.coord change as well
if (!is.null(ylimitparam)) {
pvalcordparam <- c(max(modeltable[,1]) * 0.01, min(ylimitparam) + diff(range(ylimitparam))/10 * 3)
pvalmethodcordparam <- c(max(modeltable[,1]) * 0.01, min(ylimitparam) + diff(range(ylimitparam))/10 * 1)
} else {
pvalcordparam <- pvalmethodcordparam <- NULL
}
outsurvplot <- ggsurvplot(
fit = survobject,
data = modeltable,
risk.table = TRUE,
conf.int = confintervalparam,
conf.int.style = "ribbon",
pval = TRUE, pval.method = TRUE,
break.time.by = timebreakparam,
ggtheme = theme_minimal(base_size = 10),
risk.table.y.text.col = TRUE,
risk.table.y.text = FALSE,
ylim = ylimitparam, pval.coord = pvalcordparam, pval.method.coord = pvalmethodcordparam)
return(list(outsurvtable = outsurvtable, outsurvplot = outsurvplot,
outsurvpvalue = outsurvpvalue, outsurvobject = survobject,
outcoxphobject = outcoxphobject))
}
## Input to functions needs to be survival data:
# table with rownames - then (1) Time to event, (2) event status, (3) the cohort that it is in.
# optional parameter for the covariates - WHICH APPARENTLY IS JUST USELESS, YOU JUST ADD ADDITIONAL COLUMNS TO THE SURVIVAL INDATA
coxph_analysis <- function(survivaldata) {
## Rename with specific names - necessary for all of the formulas you need to do.
modeltable <- survivaldata
colnames(modeltable)[1:2] <- c("Time_to_event", "event_status")
## Failsafe to make column names formula safe:
colnames(modeltable) <- gsub("-", "\\.", colnames(modeltable))
coxph_object <- coxph(as.formula(paste0("Surv(Time_to_event, event_status) ~ ", paste(colnames(modeltable)[3:(ncol(modeltable))], collapse = " + "))),
data = modeltable)
coxph_outtable <- merge(summary(coxph_object)[[7]], summary(coxph_object)[[8]], by = c("row.names", "exp(coef)"))
# I need a failsafe in here: 09/28/2022
## Heres a thought - a failsafe check that asks if any covar has 0 events, that combine that level with the next biggest level
# if (sum(unlist(coxph_outtable[,grepl("\\.95", colnames(coxph_outtable))] > 100)) > 0) { # If there are any "Inf" or massive CI vals, then remove that label
#
# }
# covarlabels <- colnames(survivaldata)[3:ncol(survivaldata)]
# for (covarnum in seq_len(length(covarlabels))) {coxph_outtable[,1] <- gsub(paste0("covar", covarnum), paste0(covarlabels[covarnum], "__"), coxph_outtable[,1])}
coxph_outplot <- ggforest(coxph_object, data = modeltable, fontsize = 1)
## Read in the table and grab all the original labels (need for formula later)
# intable <- survivaldata
# inlabels <- colnames(survivaldata)
# intime <- inlabels[1]
# inevent <- inlabels[2]
# incohort <- inlabels[3]
# colnames(modeltable) <- c("Time_to_event", "event_status", "Cohort")
# colnames(modeltable)[3:(ncol(modeltable))] <- paste0("covar", seq(ncol(modeltable)-2))
# coxph_object <- coxph(Surv(Time_to_event, event_status) ~ Cohort, data = modeltable)
# coxph_outtable <- merge(summary(coxph_object)[[7]], summary(coxph_object)[[8]], by = c("row.names", "exp(coef)"))
# coxph_outplot <- ggforest(coxph_object, data = modeltable, fontsize = 1.3)
#
# ## If there is covar data, read in the covariate data and merge it with out other table
# if (!is.null(covardata)) {
# modeltable <- merge(modeltable, covardata, by = "row.names")
# rownames(modeltable) <- modeltable[,1]
# modeltable <- modeltable[,!grepl("Row.names", colnames(modeltable))]
# colnames(modeltable)[4:(3+ncol(covardata))] <- paste0("covar", seq(ncol(covardata)))
#
# coxph_object <- coxph(as.formula(paste0("Surv(Time_to_event, event_status) ~ Cohort + ",
# paste(colnames(modeltable)[4:(3+ncol(covardata))], collapse = " + "))),
# data = modeltable)
# coxph_outtable <- merge(summary(coxph_object)[[7]], summary(coxph_object)[[8]], by = c("row.names", "exp(coef)"))
# ## Put the original labels back
# covarlabels <- colnames(covardata)
# for (covarnum in seq_len(length(covarlabels))) {coxph_outtable[,1] <- gsub(paste0("covar", covarnum), covarlabels[covarnum], coxph_outtable[,1])}
#
# coxph_outplot <- ggforest(coxph_object, data = modeltable, fontsize = 1)
# }
return(list(coxph_outtable = coxph_outtable, coxph_outplot = coxph_outplot))
}