forked from andy1764/Distributed-ComBat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathneuroCombat.R
163 lines (140 loc) · 5.18 KB
/
neuroCombat.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# Author: Jean-Philippe Fortin, [email protected]
# Date: July 14 2020
# Projet repo: github.com/Jfortin1/ComBatHarmonization
# This is a modification of the ComBat function code from the sva package that can be found at
# https://bioconductor.org/packages/release/bioc/html/sva.html
# The original code is under the Artistic License 2.0.
# The present code is under the MIT license
# If using this code, make sure you agree and accept this license.
#' @export
neuroCombat <- function(dat,
batch,
mod=NULL,
eb=TRUE,
parametric=TRUE,
mean.only=FALSE,
ref.batch=NULL,
verbose=TRUE
){
dat <- as.matrix(dat)
.checkConstantRows(dat)
.checkNARows(dat)
## Check for missing values
hasNAs <- any(is.na(dat))
if (hasNAs & verbose){
cat(paste0("[neuroCombat] Found ", sum(is.na(dat)), " missing data values. \n"))
}
if(mean.only){
if (verbose) cat("[neuroCombat] Performing ComBat with mean only\n")
}
if (eb){
if (verbose) cat("[neuroCombat] Performing ComBat with empirical Bayes\n")
} else {
if (verbose) cat("[neuroCombat] Performing ComBat without empirical Bayes (L/S model)\n")
}
##################### Getting design ############################
dataDict <- getDataDict(batch, mod, verbose=verbose, mean.only=mean.only, ref.batch=ref.batch)
design <- dataDict[["design"]]
####################################################################
##################### Data standardization #######################
if (verbose) cat('[neuroCombat] Standardizing Data across features\n')
stdObjects <- getStandardizedData(dat=dat,
dataDict=dataDict,
design=design,
hasNAs=hasNAs
)
s.data <- stdObjects[["s.data"]]
####################################################################
##################### Getting L/S estimates #######################
if (verbose) cat("[neuroCombat] Fitting L/S model and finding priors\n")
naiveEstimators <- getNaiveEstimators(s.data=s.data,
dataDict=dataDict,
hasNAs=hasNAs,
mean.only=mean.only
)
####################################################################
######################### Getting final estimators ####################
if (eb){
if (parametric){
if (verbose) cat("[neuroCombat] Finding parametric adjustments\n")}else{
if (verbose) cat("[neuroCombat] Finding non-parametric adjustments\n")
}
estimators <- getEbEstimators(naiveEstimators=naiveEstimators,
s.data=s.data,
dataDict=dataDict,
parametric=parametric,
mean.only=mean.only
)
} else {
estimators <- getNonEbEstimators(naiveEstimators=naiveEstimators, dataDict=dataDict)
}
####################################################################
######################### Correct data #############################
if (verbose) cat("[neuroCombat] Adjusting the Data\n")
bayesdata <- getCorrectedData(dat=dat,
s.data=s.data,
dataDict=dataDict,
estimators=estimators,
naiveEstimators=naiveEstimators,
stdObjects=stdObjects,
eb=eb
)
####################################################################
# List of estimates:
estimates <- list(gamma.hat=naiveEstimators[["gamma.hat"]],
delta.hat=naiveEstimators[["delta.hat"]],
gamma.star=estimators[["gamma.star"]],
delta.star=estimators[["delta.star"]],
gamma.bar=estimators[["gamma.bar"]],
t2=estimators[["t2"]],
a.prior=estimators[["a.prior"]],
b.prior=estimators[["b.prior"]],
stand.mean=stdObjects[["stand.mean"]],
mod.mean=stdObjects[["mod.mean"]],
var.pooled=stdObjects[["var.pooled"]],
beta.hat=stdObjects[["beta.hat"]],
mod=mod,
batch=batch,
ref.batch=ref.batch,
eb=eb,
parametric=parametric,
mean.only=mean.only,
s.data=s.data
)
return(list(dat.combat=bayesdata, estimates=estimates))
}
#' @export
neuroCombatFromTraining <- function(dat, batch, estimates, mod=NULL, verbose=TRUE){
cat("[neuroCombatFromTraining] In development ...\n")
new.levels <- unique(batch)
missing.levels <- new.levels[!new.levels %in% estimates$batch]
if (length(missing.levels)!=0){
stop(paste0("The batches ", missing.levels, " are not part of the training dataset\n"))
}
# Step 0: standardize data
var.pooled <- estimates$var.pooled
stand.mean <- estimates$stand.mean[,1]
mod.mean <- estimates$mod.mean
gamma.star <- estimates$gamma.star
delta.star <- estimates$delta.star
n.array <- ncol(dat)
if (!is.null(estimates$mod)){
if (verbose){
cat("[neuroCombatFromTraining] Using mean imputation to account for previous covariates adjustment in training dataset\n")
}
}
if (is.null(mod)){
stand.mean <- stand.mean+rowMeans(mod.mean)
} else {
stop("Including covariates for ComBat correction on a new dataset is not supported yet\n")
}
# Step 1: standardize data
bayesdata <- (dat-stand.mean)/sqrt(var.pooled)
# Step 2: remove estimates
gamma <- t(gamma.star[batch,,drop=FALSE])
delta <- t(delta.star[batch,,drop=FALSE])
bayesdata <- (bayesdata-gamma)/sqrt(delta)
# Step 3: transforming to original scale
bayesdata <- bayesdata*sqrt(var.pooled) + stand.mean
return(bayesdata)
}