-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunction_seurat_pipeline_v1.R
executable file
·193 lines (165 loc) · 8.11 KB
/
function_seurat_pipeline_v1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# Updated: 2/2021
library(Seurat)
library(tidyverse)
library(future) # For integration anchor mode
# Initial process
spatial_initial_process = function(obj, pca_npcs=30, resolution=0.5, verbose=T, no_normalization =F, method = 'SCT' ){
# Normalization/PCA
message(str_glue("Using {pca_npcs} pcs, resolution = {resolution}"))
if(method == 'SCT'){
if(no_normalization){
message('Warning, skipping Normalization step. Make sure object has been normalizated. Might only work for single slice')
DefaultAssay(obj) ='SCT'
obj = obj %>%
RunPCA(assay = "SCT", verbose = verbose, npcs=pca_npcs)
}else{
message('Using SCTransform for normalization')
obj = obj %>%
SCTransform(assay = "Spatial", return.only.var.genes = FALSE, verbose = verbose) %>%
RunPCA(assay = "SCT", verbose = verbose, npcs=pca_npcs)
}
}else{ # Using log normal
if(no_normalization){
message('Warning, skipping Normalization step. Make sure object has been normalizated')
DefaultAssay(obj) ='Spatial'
obj = obj %>%
FindVariableFeatures(verbose = verbose) %>%
ScaleData(verbose = verbose) %>%
RunPCA(verbose = verbose, npcs=pca_npcs)
}else{
message('Using LogNormalize for normalization')
DefaultAssay(obj) = 'Spatial'
obj = obj %>%
NormalizeData(assay = "Spatial", verbose = verbose) %>%
FindVariableFeatures() %>%
ScaleData() %>%
RunPCA( verbose = verbose, npcs=pca_npcs)
}
}
# find cluster
obj = obj %>% FindNeighbors(reduction = "pca", dims = 1:pca_npcs) %>%
FindClusters(verbose = verbose, resolution = resolution) %>%
RunUMAP(reduction = "pca", dims = 1:pca_npcs)
obj
}
# Integration with "merge" or "anchor" mode
spatial_integration = function(st_objs, integration_mode='merge', run_cluster=T, future_worker=50, future_max_gb=40,
RPCA_reference, shared_variable_feature_method='SelectIntegrationFeatures',
cluster_resolution =0.5){
if(integration_mode=='merge'){
# Merge integration. Standard integration ----------------------
print("Using Merge mode")
ptm = Sys.time()
st_int = merge(st_objs[[1]], st_objs[-1], add.cell.ids = names(st_objs))
Sys.time() - ptm; ptm = Sys.time()
DefaultAssay(st_int) <- "SCT"
# Variable Features
print('Using intersection of variable features in all objects')
shared_variable_feats = reduce(map(st_objs, VariableFeatures), intersect)
VariableFeatures(st_int) = shared_variable_feats
}else if(integration_mode=='anchor'){
print("Using Anchor mode for batch effect correction. For large object recommand 'RPCA' instead")
# Anchor based Integration. For sample with batch effect ------------------------
plan("multiprocess", workers = future_worker)
options(future.globals.maxSize = future_max_gb * 1000 * 1024^2) # Increase this if showing "This exceeds the maximum allowed size"
set.seed(1599) # to replicate
# select features that are repeatedly variable across datasets for integration
int_features = SelectIntegrationFeatures(object.list = st_objs)
st_objs = PrepSCTIntegration(object.list = st_objs, anchor.features = int_features, verbose = FALSE)
int_anchor = FindIntegrationAnchors(object.list = st_objs, normalization.method = "SCT",
anchor.features = int_features, verbose = FALSE)
st_int = IntegrateData(anchorset = int_anchor, normalization.method = "SCT", verbose = FALSE)
DefaultAssay(st_int) <- "integrated"
}else if(integration_mode=='RPCA'){
if(missing(RPCA_reference)){
stop('For RPCA mode, please specify the index (e.g. c(1,3,8)) of reference obj in the object list using RPCA_reference argument')
}
# Using SCT assay instead of NormalizeData/FindVariableFeatures/ScaleData
int_features <- SelectIntegrationFeatures(object.list = st_objs)
st_objs <- map(st_objs, RunPCA, features = int_features)
int_anchors <- FindIntegrationAnchors(object.list = st_objs, reference = RPCA_reference, reduction = "rpca",
dims = 1:30)
# this command creates an 'integrated' data assay
ptm = Sys.time(); ptm
st_int <- IntegrateData(anchorset = int_anchors, dims = 1:30)
Sys.time() - ptm; ptm = Sys.time()
# specify that we will perform downstream analysis on the corrected data note that the original
DefaultAssay(st_int) <- "integrated"
# Run the standard workflow for visualization and clustering
st_int <- ScaleData(st_int, verbose = FALSE)
}else{
stop("integration_mode only support 'merge', 'anchor' or 'RPCA' mode at the moment")
}
# PCA
# Assay for RunPCR needs to be "integrated" if using Anchor method. "SCT" for Merge mode
if(integration_mode=='merge') {
st_int = RunPCA(st_int, assay = "SCT", verbose = FALSE)
}else if(integration_mode=='anchor'){
st_int = RunPCA(st_int, assay = "integrated", verbose = FALSE)
}else if(integration_mode=='RPCA'){
st_int = RunPCA(st_int,, verbose = FALSE)
}
# Cluster
if(run_cluster){
print('Clustering the integrated obj')
st_int %>%
FindNeighbors(reduction = "pca", dims = 1:30) %>%
FindClusters(verbose = FALSE, resolution = cluster_resolution) %>%
RunUMAP(reduction = "pca", dims = 1:30)
}else{
print('Only ran PCA. Still need FindNeighbors/FindClusters/RunUMAP')
st_int
}
}
# wrapper to run seurat cell type transfer from scNRA/ scRNA to ST
seurat_cell_type_transfer = function(st_obj, reference_obj, ref_cell_type_col_name='cell_type',
output_assay_name = 'cell_type_transfer', assign_dominant=T, output_dominant_col){
# Check
if(missing(st_obj)|missing(reference_obj)){stop("Please provide both reference (scRNA/snRNA) and ST object")}
if(!'pca' %in% names(st_obj@reductions)){stop("PCA not found in reductions slot of ST obj. Run RunPCA first?")}
if(!'SCT' %in% names(st_obj@assays)){stop("SCT not found in assay slot of ST obj. Run SCTransform first?")}
if(!ref_cell_type_col_name %in% names([email protected])){stop(str_glue("{ref_cell_type_col_name} not found in meta.data slot of reference obj. Check the column name for the cell type col again?"))}
# Start
ptm = Sys.time() # timer
shared_variable_feats = intersect(VariableFeatures(reference_obj), VariableFeatures(st_obj))
message(glue::glue(
"Using {length(shared_variable_feats)} shared variable features ",
"between scRNA and ST to transfer label..."
))
Sys.time() - ptm; ptm = Sys.time() # timer
anchors = FindTransferAnchors(
reference = reference_obj,
query = st_obj,
normalization.method = "SCT",
features = shared_variable_feats,
reference.assay = 'SCT',
query.assay = 'SCT'
)
Sys.time() - ptm; ptm = Sys.time() # timer
# Predict the label transfer
predictions.assay = TransferData(
anchorset = anchors,
refdata = [email protected][, ref_cell_type_col_name],
prediction.assay = TRUE,
weight.reduction = st_obj[["pca"]],
dims = 1:30
)
Sys.time() - ptm; ptm = Sys.time() # timer
print(str_glue("Prediction done. Save result to {output_assay_name} assay"))
st_obj[[output_assay_name]] = predictions.assay
# Assign cell type
if(assign_dominant){
if(missing(output_dominant_col)){output_dominant_col = str_c(output_assay_name, "_dominant")}
print(str_glue("Using cell type with max proportions to assign cell type. Save to {output_dominant_col}"))
[email protected][[output_dominant_col]] = getDominantAssignment(st_obj, assay_name=output_assay_name)
}
return(st_obj)
}
# Get
getDominantAssignment=function(obj, assay_name){
if(!assay_name%in%names(obj@assays)) {stop(str_glue("Assay {assay_name} not found."))}
celltype_percent = obj@assays[[assay_name]] %>% GetAssayData %>% as.matrix %>% t %>% as.data.frame
celltype_df = data.frame(row.names = rownames(celltype_percent),
celltype = colnames(celltype_percent)[max.col(celltype_percent, ties.method = 'first')])
celltype_df
}