-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunction_ST_distance_pattern_analysis_v1.R
executable file
·293 lines (260 loc) · 12.6 KB
/
function_ST_distance_pattern_analysis_v1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# For Distance Extraction
# - Created 01/15/2021
# File this function from : PKD/script/PKD_neighborhood_extraction_DWF_clustering_01152021.ipynb
# FUNCTIONS
# General ----------------------------------------------------------------------------------
# FUNCTIONS
# Get coord_meta df
get_coord_meta_df = function(st, slice, cell_ids, column_include){
if(missing(slice)) slice = Images(st)[[1]]
if(missing(cell_ids)) cell_ids = rownames(st@images[[slice]]@coordinates)
if(missing(column_include)) column_include = colnames([email protected])
cbind([email protected][cell_ids, column_include], st@images[[slice]]@coordinates[cell_ids, ])
}
# FUNCTIONS
#functions For distance mtx
get_distance_mtx = function(coord_df, normalize = T){
dist_mtx = coord_df[,c('imagerow','imagecol')] %>% as.matrix %>% dist %>% as.matrix
if(normalize){
min_dist = min(dist_mtx[1,-1])
dist_mtx = round(dist_mtx / min_dist)
}
dist_mtx
}
# List of all neighbor of each spot
get_neighbor_id_list = function(distance_mtx, size = 1){
# Go through row to get neighbor
neighbor_df = apply(distance_mtx, 1, function(distances){
names(distances[ distances <= size])
}) %>% setNames(rownames(distance_mtx)) # rowname is the 'center' spot
# Remove self, center spot, from neighbor
neighbor_list = imap(neighbor_df, ~setdiff(.x , .y))
neighbor_list
}
# Get neighbor of arbitrary points
get_neighbor_id = function(quary_id, neighbor_list){
selected_list = neighbor_list[quary_id]
neighbor_spots = unlist(selected_list) %>% setdiff(quary_id)
neighbor_spots
}
# Function
# From target spot, get source spot and distance
get_source_spots = function(source_spots, neighbor_spots, distance_mtx, include_source_spots=T){
# Id of source
source_position = distance_mtx[source_spots, neighbor_spots] %>% apply(2, which.min) # Value is Source's row in table
# Distance to source
distance_vector = distance_mtx[source_spots, neighbor_spots] %>% apply(2, min)
# Combine
source_dist_df = data.frame(neighbor_spots = names(source_position), # Name is target spots
source_spot = spots_of_interest[source_position],
distance_to_source = distance_vector[names(source_position)]) # Keep in same order
# Add source spots
if(include_source_spots){
source_to_source_df = data.frame(neighbor_spots = source_spots,
source_spot = source_spots,
distance_to_source = 0)
rownames(source_to_source_df) = source_spots
source_dist_df = rbind(source_dist_df, source_to_source_df)
}
source_dist_df
}
# Add meta of source
add_source_meta = function(meta_df, source_dist_df, meta_columns = c('histology'), add_column_name = 'source'){
# Extract source meta info
# Assuming rownames is the 'spot_id'
source_meta_df = meta_df[,meta_columns, drop=F] %>%
setNames(str_c(add_column_name, names(.), sep='_')) %>%
rownames_to_column('source_spot')
# Add to source_dist_df
source_dist_df = left_join(source_dist_df, source_meta_df, by = 'source_spot')
return(source_dist_df)
}
# function
# Wrapper
# # 3. Get final annotated neighborhood spot df
# # With Coordinates, source, distance, source meta, neighborhood spot meta
get_neighborhood_df = function(source_spots, neighboor_spots, distance_mtx, include_source_spots=T,
coord_meta_df,
source_meta_columns = c('histology'),
#neighboor_meta_columns = c('histology'),
add_source_name = 'source',
add_neighbor_name = 'neighbor'){
# Get 'source' spot and their meta for each gene of interests
source_dist_df = get_source_spots(source_spots, neighboor_spots, distance_mtx, include_source_spots)
source_dist_df = add_source_meta(coord_meta_df, source_dist_df, meta_columns = source_meta_columns,
add_column_name = add_source_name)
# Create final annotation df
coord_meta_df = coord_meta_df %>%
setNames(str_c(add_neighbor_name, names(.), sep='_')) %>%
rownames_to_column('neighbor_spots') # Target (neighbor) point meta
neighborhood_annotated_df = right_join(coord_meta_df,
source_dist_df, # source point meta and distance
by = 'neighbor_spots')
neighborhood_annotated_df
}
# function
# Add features
add_neighboor_features = function(st_obj, neighborhood_df, features, assay){
if(!missing(assay)) DefaultAssay(st_obj) = assay
# Spots
neighbor_spots = neighborhood_df[, 'neighbor_spots']
feature_df = FetchData(object = st_obj, vars = features, cells = neighbor_spots)[neighbor_spots,,drop=F] # Get feature for st object
return(cbind(neighborhood_df, feature_df))
}
# DE function ----------------------------------------------------------------------------------
# DE function
FindNeighborhoodDE = function(obj, neighborhood_df, source_range = 0,
distant_spot_range = 3
){
source_range_spots = neighborhood_df %>% filter(distance_to_source %in% source_range) %>% pull(neighbor_spots)
distant_range_spots = neighborhood_df %>% filter(distance_to_source %in% distant_spot_range) %>% pull(neighbor_spots)
FindMarkers(obj = obj[['SCT']], cells.1 = source_range_spots,
cells.2 = distant_range_spots, ) %>% rownames_to_column('gene')
}
SweepNeighborhoodDE = function(obj, neighborhood_df, source_range=0, sweep_range=3:5, sweep_size=1, sweep_overlap =T){
# Remove range overlap with source
sweep_range = setdiff(sweep_range, source_range)
# Set sweep range
if(sweep_overlap){
# Overlap
sweep_list = map(sweep_range, ~seq(from = ., length.out = sweep_size))
}
else{
# No Overlap
sweep_group_min = seq(from = min(sweep_range), to = max(sweep_range), by = sweep_size)
sweep_list = map(sweep_group_min , ~seq(from = ., length.out = sweep_size))
}
# Run list of range
map(sweep_list, function(sweep_group){
FindNeighborhoodDE(obj, neighborhood_df, source_range, distant_spot_range = sweep_group) %>%
mutate(source_range = toString(source_range),
distant_range = toString(sweep_group)
)
}) %>% bind_rows()
}
GetSweepNeighborhoodDEJaccardScore = function(neighborhood_de_sweep_df, fdr_cutoff = 0.05, complete_table =T){
# Get gene list split by sweep groups
sweep_gene_list = neighborhood_de_sweep_df %>%
filter(p_val_adj < fdr_cutoff) %>%
group_by(distant_range) %>%
split(f = .$distant_range, x=.$gene)
# Get Jaccard score
df = combn(names(sweep_gene_list), m = 2) %>% t %>% as.data.frame %>%
pmap(function(V1, V2){
intersect_gene = intersect(sweep_gene_list[[V1]], sweep_gene_list[[V2]])
union_gene = union(sweep_gene_list[[V1]], sweep_gene_list[[V2]])
jaccard = length(intersect_gene)/length(union_gene)
data.frame(Group1 =V1,
Group2 =V2,
Group1_n = length(sweep_gene_list[[V1]]),
Group2_n = length(sweep_gene_list[[V2]]),
intersect_n = length(intersect_gene),
union_n = length(union_gene),
Jaccard_score = jaccard)
}) %>% bind_rows()
if(complete_table){
df = rbind(df,
df %>% dplyr::rename(Group2 = Group1, Group1= Group2) %>%
dplyr::rename(Group2_n = Group1_n, Group1_n= Group2_n)
)
}
df
}
# PLOT FUNCtion
## Expression in the neighborhood
plot_spatial_neighboor_feature = function(neighborhood_df,
x_col = 'neighbor_imagecol',
y_col = 'neighbor_imagerow',
flip_axis = c('none','x','y','both'),
pt_size = 1.5,
feature,
rev_color = F,
palette
){
if(missing(feature)) stop('Missing feature to plot')
# Adjust axis
flip_axis = match.arg(flip_axis)
if(flip_axis %in% c('x','both')) neighborhood_df[[x_col]] = -neighborhood_df[[x_col]]
if(flip_axis %in% c('y','both')) neighborhood_df[[y_col]] = -neighborhood_df[[y_col]]
# Color
col_palette = Seurat:::SpatialColors(10)
if(rev_color) col_palette = rev(col_palette)
if(!missing(palette)) col_palette = palette
neighborhood_df %>%
ggplot(aes(x = .data[[x_col]], y = .data[[y_col]], color = .data[[feature]]))+
geom_point(size = pt_size) +
coord_fixed() +
scale_color_gradientn(colors = col_palette) +
labs(title = feature)+
theme_void() +
theme(plot.title = element_text(size = 20, face = 'bold', hjust = 0.5))
}
# Function
# Distance. A wrapper
plot_spatial_neighboor_distance = function(neighborhood_gene_df,
distance_col_name = 'distance_to_source',
plot_title = 'Distance to Source',
...){
loc_palette = rev(viridisLite::inferno(10))#paletteer::paletteer_d("colorBlindness::Blue2DarkOrange12Steps")#paletteer::paletteer_d("awtools::a_palette") #RColorBrewer::brewer.pal(n=9, 'BuPu')
plot_spatial_neighboor_feature(neighborhood_gene_df, feature=distance_col_name, palette = loc_palette, ...) +
labs(title = plot_title)
}
# Wrapper
# Plot multiple features and add distance
plot_spatial_neighboor_features = function(neighborhood_gene_df,
features,
show_distance_plot = T,
distance_col_name = 'distance_to_source', ...){
# Feature plots
plist = map(features, function(feature){
plot_spatial_neighboor_feature(neighborhood_gene_df, feature = feature, ...)
}) %>% setNames(features)
# Add distant plot
if(show_distance_plot){
distance_p = plot_spatial_neighboor_distance(neighborhood_gene_df, distance_col_name, ...)
plist = c(list(distance = distance_p), plist)
}
# Combine plot
wrap_plots(plist)
}
plot_feature_distance_curve = function(neighborhood_feature_df,
features,
group.by = 'source_histology',
idents,
distance_col = 'distance_to_source',
data_type = 'Gene',
value_type = 'Expression', # use 'Percentage or probability' for Cell type
smooth = T
){
if(missing(idents)) idents = unique(neighborhood_feature_df[[group.by]])
# Facet formula
facet_formula = str_c(data_type,'~', group.by) %>% as.formula
cols_to_keep = c(group.by, distance_col, features)
df = neighborhood_feature_df %>%
filter(.data[[group.by]] %in% idents) %>%
select(all_of(cols_to_keep)) %>% a
pivot_longer(cols = -c(.data[[distance_col]], .data[[group.by]]),
names_to = data_type,
values_to = value_type)
df_Avgexp = df %>% group_by(.data[[distance_col]], .data[[group.by]], .data[[data_type]]) %>%
summarise(Ave_exp = mean(.data[[value_type]], na.rm=T))
if(smooth){ # Use smooth line
p = ggplot(df, aes(x = .data[[distance_col]],
y = .data[[value_type]],
color = .data[[group.by]]) ) +
facet_grid(facet_formula)+
geom_smooth(aes(fill = .data[[group.by]]), method = 'loess', formula = 'y~x', alpha = 0.15, size = 0, span=0.75) + # SE. span control smoothness
stat_smooth(geom="line", method = 'loess', formula = 'y~x', alpha=0.6, size=2, span = 0.75) #span control smoothness
} else { # Use Ave expression line plot
p = ggplot(df_Avgexp, aes(x = .data[[distance_col]],
y = Ave_exp,
color = .data[[group.by]]) ) +
facet_grid(facet_formula)+
geom_line() +
geom_point()
}
p + theme_bw() +
theme(panel.grid = element_blank(),
strip.background = element_rect(fill = 'gray90', color = 'transparent')
)
}