-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
787 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
# conda activate clusterprofiler
# 2023/09/13 Simon Mo
### ------------------ Load data ------------------ ###
# Load samples using script
# NOTE(review): `st_list` and `tumor_list`, used later in this script, are
# presumably created by this loader (along with the tidyverse packages that
# provide str_glue / %>%) -- confirm.
source('/PATH/TO/0_Load_All_Sample/script/load_samples.r')

# Parameters
# Root output directory; every result path in this script is built under it.
out_path = ''

### ------------------ Function ------------------ ###
# Directory that holds the helper scripts sourced below
src_path = "/PATH/TO/3_Pathway/script"
# Enhanced FindMarkers helpers.
# The path must interpolate `src_path`; `str_path` is not defined anywhere in
# this script and made this source() call fail.
source(str_glue('{src_path}/function_findmarker_enhanced.r'))
# Load GSEA pathway analysis scripts
source(str_glue('{src_path}/function_runGSEA.r'))
# Heatmap function
source(str_glue('{src_path}/function_AnnDotPlot.r'))
# Module score
source(str_glue('{src_path}/function_TestAndFixAddModuleScore.r'))
# Add average expression of genetic subclone
source(str_glue('{src_path}/function_CalculateCloneExpressionLevel.r'))
# Load database (called for its side effects; defined in one of the sourced
# helper scripts)
LoadMSigDBHuman()
|
||
### ------------------ Analysis ------------------ ### | ||
### ------------------ Find DEG, GSEA ------------------ ### | ||
# plot | ||
library(Seurat) | ||
library(enrichplot) | ||
library(patchwork) | ||
|
||
# Directory receiving all per-sample results of this analysis
out_path_analysis = str_glue('{out_path}/1A_subclone_vs_TME_plot')
#ST_microregion_deg_list
# Metadata column used both as identity class and as DEG grouping variable
st_groupby = 'genetic_clone'

## -------- This GSEA workflow is generic and can be applied to many datasets -------- ##
# Get DEGs and GSEA results for each sample
sample_use_all = names(st_list)
for (sample_use in sample_use_all) {
  # 0. select sample
  message("Processing sample:", sample_use)
  ST_use = st_list[[sample_use]]
  Idents(ST_use) = st_groupby
  #ST_use = subset(ST_use, downsample = 5) # For Testing

  # 1. Run DEG analysis
  # TME (group '0') as reference: run A. microregion vs TME, B. TME vs NotTME
  deg_all_df = FindMarkersEachVsRefComplete(ST_use, group.by = st_groupby, ident_ref = '0')

  # save result
  # One output directory per sample, shared by every file written below
  sample_out_dir = str_glue('{out_path_analysis}/{sample_use}')
  dir.create(sample_out_dir, recursive = TRUE, showWarnings = FALSE)
  write_tsv(deg_all_df, str_glue('{sample_out_dir}/0_DEG_result.tsv'))

  # 2. get GSEA result for TME vs Tumor for multiple genesets
  # Collections that returned nothing are dropped
  gsea_genesets_list = FindAllMarkerTable2GSEAresult_MsigDB(deg_all_df, genesets_include = c("H", "C6")) %>%
    discard(.p = ~length(.) == 0)
  # Nothing to save or plot for this sample
  if (length(gsea_genesets_list) == 0) next

  # 3. plot GSEA result
  # The spatial overview does not depend on the gene-set collection, so it is
  # built once per sample instead of once per collection.
  p_st = SpatialDimPlot(ST_use, group.by = st_groupby, stroke = NA, image.alpha = 0, label = TRUE)
  iwalk(gsea_genesets_list, function(result_list, geneset_name) {
    # save result
    saveRDS(result_list, str_glue('{sample_out_dir}/1_GSEA_result_{geneset_name}.rds'))
    # plot
    pdf(str_glue('{sample_out_dir}/1_dotplot_{geneset_name}.pdf'), height = 12, width = 10)
    # Close the device even when one of the plots below throws
    on.exit(dev.off(), add = TRUE)
    MakeGeneSetDotplot(result_list) %>% print()
    print(p_st)
  })
}
|
||
# ------------------ Set up parameters ------------------ #
# Gene-set collection whose saved GSEA results are plotted below
markerset_name = "H"

# ------------------ PLOT ------------------ #
# 1. Plot DEG heatmap
# B. Extract top GSEA pathways for nonTME and plot top features
sample_use_all = names(st_list)

n_genes_plot = Inf # set Inf to plot all genes
for (sample_use_name in sample_use_all) {
  # Parameters
  # The path must be built from the current loop variable and the selected
  # collection. `sample_use` is the stale loop variable of the DEG/GSEA loop
  # and `geneset_name` is not defined at this level.
  sample_out_dir = str_glue('{out_path_analysis}/{sample_use_name}')
  gsea_file_path = str_glue('{sample_out_dir}/1_GSEA_result_{markerset_name}.rds')
  if (!file.exists(gsea_file_path)) next
  message("Processing sample:", sample_use_name)
  gsea_use_list = readRDS(gsea_file_path)
  st_obj_use = st_list[[sample_use_name]]

  # Get GSEA core genes for a list of GSEA results
  # Keep at most `n_genes_plot` core-enrichment genes per gene set (ID)
  gsea_genes_df = imap(gsea_use_list, function(gsea_use, ident) {
    GetGSEAgenes(gsea_use) %>% mutate(ident = ident)
  }) %>%
    bind_rows() %>%
    distinct(ID, core_enrichment) %>%
    group_by(ID) %>%
    slice_head(n = n_genes_plot)
  # Named list: gene set ID -> vector of genes to plot
  gsea_genes_plt_list = split(gsea_genes_df$core_enrichment, gsea_genes_df$ID)

  # PreCheck if will cause issue when running AddModuleScore
  st_obj_use = TestAndFixSeuratForAddModuleScore(st_obj_use)

  # Annotation heatmap
  heatmap_dir = str_glue('{sample_out_dir}/2_ExpHeatmap/{markerset_name}')
  dir.create(heatmap_dir, recursive = TRUE, showWarnings = FALSE)
  # possibly() turns a failing gene set into NULL, which is then discarded, so
  # one bad gene set does not abort the whole sample.
  p_list = imap(gsea_genes_plt_list, possibly(function(features_plt, geneset_id) {
    AnnoDotPlot(st_obj_use, group.by = 'Filtered_tumor_regions', features = features_plt,
      annotation_idents = c('genetic_clone'), label_ident = TRUE,
      cluster_row = TRUE,
      cluster_col = TRUE,
      mode = 'Heatmap',
      title = geneset_id,
      subtitle = sample_use_name,
      ModuleScoreHeight = 3,
      highlight_tiles = FALSE,
      highlight_cutoff_quantile = 0.6,
      highlight_color = "#333333",
      highlight_thickness = 0.5
    )
  }, otherwise = NULL)) %>% discard(.p = ~is.null(.x))

  # Plot
  iwalk(p_list, function(p, geneset_id) {
    message("Plotting:", geneset_id)
    pdf(str_glue('{heatmap_dir}/2_annoDotplot_{geneset_id}.pdf'), height = 8, width = 8)
    # Close the device even when printing the plot throws
    on.exit(dev.off(), add = TRUE)
    print(p)
  })
}
|
||
## ----- Plot tumor region expression plot for each pathway ----- ##
## Next: plot tumor region expression for each pathway
# Version 2 - 20231010

# Extract the gene lists and save them
# Loop over samples and split the genes of each gene set into panels
samples_use = names(st_list)

# Maximum number of feature panels placed in one PDF file
panels_per_file = 9

for (sample_use_name in samples_use) {
  # Parameters
  # The path must be built from the current loop variable and the selected
  # collection. `sample_use` is the stale loop variable of the DEG/GSEA loop
  # and `geneset_name` is not defined at this level.
  sample_out_dir = str_glue('{out_path_analysis}/{sample_use_name}')
  gsea_file_path = str_glue('{sample_out_dir}/1_GSEA_result_{markerset_name}.rds')
  if (!file.exists(gsea_file_path)) next
  message("Processing sample:", sample_use_name)
  gsea_use_list = readRDS(gsea_file_path)
  st_obj_use = st_list[[sample_use_name]]
  st_tumor_use = tumor_list[[sample_use_name]]

  # One row per (gene set ID, core-enrichment gene), with a running panel
  # index so that each ID is cut into chunks of `panels_per_file` genes
  gsea_genes_df = imap(gsea_use_list, function(gsea_use, ident) {
    GetGSEAgenes(gsea_use) %>% mutate(ident = ident)
  }) %>%
    bind_rows() %>%
    #filter(ID %in% gsea_id_plt) %>% # Use all geneset
    distinct(ID, core_enrichment) %>%
    group_by(ID) %>%
    # Split by number of panels
    mutate(ID_split = ceiling(seq_along(core_enrichment) / panels_per_file)) %>%
    mutate(ID_split_full = str_c(ID, '_', ID_split))

  # Add average expression of genetic subclone
  # NOTE(review): the original passed `obj_use`, which is never defined. The
  # whole-section object is used here because CalculateCloneExpressionLevel
  # reads the TME column `0`; confirm that `st_obj_use`, not `st_tumor_use`,
  # was intended.
  clone_exp_df = CalculateCloneExpressionLevel(
    st_obj_use,
    features = unique(gsea_genes_df$core_enrichment)
  )
  gsea_genes_df = gsea_genes_df %>%
    left_join(y = clone_exp_df, by = c('core_enrichment' = 'Gene')) %>%
    # rearrange by clone group
    arrange(ID, max_tumor)
  write_tsv(gsea_genes_df, str_glue('{sample_out_dir}/4_GSEA_result_long_{markerset_name}.tsv'))

  # filtered version
  # Keep genes whose highest clone average exceeds 1.5x both the lowest clone
  # average and the TME average
  gsea_genes_filtered_df = gsea_genes_df %>%
    filter(min_max_tumor_ratio > 1.5, max_tme_ratio > 1.5) %>%
    # Rearrange
    arrange(ID, max_tumor) %>%
    # Redo splitting
    group_by(ID) %>%
    # Split by number of panels
    mutate(ID_split = ceiling(seq_along(core_enrichment) / panels_per_file)) %>%
    mutate(ID_split_full = str_c(ID, '_', ID_split)) %>%
    mutate(ID_split_full = str_c(ID_split_full, '_', max_tumor))

  write_tsv(gsea_genes_filtered_df, str_glue('{sample_out_dir}/4_GSEA_result_long_{markerset_name}_filtered.tsv'))

  # Plot
  # Named list: panel id (ID_split_full) -> genes drawn in that PDF
  gsea_genes_plt_list = split(gsea_genes_filtered_df$core_enrichment, gsea_genes_filtered_df$ID_split_full)
  # SpatialPlot
  spatial_dir = str_glue('{sample_out_dir}/5_SpatialPltPathwayGenesFiltered/{markerset_name}')
  dir.create(spatial_dir, recursive = TRUE, showWarnings = FALSE)
  iwalk(gsea_genes_plt_list, function(features_plt, panel_id) {
    p = SpatialPlot(st_tumor_use, features = features_plt, stroke = NA, image.alpha = 0.4)
    pwhole = SpatialPlot(st_obj_use, features = features_plt, stroke = NA, image.alpha = 0.4)
    message("Plotting:", panel_id)
    pdf(str_glue('{spatial_dir}/3_SpatialFeature_{panel_id}.pdf'), height = 8, width = 8)
    # Close the device even when printing a plot throws
    on.exit(dev.off(), add = TRUE)
    print(p)
    print(pwhole)
  })
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
|
||
library(patchwork) | ||
|
||
# External functions | ||
source('/diskmnt/Datasets/Spatial_Transcriptomics/Analysis/Shared_resource/script_git/Clustering/function_reoderbyhcluster.r') | ||
source('/diskmnt/Datasets/Spatial_Transcriptomics/Analysis/ST_subclone/38-GenomicSubclone/5_GSEA/script/src/function_TestAndFixAddModuleScore.r') | ||
## ---------- Plot Function ---------- ## | ||
# 1. Dotplot/Heatmap with annotation | ||
# 1. Dotplot/Heatmap with annotation
#
# Builds a stacked figure (combined with patchwork::wrap_plots): an optional
# module-score boxplot on top, one annotation bar per entry of
# `annotation_idents`, and the scaled average-expression dot plot / heatmap at
# the bottom. All panels share the same ordering of the `group.by` levels.
#
# Arguments:
#   obj               Object passed to DotPlot(), FetchData() and the module
#                     score helpers (presumably a Seurat object).
#   group.by          Name (string) of the variable used for the x axis.
#   features          Features to plot; also used for the module score.
#   annotation_idents Character vector of variables fetched with FetchData()
#                     and drawn as annotation bars.
#   label_ident       If TRUE, print the annotation values on the bars.
#   mode              "Dot" (default) or "Heatmap".
#   cluster_row,
#   cluster_col       If TRUE, reorder via ReorderByHCluster().
#   title, subtitle   Passed to plot_annotation().
#   AddModuleScore    If TRUE, add the ModuleScoreBoxplot() panel on top.
#   ModuleScoreHeight Relative height of the module-score panel.
#   highlight_tiles   If TRUE, outline tiles whose avg.exp.scaled is above the
#                     `highlight_cutoff_quantile` quantile, using
#                     `highlight_color` and `highlight_thickness`.
#   split_column, ... Currently not used by the body.
#
# Returns: the combined patchwork plot.
AnnoDotPlot = function(obj, group.by = 'seurat_clusters', features, annotation_idents, label_ident = TRUE,
                       mode = c("Dot","Heatmap"),
                       cluster_row = TRUE,
                       cluster_col = TRUE,
                       # title
                       title = NULL, subtitle = NULL,
                       # Module score
                       AddModuleScore = TRUE, ModuleScoreHeight = 2,
                       # Highlight based on value cutoff
                       highlight_tiles = FALSE,
                       highlight_cutoff_quantile = 0.75,
                       highlight_color = '#333333',
                       highlight_thickness = 1,
                       # Column to split
                       split_column = NULL,
                       ...){
  message("annotation_idents:", annotation_idents, "Take values from @meta.data")
  # A. Main Dot/Heatmap plot
  # Only the summarised data of DotPlot() is used; the plot itself is rebuilt
  pdata = DotPlot(obj, group.by = group.by, features = features) %>% .$data
  # A0. Filter out no expression gene
  pdata = pdata %>% filter(!is.nan(avg.exp.scaled))
  pdata = pdata %>% filter(!is.na(features.plot)) # This is weird need check
  # A1. Hierarchical clustering
  if (cluster_row) pdata = pdata %>% ReorderByHCluster(ident_column = 'id', groupby_column = 'features.plot', value_column = 'avg.exp.scaled')
  if (cluster_col) pdata = pdata %>% ReorderByHCluster(ident_column = 'features.plot', groupby_column = 'id', value_column = 'avg.exp.scaled')
  # test = pdata %>% ReorderSplitByHCluster(ident_column = 'id', groupby_column = 'features.plot', value_column = 'avg.exp.scaled', split_by_vector = )
  # return(test)

  p_dot_exp = pdata %>% ggplot(aes(x = id, y = features.plot))
  # A2. Dot or Heatmap plot
  mode = match.arg(mode)
  if (mode == 'Dot') {
    p_dot_exp = p_dot_exp +
      geom_point(aes(color = avg.exp.scaled, size = pct.exp)) +
      scale_color_gradient2(low = '#3333DD', mid = '#E0E0E0', high = '#DD3333', midpoint = 0)
  } else if (mode == 'Heatmap') {
    # Heatmap
    p_dot_exp = p_dot_exp +
      geom_tile(aes(fill = avg.exp.scaled)) +
      scale_fill_gradient2(low = '#3333DD', mid = '#E0E0E0', high = '#DD3333', midpoint = 0)
  }
  # Highlight specific tiles
  if (highlight_tiles) {
    highlight_cutoff_values = quantile(pdata$avg.exp.scaled, probs = highlight_cutoff_quantile)
    pdata_highlight = pdata %>% filter(avg.exp.scaled > highlight_cutoff_values)
    p_dot_exp = p_dot_exp +
      geom_tile(data = pdata_highlight, aes(fill = avg.exp.scaled), color = highlight_color, linejoin = "round", linewidth = highlight_thickness)
  }
  # Theme Adjustments
  p_dot_exp = p_dot_exp +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
  # Get groupby orders (reused so every panel shares the same x order)
  order_group_by = levels(pdata$id)
  # Get final feature count (relative height of the main panel)
  p_dot_exp_nrow = length(unique(pdata$features.plot))

  # B. Create column annotation bars
  # Use idents in meta.data
  annotation_df = FetchData(obj, vars = c(group.by, annotation_idents))
  # One row per group: the sorted unique annotation values pasted together.
  # "{group.by}" := injects the *value* of `group.by` as the column name; a
  # bare {{group.by}} := would use the caller's expression instead, which is
  # only right when the caller passes a literal string.
  annotation_collapsed_df = annotation_df %>%
    group_by(.data[[group.by]]) %>%
    summarize(across(all_of(annotation_idents), ~paste(sort(unique(.)), collapse = ' '))) %>%
    mutate("{group.by}" := factor(.data[[group.by]], levels = order_group_by)) # Reorder groupby
  # Create bar/column plot
  p_bar_nrow = length(unique(annotation_idents))
  p_bar_list = map(annotation_idents, function(anno_ident){
    p_bar = annotation_collapsed_df[, c(group.by, anno_ident)] %>%
      ggplot(aes(x = .data[[group.by]], y = anno_ident, fill = .data[[anno_ident]])) +
      geom_tile() +
      theme_void() +
      #theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
      # Remove x axis
      theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) +
      # Add y text back
      theme(axis.text.y = element_text(angle = 0, hjust = 1, vjust = 0.5)) +
      # Use color scale colorspace::rainbow_hcl(n)
      scale_fill_manual(values = colorspace::rainbow_hcl(n = length(unique(annotation_collapsed_df[[anno_ident]]))))
    if (label_ident) p_bar = p_bar + geom_text(aes(label = str_wrap(.data[[anno_ident]], width = 4)))
    return(p_bar)
  }) %>% setNames(annotation_idents)

  # B1. Make current height arrangement: 1 unit per bar, one unit per feature
  plot_height_arrangement = c(rep(1, p_bar_nrow), p_dot_exp_nrow)

  # B2. Add module score
  if (AddModuleScore) {
    # First test if need to fix object
    obj = TestAndFixSeuratForAddModuleScore(obj)
    # Plot
    p_modulescore = ModuleScoreBoxplot(obj, group.by = group.by, features_plt = features) +
      theme_bw() +
      # Remove x axis
      theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) +
      # Remove minor grid
      theme(panel.grid.minor = element_blank()) +
      # Add y text back
      theme(axis.title.y = element_text(angle = 0, hjust = 1, vjust = 0.5))
    # Reorder column so the boxplot follows the same x order as the main panel
    p_modulescore$data = p_modulescore$data %>%
      mutate("{group.by}" := factor(.data[[group.by]], levels = order_group_by)) # Reorder groupby
    # update plot arrangement
    p_bar_list = c(list(ModuleScore = p_modulescore), p_bar_list)
    plot_height_arrangement = c(ModuleScoreHeight, plot_height_arrangement) # Prepend height of module score (top panel)
  }

  # C. Combined
  p_all = wrap_plots(c(p_bar_list, list(Dotplot = p_dot_exp)), ncol = 1, heights = plot_height_arrangement, guides = "collect") &
    # put legends at the bottom
    theme(legend.position = 'bottom')
  # D. Titles
  p_all = p_all + plot_annotation(title = title, subtitle = subtitle,
    theme = theme(
      plot.title = element_text(hjust = 0.5, face = 'bold'),
      plot.subtitle = element_text(hjust = 0.5, face = 'italic'))
  )
  return(p_all)
}
|
||
# Module score boxplot | ||
# Module score boxplot
#
# Computes one module score for `features_plt` with AddModuleScore() and
# returns a ggplot boxplot of that score per level of `group.by`.
#
# Arguments:
#   obj          Object passed to AddModuleScore() / FetchData()
#                (presumably a Seurat object).
#   group.by     Name (string) of the grouping variable shown on the x axis.
#   features_plt Features that make up the module.
#
# Returns: a ggplot object whose data holds `group.by` (as character) and
# `ModuleScore`.
ModuleScoreBoxplot = function(obj, group.by = 'seurat_clusters', features_plt){
  message("Calculating Module score")
  obj_tmp = AddModuleScore(obj, features = list(ModuleScore = features_plt))
  # The score is read back from the 'Cluster1' metadata column and copied to a
  # stable name. NOTE(review): this relies on AddModuleScore's default `name`
  # prefix -- confirm against the installed Seurat version.
  obj_tmp@meta.data[['ModuleScore']] = obj_tmp@meta.data[['Cluster1']]

  # Module score boxplot
  # "{group.by}" := injects the *value* of `group.by` as the column name. A
  # bare {{group.by}} := uses the caller's expression, so a call such as
  # ModuleScoreBoxplot(obj, group.by = group.by, ...) created a stray column
  # named "group.by" and left the real grouping column unconverted.
  FetchData(obj_tmp, vars = c(group.by, 'ModuleScore')) %>%
    mutate("{group.by}" := as.character(.data[[group.by]])) %>%
    ggplot(aes(x = .data[[group.by]], y = ModuleScore, fill = .data[[group.by]], group = .data[[group.by]])) +
    geom_boxplot(width = 0.4, alpha = 0.5, outlier.shape = NA) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}
30 changes: 30 additions & 0 deletions
30
Figure3/3_Pathway/script/src/function_CalculateCloneExpressionLevel.r
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Add average expression of genetic subclone | ||
# Add average expression of genetic subclone
#
# Averages the expression of `features` per level of `group.by` with
# AverageExpression() and, per gene, reports which clone column is highest /
# lowest and how the highest clone compares with the lowest clone and with
# the TME column `0`.
#
# Arguments:
#   obj      Object passed to AverageExpression() (presumably a Seurat object).
#   features Features (genes) to summarise.
#   assay    Assay to average; also the element extracted from the result.
#   group.by Variable defining the groups. Groups whose name contains "clone"
#            are treated as tumor clones; the group named "0" is the TME.
#
# Returns: an ungrouped data frame with one row per gene: `Gene`, one column
# per group, `max_tumor_value`, `max_tumor`, `min_tumor_value`, `min_tumor`,
# `min_max_tumor_ratio` and `max_tme_ratio`. Ratios are plain divisions, so a
# zero denominator yields Inf/NaN.
CalculateCloneExpressionLevel = function(obj, features, assay = 'SCT', group.by = 'genetic_clone'){
  message("Note, currently use 0 as TME and clone to detect tumor columns")
  message("Assay = ", assay)
  # Calculate average expression of genetic subclone
  exp_df = obj %>%
    AverageExpression(
      assays = assay, slot = 'data',
      group.by = group.by,
      features = features) %>%
    .[[assay]] %>%
    as.data.frame %>%
    rownames_to_column('Gene')

  # Fail early with a clear message instead of an obscure error further down.
  # Same (case-insensitive) match as contains('clone') used below.
  # NOTE(review): some Seurat versions may rename groups (e.g. prefix numeric
  # names), which would trigger the second check -- confirm the column names.
  clone_columns = colnames(exp_df)[grepl('clone', colnames(exp_df), ignore.case = TRUE)]
  if (length(clone_columns) == 0) {
    stop("No averaged-expression column contains 'clone'; found: ",
         paste(setdiff(colnames(exp_df), 'Gene'), collapse = ', '), call. = FALSE)
  }
  if (!('0' %in% colnames(exp_df))) {
    stop("No TME column named '0' in the averaged expression; found: ",
         paste(setdiff(colnames(exp_df), 'Gene'), collapse = ', '), call. = FALSE)
  }

  # Add Min, max and difference
  exp_df %>%
    rowwise() %>%
    # Get Min and Max (value and name of the clone column) for each gene
    mutate(
      max_tumor_value = max(c_across(contains('clone'))),
      max_tumor = unlist(pmap(across(contains('clone')), ~names(c(...)[which.max(c(...))]))),
      min_tumor_value = min(c_across(contains('clone'))),
      min_tumor = unlist(pmap(across(contains('clone')), ~names(c(...)[which.min(c(...))])))
      # ^^ https://stackoverflow.com/questions/17735859/for-each-row-return-the-column-name-of-the-largest-value
    ) %>%
    # Drop the row-wise grouping so callers get a plain data frame back
    ungroup() %>%
    # Add difference
    mutate(
      min_max_tumor_ratio = max_tumor_value / min_tumor_value,
      max_tme_ratio = max_tumor_value / `0`
    )
}
Oops, something went wrong.