diff --git a/figure_scripts/extended_data_figure_1/extended_data_figure_1.html b/figure_scripts/extended_data_figure_1/extended_data_figure_1.html index 67f3f97..788baf1 100644 --- a/figure_scripts/extended_data_figure_1/extended_data_figure_1.html +++ b/figure_scripts/extended_data_figure_1/extended_data_figure_1.html @@ -873,7 +873,7 @@

Supplemental Figure 1F

theme(axis.line = element_line(colour = "black"), panel.border = element_blank(), panel.background = element_blank()) -

+

@@ -1829,7 +1829,7 @@

Supplemental Figure 1G

x = element_blank(), y = "pg/mL + 1")print(p_1e) -

+

@@ -2800,315 +2800,421 @@

Supplemental Figure 1K

y = element_blank())

Supplemental figure 1L

-

Supplemental figure 1M

-
plot_info <- read.csv("/projects/home/nealpsmith/projects/myocarditis/swimmer_plot/swimmer_plot_info.csv")
-
-# Need to figure out how long to make the line for everyone
-plot_info$end_line <- apply(plot_info, 1, function(d){
-  if (!is.na(d[["post_steroid_samp_day"]])){
-    return(as.numeric(d[["post_steroid_samp_day"]]) - as.numeric(d[["ici_start_day"]]))
-  } else {
-    return(as.numeric(d[["steroid_start_day"]]) - as.numeric(d[["ici_start_day"]]))
-  }
-})
-plot_info$days_to_steroid <-  plot_info$steroid_start_day - plot_info$ici_start_day
-plot_info$days_to_post_steroid <-plot_info$post_steroid_samp_day - plot_info$ici_start_day
-plot_df <- plot_info %>%
-  dplyr::select(ID, time_ici_to_pre_steroid_samp, time_pre_steroid_samp_to_steroid,
-                time_steroid_start_to_post_steroid_samp,days_to_steroid, days_to_post_steroid, end_line)
-# Lets get the rows ordered
-fatal_ids <- c("SIC_3", "SIC_17", "SIC_175", "SIC_232", "SIC_264", "SIC_266")
-plot_df$fatal <- ifelse(plot_df$ID %in% fatal_ids, "fatal", "non-fatal")
-plot_df$has_pre <- ifelse(is.na(plot_df$time_ici_to_pre_steroid_samp), "no", "yes")
-
-order <- plot_df %>%
-  arrange(desc(has_pre), fatal) %>%
-  .$ID
-plot_df$ID <- factor(plot_df$ID, levels = rev(order))
-
-brewer.pal(4, "Set1")
-
-plot_info$steroid_to_pre_steroid <- plot_info$pre_steroid_samp_day - plot_info$steroid_start_day
-# Time from steroid to post-steroid samp
-plot_info$steroid_to_post_steroid <- plot_info$post_steroid_samp_day - plot_info$steroid_start_day
+
#### first run de
+if (!file.exists(glue('{wd}/output/lineage_de_by_steroid_treatment_all_results.csv'))) {
+  counts <- read_counts(glue("{wd}/output/pb_counts_by_sample_id_and_lineage.csv"))
+  meta <- read_meta(glue("{wd}/output/pb_meta_by_sample_id_and_lineage.csv"))
+  meta <- meta %>%
+    filter(steroid_treatment != "NA") %>%
+    filter(!str_detect(lineage, "Doublet"))
+  steroid_contrast_vec <- c('steroid_treatment', 'pre_steroid', 'post_steroid')
+  steroid_lineage_results <- run_de_by_comp_var(counts, meta, glue('{wd}/output/lineage'), steroid_contrast_vec,
+                                                deseq_formula = formula("~ steroid_treatment"))
+} else {
+  steroid_lineage_results <- read_csv(glue('{wd}/output/lineage_de_by_steroid_treatment_all_results.csv'))
+}
+
+#### then run gsea on de results
+if (!file.exists(glue('{wd}/output/gsea_by_steroid_treatment_by_lineage.csv'))) {
+  steroid_lineage_gsea <- run_gsea(steroid_lineage_results)
+  write_csv(steroid_lineage_gsea, glue('{wd}/output/gsea_by_steroid_treatment_by_lineage.csv'))
+} else {
+  steroid_lineage_gsea <- read_csv(glue('{wd}/output/gsea_by_steroid_treatment_by_lineage.csv'))
+}
+
+#### make heatmap
+pathways <- c('KEGG_ALLOGRAFT_REJECTION', 'KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION',
+              'KEGG_CELL_ADHESION_MOLECULES_CAMS', 'KEGG_DNA_REPLICATION', 'HALLMARK_INTERFERON_GAMMA_RESPONSE',
+              'KEGG_VIRAL_MYOCARDITIS')
+pathway_abbrevs <- c('KEGG: Allo', 'KEGG: AP', 'KEGG: CAMS', 'KEGG: DNA', 'HALLMARK: IFNG', 'KEGG: Viral Myo')
+abbrev_dict <- setNames(pathway_abbrevs, pathways)
+lineage_order <- c('pDCs', 'cDCs', 'B and plasma', 'MNP', 'CD4', 'CD8 and NK')
+p_thresh <- 0.1
 
-plot_df <- plot_info %>%
-  dplyr::select(ID, steroid_to_pre_steroid, steroid_to_post_steroid) %>%
-  mutate("steroid_start" = 0)
-plot_df$start_point <- ifelse(is.na(plot_df$steroid_to_pre_steroid), 0, (plot_df$steroid_to_pre_steroid))
-plot_df$end_point <- ifelse(is.na(plot_df$steroid_to_post_steroid), 0, (plot_df$steroid_to_post_steroid))
-
-# Lets get the rows ordered
-fatal_ids <- c("SIC_3", "SIC_17", "SIC_175", "SIC_232", "SIC_264", "SIC_266")
-plot_df$fatal <- ifelse(plot_df$ID %in% fatal_ids, "fatal", "non-fatal")
-plot_df$has_pre <- ifelse(is.na(plot_df$steroid_to_pre_steroid), "no", "yes")
-plot_df$has_post <- ifelse(is.na(plot_df$steroid_to_post_steroid), "no", "yes")
-order <- plot_df %>%
-  arrange(desc(has_post), fatal,steroid_to_post_steroid ) %>%
-  .$ID
-plot_df$ID <- factor(plot_df$ID, levels = rev(order))
-
-# Lets add a table next to it with peak troponin
-trop_info <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/post_steroid_trop_data.csv")
-trop_info$ID <- sapply(trop_info$sample, function(x) paste(strsplit(x, "_")[[1]][1:2], collapse = "_"))
-trop_info$sample <- NULL
-trop_info$trop_fc <-((trop_info$Peak.troponin- trop_info$post_steroid_troponin)/ trop_info$Peak.troponin) * 100
-
-# Get the missing ones as well
-trop_no_post <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/no_pre_steroid_peak_trop.csv")
-trop_no_post$post_steroid_troponin <- NA
-trop_no_post$trop_fc <- NA
-trop_no_post <- trop_no_post %>%
-  dplyr::select(colnames(trop_info))
-
-trop_info <- rbind(trop_info, trop_no_post)
-trop_info$ID <- factor(trop_info$ID, levels = levels(plot_df$ID))
-
-p1 <- ggplot(plot_df, aes(x = value, y = ID)) +
-  geom_stripes(aes(x = NULL), odd = "#ffffff00", even = "#33333333") +
-  geom_segment(aes(x=start_point, xend=end_point, y=ID, yend=ID)) +
-  geom_point(x = 0, size = 4, (aes(color = "Steroid treatment")), alpha = 0.9) +
-  geom_point(x = plot_df$steroid_to_pre_steroid, size = 4, pch = 17,aes(color = "Pre-steroid sample"), alpha = 0.9) +
-  geom_point(x = plot_df$steroid_to_post_steroid, size = 4, pch = 18, aes(color = "Post-steroid sample"), alpha = 0.9) +
-  scale_color_manual(name = "",
-                     values = c("Steroid treatment" = "#E41A1C", "Pre-steroid sample" = "#377EB8", "Post-steroid sample" = "#984EA3"),
-                     breaks = c("Steroid treatment", "Pre-steroid sample", "Post-steroid sample")) +
-  scale_shape_manual(name = "",
-                   values = c("Steroid treatment" = 16, "Pre-steroid sample" = 17, "Post-steroid sample" = 18),
-                   breaks = c("Steroid treatment", "Pre-steroid sample", "Post-steroid sample")) +
-  xlab("Days") +
-  theme_classic(base_size = 20)
-
-all(levels(trop_info$ID) == levels(plot_df$ID))
-
-p2 <- ggplot(trop_info, aes(x = 1, y = ID)) +
-  geom_tile(fill = NA, color = "black", size = 1) +
-  geom_text(aes(label = Peak.troponin)) +
-  ggtitle("Peak troponin") +
-  geom_stripes(odd = "#ffffff00", even = "#33333333") +
-  theme_void()
-p3 <- ggplot(trop_info, aes(x = 1, y = ID)) +
-  geom_tile(fill = NA, color = "black", size = 1) +
-  geom_text(aes(label = round(trop_fc, 2))) +
-  ggtitle("% Troponin reduction") +
-  geom_stripes(odd = "#ffffff00", even = "#33333333") +
-  theme_void()
-
-ggarrange(p1, p2, p3, widths = c(0.8, 0.15, 0.15), align = "h", nrow = 1, common.legend = TRUE)
-

+heatmap_df <- steroid_lineage_gsea %>% + complete(pathway, cluster) %>% + replace_na(list(log2FoldChange = 0, padj = 1)) %>% + select(pathway, cluster, NES, padj) %>% + filter(pathway %in% pathways) %>% + mutate(cluster = str_replace_all(cluster, '_', ' ')) %>% + mutate(pathway = abbrev_dict[pathway]) + +# Format main body: +# get information for the main body's cells +# not replacing NA values so they are colored gray in heatmap +nes_mtx <- heatmap_df %>% + select(c(pathway, cluster, NES)) %>% + pivot_wider(names_from = cluster, values_from = NES) %>% + column_to_rownames('pathway') %>% + select(all_of(lineage_order)) %>% + as.matrix() +nes_mtx <- nes_mtx[pathway_abbrevs,] %>% t() + +# define cell color range +heatmap_col_fun <- colorRamp2(c(-1, 0, ceiling(max(nes_mtx, na.rm = T))), + brewer.pal(5, 'PiYG')[c(5, 3, 1)]) + +# Main body annotation (FDR): +# get fdr values +# not replacing NA values so they are colored gray in heatmap +fdr_mtx <- heatmap_df %>% + select(c(pathway, cluster, padj)) %>% + pivot_wider(names_from = cluster, values_from = padj) %>% + column_to_rownames('pathway') %>% + select(all_of(lineage_order)) %>% + as.matrix() +fdr_mtx <- fdr_mtx[pathway_abbrevs,] %>% t() + +# make function for plotting fdr value (this function returns a function) +fdr_func <- function(mtx) { + function(j, i, x, y, width, height, fill) { + if (!is.na(mtx[i, j]) & (mtx[i, j] < p_thresh)) { + grid.circle(x = x, y = y, r = unit(1.5, 'mm'), + gp = gpar(fill = 'black', col = NA)) + } + } +} + +# make sure columns are the same +stopifnot(colnames(fdr_mtx) == colnames(nes_mtx)) +stopifnot(rownames(fdr_mtx) == rownames(nes_mtx)) + +# Legends +gsea_lgd <- Legend(col_fun = heatmap_col_fun, title = 'NES', direction = 'horizontal') +fdr_lgd <- Legend(pch = 16, type = "points", labels = glue("FDR < {p_thresh}")) +na_lgd <- Legend(labels = 'N/A', legend_gp = gpar(fill = 'grey')) +pd <- packLegend(gsea_lgd, fdr_lgd, na_lgd, direction = 'horizontal') + +# Plot +ht_gsea <- Heatmap(nes_mtx, + col = heatmap_col_fun, + cell_fun = fdr_func(fdr_mtx), + column_order = colnames(nes_mtx), + top_annotation = NULL, column_title = NULL, + name = 'NES', show_heatmap_legend = FALSE, + cluster_columns = FALSE, column_names_side = "top", + show_column_names = T, column_names_rot = 45, + cluster_rows = FALSE, row_names_side = "left", + row_title_rot = 0, row_title_gp = gpar(fontface = 'bold'), + row_gap = unit(2, "mm"), border = TRUE, + width = ncol(nes_mtx) * unit(6, "mm"), + height = nrow(nes_mtx) * unit(6, "mm")) + +draw(ht_gsea, + heatmap_legend_list = pd, + heatmap_legend_side = 'bottom', + merge_legends = FALSE)
+

+

Supplemental figure 1M

+
plot_info <- read.csv("/projects/home/nealpsmith/projects/myocarditis/swimmer_plot/swimmer_plot_info.csv")
+
+# Need to figure out how long to make the line for everyone
+plot_info$end_line <- apply(plot_info, 1, function(d){
+  if (!is.na(d[["post_steroid_samp_day"]])){
+    return(as.numeric(d[["post_steroid_samp_day"]]) - as.numeric(d[["ici_start_day"]]))
+  } else {
+    return(as.numeric(d[["steroid_start_day"]]) - as.numeric(d[["ici_start_day"]]))
+  }
+})
+plot_info$days_to_steroid <-  plot_info$steroid_start_day - plot_info$ici_start_day
+plot_info$days_to_post_steroid <-plot_info$post_steroid_samp_day - plot_info$ici_start_day
+plot_df <- plot_info %>%
+  dplyr::select(ID, time_ici_to_pre_steroid_samp, time_pre_steroid_samp_to_steroid,
+                time_steroid_start_to_post_steroid_samp,days_to_steroid, days_to_post_steroid, end_line)
+# Lets get the rows ordered
+fatal_ids <- c("SIC_3", "SIC_17", "SIC_175", "SIC_232", "SIC_264", "SIC_266")
+plot_df$fatal <- ifelse(plot_df$ID %in% fatal_ids, "fatal", "non-fatal")
+plot_df$has_pre <- ifelse(is.na(plot_df$time_ici_to_pre_steroid_samp), "no", "yes")
+
+order <- plot_df %>%
+  arrange(desc(has_pre), fatal) %>%
+  .$ID
+plot_df$ID <- factor(plot_df$ID, levels = rev(order))
+
+brewer.pal(4, "Set1")
+
+plot_info$steroid_to_pre_steroid <- plot_info$pre_steroid_samp_day - plot_info$steroid_start_day
+# Time from steroid to post-steroid samp
+plot_info$steroid_to_post_steroid <- plot_info$post_steroid_samp_day - plot_info$steroid_start_day
+
+plot_df <- plot_info %>%
+  dplyr::select(ID, steroid_to_pre_steroid, steroid_to_post_steroid) %>%
+  mutate("steroid_start" = 0)
+plot_df$start_point <- ifelse(is.na(plot_df$steroid_to_pre_steroid), 0, (plot_df$steroid_to_pre_steroid))
+plot_df$end_point <- ifelse(is.na(plot_df$steroid_to_post_steroid), 0, (plot_df$steroid_to_post_steroid))
+
+# Lets get the rows ordered
+fatal_ids <- c("SIC_3", "SIC_17", "SIC_175", "SIC_232", "SIC_264", "SIC_266")
+plot_df$fatal <- ifelse(plot_df$ID %in% fatal_ids, "fatal", "non-fatal")
+plot_df$has_pre <- ifelse(is.na(plot_df$steroid_to_pre_steroid), "no", "yes")
+plot_df$has_post <- ifelse(is.na(plot_df$steroid_to_post_steroid), "no", "yes")
+order <- plot_df %>%
+  arrange(desc(has_post), fatal,steroid_to_post_steroid ) %>%
+  .$ID
+plot_df$ID <- factor(plot_df$ID, levels = rev(order))
+
+# Lets add a table next to it with peak troponin
+trop_info <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/post_steroid_trop_data.csv")
+trop_info$ID <- sapply(trop_info$sample, function(x) paste(strsplit(x, "_")[[1]][1:2], collapse = "_"))
+trop_info$sample <- NULL
+trop_info$trop_fc <-((trop_info$Peak.troponin- trop_info$post_steroid_troponin)/ trop_info$Peak.troponin) * 100
+
+# Get the missing ones as well
+trop_no_post <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/no_pre_steroid_peak_trop.csv")
+trop_no_post$post_steroid_troponin <- NA
+trop_no_post$trop_fc <- NA
+trop_no_post <- trop_no_post %>%
+  dplyr::select(colnames(trop_info))
+
+trop_info <- rbind(trop_info, trop_no_post)
+trop_info$ID <- factor(trop_info$ID, levels = levels(plot_df$ID))
+
+p1 <- ggplot(plot_df, aes(x = value, y = ID)) +
+  geom_stripes(aes(x = NULL), odd = "#ffffff00", even = "#33333333") +
+  geom_segment(aes(x=start_point, xend=end_point, y=ID, yend=ID)) +
+  geom_point(x = 0, size = 4, (aes(color = "Steroid treatment")), alpha = 0.9) +
+  geom_point(x = plot_df$steroid_to_pre_steroid, size = 4, pch = 17,aes(color = "Pre-steroid sample"), alpha = 0.9) +
+  geom_point(x = plot_df$steroid_to_post_steroid, size = 4, pch = 18, aes(color = "Post-steroid sample"), alpha = 0.9) +
+  scale_color_manual(name = "",
+                     values = c("Steroid treatment" = "#E41A1C", "Pre-steroid sample" = "#377EB8", "Post-steroid sample" = "#984EA3"),
+                     breaks = c("Steroid treatment", "Pre-steroid sample", "Post-steroid sample")) +
+  scale_shape_manual(name = "",
+                   values = c("Steroid treatment" = 16, "Pre-steroid sample" = 17, "Post-steroid sample" = 18),
+                   breaks = c("Steroid treatment", "Pre-steroid sample", "Post-steroid sample")) +
+  xlab("Days") +
+  theme_classic(base_size = 20)
+
+all(levels(trop_info$ID) == levels(plot_df$ID))
+
+p2 <- ggplot(trop_info, aes(x = 1, y = ID)) +
+  geom_tile(fill = NA, color = "black", size = 1) +
+  geom_text(aes(label = Peak.troponin)) +
+  ggtitle("Peak troponin") +
+  geom_stripes(odd = "#ffffff00", even = "#33333333") +
+  theme_void()
+p3 <- ggplot(trop_info, aes(x = 1, y = ID)) +
+  geom_tile(fill = NA, color = "black", size = 1) +
+  geom_text(aes(label = round(trop_fc, 2))) +
+  ggtitle("% Troponin reduction") +
+  geom_stripes(odd = "#ffffff00", even = "#33333333") +
+  theme_void()
+
+ggarrange(p1, p2, p3, widths = c(0.8, 0.15, 0.15), align = "h", nrow = 1, common.legend = TRUE)
+

## [1] "#E41A1C" "#377EB8" "#4DAF4A" "#984EA3"
 ## [1] TRUE
 

Supplemental figure 1N

-
mtx <- read.csv("/projects/home/sramesh/github/myocarditis/output/pb_counts_by_sample_id_and_lineage.csv",
-                row.names = 1)
-
-norm_counts <- apply(mtx, 2, function(c){
-  n_total <- sum(c)
-  per_100k <- (c * 1000000) / n_total
-  return(per_100k)
-})
-
-norm_counts <- log1p(norm_counts)
-
-meta <- read.csv("/projects/home/sramesh/github/myocarditis/output/pb_meta_by_sample_id_and_lineage.csv",
-                 row.names = 1)
-meta$donor <- sapply(meta$sample, function(x) paste(strsplit(x, "_")[[1]][1:2], collapse = "_"))
-
-trop_meta <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/post_steroid_trop_data.csv")
-trop_meta$trop_fc <-(trop_meta$Peak.troponin- trop_meta$post_steroid_troponin)/ trop_meta$Peak.troponin
-trop_meta$trop_logfc <- log1p(trop_meta$trop_fc)
-
-# Also need to add time from steroid start to post-steroid sample
-time_info <- read.csv("/projects/home/nealpsmith/projects/myocarditis/swimmer_plot/swimmer_plot_info.csv")
-time_info %<>%
-  dplyr::select(ID, time_steroid_start_to_post_steroid_samp) %>%
-  dplyr::rename(donor = ID)
-
-meta %<>%
-  rownames_to_column() %>%
-  dplyr::left_join(trop_meta, by = "sample") %>%
-  dplyr::left_join(time_info, by = "donor") %>%
-  column_to_rownames("rowname")
-meta$log_time_steroid_start_to_post_steroid_samp <- log1p(meta$time_steroid_start_to_post_steroid_samp)
-# Need to fix SIC_3
-rownames(meta)[grepl("SIC_3_", rownames(meta))] <- sub("-", ".", rownames(meta)[grepl("SIC_3_", rownames(meta))])
-ps_meta <- meta %>%
-  dplyr::filter(sample %in% trop_meta$sample)
-ps_mtx <- mtx[,rownames(ps_meta)]
-
-if (!file.exists("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_deg_res.csv")){
-  # Iterate across lineages
-  gene_sets <-  gmtPathways( "/projects/home/nealpsmith/projects/medoff/data/msigdb_symbols.gmt")
-  n_gene_df <- data.frame()
-  deg_res <- data.frame()
-  gsea_res <- data.frame()
-
-  for(lin in unique(ps_meta$lineage)){
-
-    if (lin == "Doublets_and_RBCs"){
-      next()
-    }
-
-    meta_temp <- ps_meta[ps_meta$lineage == lin,]
-    meta_temp$post_steroid_fatal <- factor(meta_temp$post_steroid_fatal, levels = c("non_fatal", "fatal"))
-    count_temp <- ps_mtx[,rownames(meta_temp)]
-
-    # Make sure the genes are detected in enough samples
-    n_samp <- rowSums(count_temp != 0)
-    count_temp <- count_temp[n_samp > round(nrow(meta_temp) / 2),]
-
-    models <- c(as.formula("~log_time_steroid_start_to_post_steroid_samp+trop_logfc"),
-                as.formula("~trop_logfc+log_time_steroid_start_to_post_steroid_samp"),
-                as.formula("~post_steroid_fatal"))
-    for (m in models){
-      test_var <- tail(strsplit(as.character(m), "+", fixed = TRUE)[[2]], n = 1)
-      test_var <- gsub(" ", "", test_var)
-
-      dds <- DESeqDataSetFromMatrix(countData = count_temp,
-                                colData = meta_temp,
-                                design = m)
-      dds <- DESeq(dds)
-      res <- as.data.frame(results(dds))
-      res <- res[!is.na(res$padj),]
-      res$contrast <- test_var
-      res$lin <- lin
-      res$gene <- rownames(res)
-      deg_res <- rbind(deg_res, res)
-
-      n_up <- nrow(res[res$padj < 0.1 & res$log2FoldChange > 0,])
-      n_down <- nrow(res[res$padj < 0.1 & res$log2FoldChange < 0,])
-      df <- data.frame(lin = lin, contrast = test_var, n_up = n_up, n_down = n_down)
-      n_gene_df <- rbind(n_gene_df, df)
-
-      ## GSEA ##
-      order_genes <- res %>%
-        dplyr::select(gene, stat) %>%
-        na.omit() %>%
-        distinct() %>%
-        group_by(gene) %>%
-        summarize(stat=mean(stat))
-      ranks <- deframe(order_genes)
-
-      for (geneset in c("BIOCARTA", "HALLMARK", "KEGG")){
-        gsets <- gene_sets[grep(geneset, names(gene_sets))]
-        fgseaRes <- fgsea(pathways=gsets, stats=ranks, nperm=10000)
-        fgseaResTidy <- fgseaRes %>%
-          as_tibble() %>%
-          arrange(desc(NES))
-        fgseaResTidy$contrast <- test_var
-        fgseaResTidy$lin <- lin
-        fgseaResTidy$library <- geneset
-        gsea_res <- rbind(gsea_res, fgseaResTidy)
-
-        }
-      }
-    }
-    gsea_res$leadingEdge <- as.character(gsea_res$leadingEdge)
-
-    # Write the results
-    write.csv(deg_res, "/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_deg_res.csv")
-    write.csv(gsea_res, "/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_gsea_res.csv")
-
-} else {
-  deg_res <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_deg_res.csv",
-                      row.names = 1)
-  gsea_res <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_gsea_res.csv",
-                       row.names = 1)
-}
-
-var <- "log_time_steroid_start_to_post_steroid_samp"
-plot_df <-deg_res %>%
-  dplyr::filter(contrast == var, padj < 0.1) %>%
-  mutate(variable = ifelse(log2FoldChange > 0, "n_up", "n_down")) %>%
-  group_by(lin, variable) %>%
-  summarise(value = n()) %>%
-  mutate(value = ifelse(variable == "n_down", -value, value))
-
-ggplot(plot_df, aes(x = lin, y = value, group = variable, fill = variable)) +
-  geom_bar(stat = "identity") + coord_flip() +
-  scale_fill_manual(values = c("#FF0000", "#0000FF")) +
-  scale_y_continuous(labels = abs, limits = c(-1500, 1750)) +
-  ylab("# of DE genes") + xlab("") +
-  ggtitle(glue("model by {var}")) +
-  theme_classic(base_size = 15)
-

-
pathways <- c('KEGG_ALLOGRAFT_REJECTION', 'KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION',
-              'KEGG_CELL_ADHESION_MOLECULES_CAMS', 'KEGG_DNA_REPLICATION',
-              'HALLMARK_INTERFERON_GAMMA_RESPONSE',
-              'KEGG_VIRAL_MYOCARDITIS')
-pathway_abbrevs <- c('KEGG: Allo', 'KEGG: AP', 'KEGG: CAMS', 'KEGG: DNA', 'HALLMARK: IFNG', 'KEGG: Viral Myo')
-abbrev_dict <- setNames(pathway_abbrevs, pathways)
-lineage_order <- c('pDCs', 'cDCs', 'B and plasma', 'MNP', 'CD4', 'CD8 and NK')
-c <- "log_time_steroid_start_to_post_steroid_samp"
-c_data <- gsea_res %>%
-dplyr::filter(contrast == c)
+
mtx <- read.csv("/projects/home/sramesh/github/myocarditis/output/pb_counts_by_sample_id_and_lineage.csv",
+                row.names = 1)
+
+norm_counts <- apply(mtx, 2, function(c){
+  n_total <- sum(c)
+  per_100k <- (c * 1000000) / n_total
+  return(per_100k)
+})
+
+norm_counts <- log1p(norm_counts)
 
-heatmap_df <- c_data %>%
-complete(pathway, lin) %>%
-replace_na(list(log2FoldChange = 0, padj = 1)) %>%
-dplyr::select(pathway, lin, NES, padj) %>%
-filter(pathway %in% pathways) %>%
-mutate(cluster = str_replace_all(lin, '_', ' ')) %>%
-mutate(pathway = abbrev_dict[pathway])
+meta <- read.csv("/projects/home/sramesh/github/myocarditis/output/pb_meta_by_sample_id_and_lineage.csv",
+                 row.names = 1)
+meta$donor <- sapply(meta$sample, function(x) paste(strsplit(x, "_")[[1]][1:2], collapse = "_"))
+
+trop_meta <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/post_steroid_trop_data.csv")
+trop_meta$trop_fc <-(trop_meta$Peak.troponin- trop_meta$post_steroid_troponin)/ trop_meta$Peak.troponin
+trop_meta$trop_logfc <- log1p(trop_meta$trop_fc)
 
-nes_mtx <- heatmap_df %>%
-dplyr::select(c(pathway, cluster, NES)) %>%
-pivot_wider(names_from = cluster, values_from = NES) %>%
-column_to_rownames('pathway') %>%
-dplyr::select(all_of(lineage_order)) %>%
-as.matrix()
-nes_mtx <- nes_mtx[pathway_abbrevs,] %>% t()
-
-# define cell color range
-heatmap_col_fun <- colorRamp2(c(floor(min(nes_mtx, na.rm = T)), 0, ceiling(max(nes_mtx, na.rm = T))),
-                            brewer.pal(5, 'PiYG')[c(5,3,1)])
-
-fdr_mtx <- heatmap_df %>%
-dplyr::select(c(pathway, cluster, padj)) %>%
-pivot_wider(names_from = cluster, values_from = padj) %>%
-column_to_rownames('pathway') %>%
-dplyr::select(all_of(lineage_order)) %>%
-as.matrix()
-fdr_mtx <- fdr_mtx[pathway_abbrevs,] %>% t()
-
-# make function for plotting fdr value (this function returns a function)
-fdr_func <- function(mtx) {
-function(j, i, x, y, width, height, fill) {
-  if (!is.na(mtx[i, j]) & (mtx[i,j] < 0.1)) {
-    grid.circle(x = x, y = y, r = unit(1.5, 'mm'),
-                gp = gpar(fill = 'black', col = NA))
-  }
-}
-}
-
-# make sure columns are the same
-stopifnot(colnames(fdr_mtx) == colnames(nes_mtx))
-stopifnot(rownames(fdr_mtx) == rownames(nes_mtx))
-
-gsea_lgd <- Legend(col_fun = heatmap_col_fun, title = 'NES', direction = 'horizontal')
-fdr_lgd <- Legend(pch = 16, type = "points", labels = "FDR < 0.1")
-na_lgd <- Legend(labels = 'N/A', legend_gp = gpar(fill = 'grey'))
-pd <- packLegend(gsea_lgd, fdr_lgd, na_lgd, direction = 'horizontal')
+# Also need to add time from steroid start to post-steroid sample
+time_info <- read.csv("/projects/home/nealpsmith/projects/myocarditis/swimmer_plot/swimmer_plot_info.csv")
+time_info %<>%
+  dplyr::select(ID, time_steroid_start_to_post_steroid_samp) %>%
+  dplyr::rename(donor = ID)
+
+meta %<>%
+  rownames_to_column() %>%
+  dplyr::left_join(trop_meta, by = "sample") %>%
+  dplyr::left_join(time_info, by = "donor") %>%
+  column_to_rownames("rowname")
+meta$log_time_steroid_start_to_post_steroid_samp <- log1p(meta$time_steroid_start_to_post_steroid_samp)
+# Need to fix SIC_3
+rownames(meta)[grepl("SIC_3_", rownames(meta))] <- sub("-", ".", rownames(meta)[grepl("SIC_3_", rownames(meta))])
+ps_meta <- meta %>%
+  dplyr::filter(sample %in% trop_meta$sample)
+ps_mtx <- mtx[,rownames(ps_meta)]
+
+if (!file.exists("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_deg_res.csv")){
+  # Iterate across lineages
+  gene_sets <-  gmtPathways( "/projects/home/nealpsmith/projects/medoff/data/msigdb_symbols.gmt")
+  n_gene_df <- data.frame()
+  deg_res <- data.frame()
+  gsea_res <- data.frame()
+
+  for(lin in unique(ps_meta$lineage)){
+
+    if (lin == "Doublets_and_RBCs"){
+      next()
+    }
+
+    meta_temp <- ps_meta[ps_meta$lineage == lin,]
+    meta_temp$post_steroid_fatal <- factor(meta_temp$post_steroid_fatal, levels = c("non_fatal", "fatal"))
+    count_temp <- ps_mtx[,rownames(meta_temp)]
+
+    # Make sure the genes are detected in enough samples
+    n_samp <- rowSums(count_temp != 0)
+    count_temp <- count_temp[n_samp > round(nrow(meta_temp) / 2),]
 
-ht_gsea <- Heatmap(nes_mtx,
-                 col = heatmap_col_fun,
-                 cell_fun = fdr_func(fdr_mtx),
-                 column_order = colnames(nes_mtx),
-                 top_annotation = NULL, column_title = c,
-                 name = 'NES', show_heatmap_legend = FALSE,
-                 cluster_columns = FALSE, column_names_side = "top",
-                 show_column_names = T, column_names_rot = 45,
-                 cluster_rows = FALSE, row_names_side = "left",
-                 row_title_rot = 0, row_title_gp=gpar(fontface='bold'),
-                 row_gap = unit(2, "mm"), border = TRUE,
-                 width = ncol(nes_mtx)*unit(6, "mm"),
-                 height = nrow(nes_mtx)*unit(6, "mm"))
-
-draw(ht_gsea,
-   heatmap_legend_list = pd,
-   heatmap_legend_side = 'bottom',
-   merge_legends = FALSE)
+ models <- c(as.formula("~log_time_steroid_start_to_post_steroid_samp+trop_logfc"), + as.formula("~trop_logfc+log_time_steroid_start_to_post_steroid_samp"), + as.formula("~post_steroid_fatal")) + for (m in models){ + test_var <- tail(strsplit(as.character(m), "+", fixed = TRUE)[[2]], n = 1) + test_var <- gsub(" ", "", test_var) + + dds <- DESeqDataSetFromMatrix(countData = count_temp, + colData = meta_temp, + design = m) + dds <- DESeq(dds) + res <- as.data.frame(results(dds)) + res <- res[!is.na(res$padj),] + res$contrast <- test_var + res$lin <- lin + res$gene <- rownames(res) + deg_res <- rbind(deg_res, res) + + n_up <- nrow(res[res$padj < 0.1 & res$log2FoldChange > 0,]) + n_down <- nrow(res[res$padj < 0.1 & res$log2FoldChange < 0,]) + df <- data.frame(lin = lin, contrast = test_var, n_up = n_up, n_down = n_down) + n_gene_df <- rbind(n_gene_df, df) + + ## GSEA ## + order_genes <- res %>% + dplyr::select(gene, stat) %>% + na.omit() %>% + distinct() %>% + group_by(gene) %>% + summarize(stat=mean(stat)) + ranks <- deframe(order_genes) + + for (geneset in c("BIOCARTA", "HALLMARK", "KEGG")){ + gsets <- gene_sets[grep(geneset, names(gene_sets))] + fgseaRes <- fgsea(pathways=gsets, stats=ranks, nperm=10000) + fgseaResTidy <- fgseaRes %>% + as_tibble() %>% + arrange(desc(NES)) + fgseaResTidy$contrast <- test_var + fgseaResTidy$lin <- lin + fgseaResTidy$library <- geneset + gsea_res <- rbind(gsea_res, fgseaResTidy) + + } + } + } + gsea_res$leadingEdge <- as.character(gsea_res$leadingEdge) + + # Write the results + write.csv(deg_res, "/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_deg_res.csv") + write.csv(gsea_res, "/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_gsea_res.csv") + +} else { + deg_res <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_deg_res.csv", + row.names = 1) + gsea_res <- read.csv("/projects/home/nealpsmith/projects/myocarditis/post_steroid_trop/data/all_gsea_res.csv", + row.names = 1) +} + +var <- "log_time_steroid_start_to_post_steroid_samp" +plot_df <-deg_res %>% + dplyr::filter(contrast == var, padj < 0.1) %>% + mutate(variable = ifelse(log2FoldChange > 0, "n_up", "n_down")) %>% + group_by(lin, variable) %>% + summarise(value = n()) %>% + mutate(value = ifelse(variable == "n_down", -value, value)) + +ggplot(plot_df, aes(x = lin, y = value, group = variable, fill = variable)) + + geom_bar(stat = "identity") + coord_flip() + + scale_fill_manual(values = c("#FF0000", "#0000FF")) + + scale_y_continuous(labels = abs, limits = c(-1500, 1750)) + + ylab("# of DE genes") + xlab("") + + ggtitle(glue("model by {var}")) + + theme_classic(base_size = 15)
+

+

Supplemental figure 1O

+
pathways <- c('KEGG_ALLOGRAFT_REJECTION', 'KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION',
+              'KEGG_CELL_ADHESION_MOLECULES_CAMS', 'KEGG_DNA_REPLICATION',
+              'HALLMARK_INTERFERON_GAMMA_RESPONSE',
+              'KEGG_VIRAL_MYOCARDITIS')
+pathway_abbrevs <- c('KEGG: Allo', 'KEGG: AP', 'KEGG: CAMS', 'KEGG: DNA', 'HALLMARK: IFNG', 'KEGG: Viral Myo')
+abbrev_dict <- setNames(pathway_abbrevs, pathways)
+lineage_order <- c('pDCs', 'cDCs', 'B and plasma', 'MNP', 'CD4', 'CD8 and NK')
+c <- "log_time_steroid_start_to_post_steroid_samp"
+c_data <- gsea_res %>%
+dplyr::filter(contrast == c)
+
+heatmap_df <- c_data %>%
+complete(pathway, lin) %>%
+replace_na(list(log2FoldChange = 0, padj = 1)) %>%
+dplyr::select(pathway, lin, NES, padj) %>%
+filter(pathway %in% pathways) %>%
+mutate(cluster = str_replace_all(lin, '_', ' ')) %>%
+mutate(pathway = abbrev_dict[pathway])
+
+nes_mtx <- heatmap_df %>%
+dplyr::select(c(pathway, cluster, NES)) %>%
+pivot_wider(names_from = cluster, values_from = NES) %>%
+column_to_rownames('pathway') %>%
+dplyr::select(all_of(lineage_order)) %>%
+as.matrix()
+nes_mtx <- nes_mtx[pathway_abbrevs,] %>% t()
+
+# define cell color range
+heatmap_col_fun <- colorRamp2(c(floor(min(nes_mtx, na.rm = T)), 0, ceiling(max(nes_mtx, na.rm = T))),
+                            brewer.pal(5, 'PiYG')[c(5,3,1)])
+
+fdr_mtx <- heatmap_df %>%
+dplyr::select(c(pathway, cluster, padj)) %>%
+pivot_wider(names_from = cluster, values_from = padj) %>%
+column_to_rownames('pathway') %>%
+dplyr::select(all_of(lineage_order)) %>%
+as.matrix()
+fdr_mtx <- fdr_mtx[pathway_abbrevs,] %>% t()
+
+# make function for plotting fdr value (this function returns a function)
+fdr_func <- function(mtx) {
+function(j, i, x, y, width, height, fill) {
+  if (!is.na(mtx[i, j]) & (mtx[i,j] < 0.1)) {
+    grid.circle(x = x, y = y, r = unit(1.5, 'mm'),
+                gp = gpar(fill = 'black', col = NA))
+  }
+}
+}
+
+# make sure columns are the same
+stopifnot(colnames(fdr_mtx) == colnames(nes_mtx))
+stopifnot(rownames(fdr_mtx) == rownames(nes_mtx))
+
+gsea_lgd <- Legend(col_fun = heatmap_col_fun, title = 'NES', direction = 'horizontal')
+fdr_lgd <- Legend(pch = 16, type = "points", labels = "FDR < 0.1")
+na_lgd <- Legend(labels = 'N/A', legend_gp = gpar(fill = 'grey'))
+pd <- packLegend(gsea_lgd, fdr_lgd, na_lgd, direction = 'horizontal')
+
+ht_gsea <- Heatmap(nes_mtx,
+                 col = heatmap_col_fun,
+                 cell_fun = fdr_func(fdr_mtx),
+                 column_order = colnames(nes_mtx),
+                 top_annotation = NULL, column_title = c,
+                 name = 'NES', show_heatmap_legend = FALSE,
+                 cluster_columns = FALSE, column_names_side = "top",
+                 show_column_names = T, column_names_rot = 45,
+                 cluster_rows = FALSE, row_names_side = "left",
+                 row_title_rot = 0, row_title_gp=gpar(fontface='bold'),
+                 row_gap = unit(2, "mm"), border = TRUE,
+                 width = ncol(nes_mtx)*unit(6, "mm"),
+                 height = nrow(nes_mtx)*unit(6, "mm"))
+
+draw(ht_gsea,
+   heatmap_legend_list = pd,
+   heatmap_legend_side = 'bottom',
+   merge_legends = FALSE)

diff --git a/figure_scripts/extended_data_figure_1/extended_data_figure_1.md b/figure_scripts/extended_data_figure_1/extended_data_figure_1.md index 319e6a8..249ff0f 100644 --- a/figure_scripts/extended_data_figure_1/extended_data_figure_1.md +++ b/figure_scripts/extended_data_figure_1/extended_data_figure_1.md @@ -683,6 +683,115 @@ ggplot(df, aes(x = up, y = cluster)) + ## Supplemental figure 1L +``` r +#### first run de +if (!file.exists(glue('{wd}/output/lineage_de_by_steroid_treatment_all_results.csv'))) { + counts <- read_counts(glue("{wd}/output/pb_counts_by_sample_id_and_lineage.csv")) + meta <- read_meta(glue("{wd}/output/pb_meta_by_sample_id_and_lineage.csv")) + meta <- meta %>% + filter(steroid_treatment != "NA") %>% + filter(!str_detect(lineage, "Doublet")) + steroid_contrast_vec <- c('steroid_treatment', 'pre_steroid', 'post_steroid') + steroid_lineage_results <- run_de_by_comp_var(counts, meta, glue('{wd}/output/lineage'), steroid_contrast_vec, + deseq_formula = formula("~ steroid_treatment")) +} else { + steroid_lineage_results <- read_csv(glue('{wd}/output/lineage_de_by_steroid_treatment_all_results.csv')) +} + +#### then run gsea on de results +if (!file.exists(glue('{wd}/output/gsea_by_steroid_treatment_by_lineage.csv'))) { + steroid_lineage_gsea <- run_gsea(steroid_lineage_results) + write_csv(steroid_lineage_gsea, glue('{wd}/output/gsea_by_steroid_treatment_by_lineage.csv')) +} else { + steroid_lineage_gsea <- read_csv(glue('{wd}/output/gsea_by_steroid_treatment_by_lineage.csv')) +} + +#### make heatmap +pathways <- c('KEGG_ALLOGRAFT_REJECTION', 'KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION', + 'KEGG_CELL_ADHESION_MOLECULES_CAMS', 'KEGG_DNA_REPLICATION', 'HALLMARK_INTERFERON_GAMMA_RESPONSE', + 'KEGG_VIRAL_MYOCARDITIS') +pathway_abbrevs <- c('KEGG: Allo', 'KEGG: AP', 'KEGG: CAMS', 'KEGG: DNA', 'HALLMARK: IFNG', 'KEGG: Viral Myo') +abbrev_dict <- setNames(pathway_abbrevs, pathways) +lineage_order <- c('pDCs', 'cDCs', 'B and plasma', 'MNP', 'CD4', 'CD8 and NK') +p_thresh <- 0.1 + +heatmap_df <- steroid_lineage_gsea %>% + complete(pathway, cluster) %>% + replace_na(list(log2FoldChange = 0, padj = 1)) %>% + select(pathway, cluster, NES, padj) %>% + filter(pathway %in% pathways) %>% + mutate(cluster = str_replace_all(cluster, '_', ' ')) %>% + mutate(pathway = abbrev_dict[pathway]) + +# Format main body: +# get information for the main body's cells +# not replacing NA values so they are colored gray in heatmap +nes_mtx <- heatmap_df %>% + select(c(pathway, cluster, NES)) %>% + pivot_wider(names_from = cluster, values_from = NES) %>% + column_to_rownames('pathway') %>% + select(all_of(lineage_order)) %>% + as.matrix() +nes_mtx <- nes_mtx[pathway_abbrevs,] %>% t() + +# define cell color range +heatmap_col_fun <- colorRamp2(c(-1, 0, ceiling(max(nes_mtx, na.rm = T))), + brewer.pal(5, 'PiYG')[c(5, 3, 1)]) + +# Main body annotation (FDR): +# get fdr values +# not replacing NA values so they are colored gray in heatmap +fdr_mtx <- heatmap_df %>% + select(c(pathway, cluster, padj)) %>% + pivot_wider(names_from = cluster, values_from = padj) %>% + column_to_rownames('pathway') %>% + select(all_of(lineage_order)) %>% + as.matrix() +fdr_mtx <- fdr_mtx[pathway_abbrevs,] %>% t() + +# make function for plotting fdr value (this function returns a function) +fdr_func <- function(mtx) { + function(j, i, x, y, width, height, fill) { + if (!is.na(mtx[i, j]) & (mtx[i, j] < p_thresh)) { + grid.circle(x = x, y = y, r = unit(1.5, 'mm'), + gp = gpar(fill = 'black', col = NA)) + } + } +} + +# make sure columns are the same +stopifnot(colnames(fdr_mtx) == colnames(nes_mtx)) +stopifnot(rownames(fdr_mtx) == rownames(nes_mtx)) + +# Legends +gsea_lgd <- Legend(col_fun = heatmap_col_fun, title = 'NES', direction = 'horizontal') +fdr_lgd <- Legend(pch = 16, type = "points", labels = glue("FDR < {p_thresh}")) +na_lgd <- Legend(labels = 'N/A', legend_gp = gpar(fill = 'grey')) +pd <- packLegend(gsea_lgd, fdr_lgd, na_lgd, direction = 'horizontal') + +# Plot +ht_gsea <- Heatmap(nes_mtx, + col = heatmap_col_fun, + cell_fun = fdr_func(fdr_mtx), + column_order = colnames(nes_mtx), + top_annotation = NULL, column_title = NULL, + name = 'NES', show_heatmap_legend = FALSE, + cluster_columns = FALSE, column_names_side = "top", + show_column_names = T, column_names_rot = 45, + cluster_rows = FALSE, row_names_side = "left", + row_title_rot = 0, row_title_gp = gpar(fontface = 'bold'), + row_gap = unit(2, "mm"), border = TRUE, + width = ncol(nes_mtx) * unit(6, "mm"), + height = nrow(nes_mtx) * unit(6, "mm")) + +draw(ht_gsea, + heatmap_legend_list = pd, + heatmap_legend_side = 'bottom', + merge_legends = FALSE) +``` + +![](extended_data_figure_1_files/figure-gfm/supp_1l-1.png) + ## Supplemental figure 1M ``` r @@ -926,6 +1035,8 @@ ggplot(plot_df, aes(x = lin, y = value, group = variable, fill = variable)) + ![](extended_data_figure_1_files/figure-gfm/supp_1n-1.png) +## Supplemental figure 1O + ``` r pathways <- c('KEGG_ALLOGRAFT_REJECTION', 'KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION', 'KEGG_CELL_ADHESION_MOLECULES_CAMS', 'KEGG_DNA_REPLICATION', diff --git a/figure_scripts/extended_data_figure_1/extended_data_figure_1.rmd b/figure_scripts/extended_data_figure_1/extended_data_figure_1.rmd index e6ebaeb..cf5b46e 100644 --- a/figure_scripts/extended_data_figure_1/extended_data_figure_1.rmd +++ b/figure_scripts/extended_data_figure_1/extended_data_figure_1.rmd @@ -509,6 +509,113 @@ ggplot(df, aes(x = up, y = cluster)) + ## Supplemental figure 1L +```{r supp_1l, message = F, results = 'hold', warning = F, fig.width = 7, fig.height = 7} +#### first run de +if (!file.exists(glue('{wd}/output/lineage_de_by_steroid_treatment_all_results.csv'))) { + counts <- read_counts(glue("{wd}/output/pb_counts_by_sample_id_and_lineage.csv")) + meta <- read_meta(glue("{wd}/output/pb_meta_by_sample_id_and_lineage.csv")) + meta <- meta %>% + filter(steroid_treatment != "NA") %>% + filter(!str_detect(lineage, "Doublet")) + steroid_contrast_vec <- c('steroid_treatment', 'pre_steroid', 'post_steroid') + steroid_lineage_results <- run_de_by_comp_var(counts, meta, glue('{wd}/output/lineage'), steroid_contrast_vec, + deseq_formula = formula("~ steroid_treatment")) +} else { + steroid_lineage_results <- read_csv(glue('{wd}/output/lineage_de_by_steroid_treatment_all_results.csv')) +} + +#### then run gsea on de results +if (!file.exists(glue('{wd}/output/gsea_by_steroid_treatment_by_lineage.csv'))) { + steroid_lineage_gsea <- run_gsea(steroid_lineage_results) + write_csv(steroid_lineage_gsea, glue('{wd}/output/gsea_by_steroid_treatment_by_lineage.csv')) +} else { + steroid_lineage_gsea <- read_csv(glue('{wd}/output/gsea_by_steroid_treatment_by_lineage.csv')) +} + +#### make heatmap +pathways <- c('KEGG_ALLOGRAFT_REJECTION', 'KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION', + 'KEGG_CELL_ADHESION_MOLECULES_CAMS', 'KEGG_DNA_REPLICATION', 'HALLMARK_INTERFERON_GAMMA_RESPONSE', + 'KEGG_VIRAL_MYOCARDITIS') +pathway_abbrevs <- c('KEGG: Allo', 'KEGG: AP', 'KEGG: CAMS', 'KEGG: DNA', 'HALLMARK: IFNG', 'KEGG: Viral Myo') +abbrev_dict <- setNames(pathway_abbrevs, pathways) +lineage_order <- c('pDCs', 'cDCs', 'B and plasma', 'MNP', 'CD4', 'CD8 and NK') +p_thresh <- 0.1 + +heatmap_df <- steroid_lineage_gsea %>% + complete(pathway, cluster) %>% + replace_na(list(log2FoldChange = 0, padj = 1)) %>% + select(pathway, cluster, NES, padj) %>% + filter(pathway %in% pathways) %>% + mutate(cluster = str_replace_all(cluster, '_', ' ')) %>% + mutate(pathway = abbrev_dict[pathway]) + +# Format main body: +# get information for the main body's cells +# not replacing NA values so they are colored gray in heatmap +nes_mtx <- heatmap_df %>% + select(c(pathway, cluster, NES)) %>% + pivot_wider(names_from = cluster, values_from = NES) %>% + column_to_rownames('pathway') %>% + select(all_of(lineage_order)) %>% + as.matrix() +nes_mtx <- nes_mtx[pathway_abbrevs,] %>% t() + +# define cell color range +heatmap_col_fun <- colorRamp2(c(-1, 0, ceiling(max(nes_mtx, na.rm = T))), + brewer.pal(5, 'PiYG')[c(5, 3, 1)]) + +# Main body annotation (FDR): +# get fdr values +# not replacing NA values so they are colored gray in heatmap +fdr_mtx <- heatmap_df %>% + select(c(pathway, cluster, padj)) %>% + pivot_wider(names_from = cluster, values_from = padj) %>% + column_to_rownames('pathway') %>% + select(all_of(lineage_order)) %>% + as.matrix() +fdr_mtx <- fdr_mtx[pathway_abbrevs,] %>% t() + +# make function for plotting fdr value (this function returns a function) +fdr_func <- function(mtx) { + function(j, i, x, y, width, height, fill) { + if (!is.na(mtx[i, j]) & (mtx[i, j] < p_thresh)) { + grid.circle(x = x, y = y, r = unit(1.5, 'mm'), + gp = gpar(fill = 'black', col = NA)) + } + } +} + +# make sure columns are the same +stopifnot(colnames(fdr_mtx) == colnames(nes_mtx)) +stopifnot(rownames(fdr_mtx) == rownames(nes_mtx)) + +# Legends +gsea_lgd <- Legend(col_fun = heatmap_col_fun, title = 'NES', direction = 'horizontal') +fdr_lgd <- Legend(pch = 16, type = "points", labels = glue("FDR < {p_thresh}")) +na_lgd <- Legend(labels = 'N/A', legend_gp = gpar(fill = 'grey')) +pd <- packLegend(gsea_lgd, fdr_lgd, na_lgd, direction = 'horizontal') + +# Plot +ht_gsea <- Heatmap(nes_mtx, + col = heatmap_col_fun, + cell_fun = fdr_func(fdr_mtx), + column_order = colnames(nes_mtx), + top_annotation = NULL, column_title = NULL, + name = 'NES', show_heatmap_legend = FALSE, + cluster_columns = FALSE, column_names_side = "top", + show_column_names = T, column_names_rot = 45, + cluster_rows = FALSE, row_names_side = "left", + row_title_rot = 0, row_title_gp = gpar(fontface = 'bold'), + row_gap = unit(2, "mm"), border = TRUE, + width = ncol(nes_mtx) * unit(6, "mm"), + height = nrow(nes_mtx) * unit(6, "mm")) + +draw(ht_gsea, + heatmap_legend_list = pd, + heatmap_legend_side = 'bottom', + merge_legends = FALSE) +``` + ## Supplemental figure 1M ```{r supp_1m, message = F, results = 'hold', warning = F, fig.width = 10, height = 12} @@ -748,6 +855,7 @@ ggplot(plot_df, aes(x = lin, y = value, group = variable, fill = variable)) + ``` +## Supplemental figure 1O ```{r supp_1o, message = F, results = 'hold', warning = F, fig.width = 7, fig.height = 7} pathways <- c('KEGG_ALLOGRAFT_REJECTION', 'KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION', 'KEGG_CELL_ADHESION_MOLECULES_CAMS', 'KEGG_DNA_REPLICATION', diff --git a/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/fig_1g-1.png b/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/fig_1g-1.png index b2efaa6..4b7687a 100644 Binary files a/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/fig_1g-1.png and b/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/fig_1g-1.png differ diff --git a/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1f-1.png b/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1f-1.png index 42301a5..f61404a 100644 Binary files a/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1f-1.png and b/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1f-1.png differ diff --git a/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1l-1.png b/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1l-1.png new file mode 100644 index 0000000..ef0f9ca Binary files /dev/null and b/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1l-1.png differ diff --git a/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1m-1.png b/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1m-1.png index d2ab60a..eac8805 100644 Binary files a/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1m-1.png and b/figure_scripts/extended_data_figure_1/extended_data_figure_1_files/figure-gfm/supp_1m-1.png differ