hsmaan · hsmaan · Sep 27, 2023
diff --git a/workflow/analysis/R/17_Iniq_Contol_Cider_Supp_Analysis_Figures_Stat_Tests.R b/workflow/analysis/R/17_Iniq_Contol_Cider_Supp_Analysis_Figures_Stat_Tests.R
diff --git a/workflow/analysis/R/21_PBMC_perturbation_umap_plots.R b/workflow/analysis/R/21_PBMC_perturbation_umap_plots.R
@@ -0,0 +1,206 @@
+library(data.table)
+library(tidyverse)
+library(reshape2)
+library(ggplot2)
+library(ggthemes)
+library(ggExtra)
+library(ggpubr)
+library(dotwhisker)
+library(Seurat)
+library(SeuratDisk)
+library(ComplexHeatmap)
+library(circlize)
+library(RColorBrewer)
+library(Cairo)
+
+# Helper functions
+# `%ni%` is the complement of `%in%`: it forwards its arguments to `%in%`
+# and negates the result.
+`%ni%` <- function(...) {
+  !`%in%`(...)
+}
+
+# Change to the results dir that holds the umap results data.
+# NOTE(review): the path is relative, so this presumably assumes the script
+# is launched from its own directory (workflow/analysis/R) - confirm.
+setwd("../../../results/umap/")
+
+# Color palette: 25 colors, indexed from the front by umap_plot() when a
+# figure has more than 8 groups
+kev_palette <- c(
+  "dodgerblue2", "#E31A1C", "green4", "#6A3D9A", "#FF7F00",
+  "black", "gold1", "skyblue2", "#FB9A99", "palegreen2",
+  "#CAB2D6", "#FDBF6F", "gray70", "khaki2", "maroon",
+  "orchid1", "deeppink1", "blue1", "steelblue4", "darkturquoise",
+  "green1", "yellow4", "yellow3", "darkorange4", "brown"
+)
+
+##### Analysis of PBMC 2 batch balanced data - baseline #####
+
+# Load in the umap plot results
+# The working directory is moved into umap_plots/ so the bare file names
+# returned by list.files() can be handed straight to fread().
+setwd("umap_plots/")
+umap_files <- list.files()
+# Both filters match literally (fixed = TRUE). As a regular expression the
+# "." in ".tsv" matches any character, which would disagree with the literal
+# ".tsv" split used to derive the names below.
+umap_files <- grep(
+  ".tsv",
+  umap_files,
+  value = TRUE,
+  fixed = TRUE
+)
+umap_files <- grep(
+  "pbmc_2_batch_base_balanced",
+  umap_files,
+  value = TRUE,
+  fixed = TRUE
+)
+if (length(umap_files) == 0) {
+  # Without this, the script would finish "successfully" with no figures
+  warning(
+    "No pbmc_2_batch_base_balanced .tsv files found in ", getwd(),
+    call. = FALSE
+  )
+}
+umap_loaded <- lapply(umap_files, fread)
+# Name each table by the part of its file name before the first ".tsv"
+umap_names <- str_split_fixed(umap_files, fixed(".tsv"), 2)[, 1]
+names(umap_loaded) <- umap_names
+
+# Move three levels up from umap_plots/; the "outs/..." paths used later are
+# resolved relative to that directory
+setwd("../../..")
+
+# Make sure the umap output directories exist. Each entry maps a directory
+# to the `recursive` flag used when creating it: only the results directory
+# builds missing parents; figures/ shares the parent created by that call.
+umap_out_dirs <- c("outs/umap/results" = TRUE, "outs/umap/figures" = FALSE)
+for (out_dir in names(umap_out_dirs)) {
+  if (dir.exists(out_dir)) {
+    next
+  }
+  dir.create(out_dir, recursive = umap_out_dirs[[out_dir]])
+}
+
+# Helpers and entry point for drawing one faceted UMAP figure per input table
+
+# Text sizes shared by every UMAP figure, layered on top of theme_few()
+.umap_theme <- function() {
+  theme_few() +
+    theme(
+      axis.title.x = element_text(size = 16),
+      axis.title.y = element_text(size = 16),
+      strip.text.x = element_text(size = 16),
+      plot.title = element_text(size = 14),
+      axis.text.x = element_text(size = 16),
+      axis.text.y = element_text(size = 16),
+      legend.title = element_text(size = 16),
+      legend.text = element_text(size = 16)
+    )
+}
+
+# Turn group labels into a factor whose levels are in numeric order when every
+# label parses as a number, and in alphabetical order otherwise
+.umap_cluster_factor <- function(labels) {
+  if (is.factor(labels)) {
+    labels <- as.character(labels)
+  }
+  # Warnings are silenced only for this probe; unparsable labels are detected
+  # explicitly on the next line instead of silently becoming NA groups
+  numeric_labels <- suppressWarnings(as.numeric(labels))
+  unparsed <- is.na(numeric_labels) & !is.na(labels)
+  if (any(unparsed)) {
+    return(factor(labels, levels = sort(unique(labels))))
+  }
+  factor(numeric_labels, levels = sort(unique(numeric_labels)))
+}
+
+# Return n colors: the first n entries of kev_palette, padded with rainbow()
+# colors when n exceeds the palette (indexing past its end would give NA)
+.umap_cluster_colors <- function(n) {
+  n_base <- length(kev_palette)
+  if (n <= n_base) {
+    return(kev_palette[seq_len(n)])
+  }
+  c(kev_palette, grDevices::rainbow(n - n_base))
+}
+
+# Draw the UMAP scatter for one table and save it as a PDF.
+#
+# Args:
+#   df: table with columns `UMAP 1`, `UMAP 2`, `Clustering` (the color
+#     grouping) and `Subset` (the facet variable).
+#   save_prefix: file name, without extension, written under
+#     outs/umap/figures/.
+#
+# Returns:
+#   The value returned by ggsave().
+#
+# Tables with more than 8 distinct `Clustering` values are drawn with smaller
+# points and the kev_palette manual scale; all others use a ColorBrewer
+# palette ("Set1" when any label contains "Batch", otherwise "Dark2").
+umap_plot <- function(df, save_prefix) {
+  # Format celltype and batch names. The two groups of labels do not overlap,
+  # so one lookup covers both. warn_missing = FALSE because any single table
+  # only ever contains a subset of these labels.
+  df$Clustering <- plyr::mapvalues(
+    df$Clustering,
+    from = c(
+      "Monocyte_CD14",
+      "Monocyte_FCGR3A",
+      "CD4 T cell",
+      "CD8 T cell",
+      "batch_1",
+      "batch_2"
+    ),
+    to = c(
+      "CD14+ Monocyte",
+      "FCGR3A+ Monocyte",
+      "CD4+ T cell",
+      "CD8+ T cell",
+      "Batch 1",
+      "Batch 2"
+    ),
+    warn_missing = FALSE
+  )
+
+  group_labels <- df$Clustering
+  n_groups <- length(unique(group_labels))
+
+  if (n_groups > 8) {
+    df$Clustering <- .umap_cluster_factor(group_labels)
+    point_size <- 0.25
+    color_scale <- scale_color_manual(
+      name = "",
+      values = .umap_cluster_colors(n_groups)
+    )
+  } else {
+    df$Clustering <- factor(group_labels)
+    point_size <- 0.5
+    brewer_palette <- if (any(grepl("Batch", group_labels))) {
+      "Set1"
+    } else {
+      "Dark2"
+    }
+    color_scale <- scale_color_brewer(palette = brewer_palette)
+  }
+
+  umap_figure <- ggplot(data = df, aes(x = `UMAP 1`, y = `UMAP 2`)) +
+    geom_point(aes(color = Clustering), size = point_size) +
+    facet_wrap(. ~ Subset, scales = "free") +
+    labs(color = "", x = "UMAP 1", y = "UMAP 2") +
+    color_scale +
+    guides(color = guide_legend(override.aes = list(size = 2))) +
+    .umap_theme()
+
+  # Pass the plot explicitly rather than relying on ggsave()'s last_plot()
+  # default
+  ggsave(
+    filename = paste0("outs/umap/figures/", save_prefix, ".pdf"),
+    plot = umap_figure,
+    width = 16,
+    height = 8,
+    device = cairo_pdf
+  )
+}
+
+# Draw and save one figure per loaded table, pairing each table with the
+# prefix derived from its file name
+mapply(
+  function(umap_table, prefix) {
+    umap_plot(df = umap_table, save_prefix = prefix)
+  },
+  umap_loaded,
+  umap_names
+)
diff --git a/workflow/configs/config_control_cider.json b/workflow/configs/config_control_cider.json
@@ -0,0 +1,22 @@
+{
+    "config_name": "control_w_cider",
+    "int_datasets": {
+        "pbmc_2_batch_base_balanced" : {
+            "data_folder": "pbmc_2_batch_base_balanced",
+            "ds_celltypes": [1],
+            "ds_proportions": [0.1, 0],
+            "num_batches": [0, 1],
+            "repetitions": 200
+        },
+        "pbmc_2_batch_hierarchical_balanced": {
+            "data_folder": "pbmc_2_batch_hierarchical_balanced",
+            "ds_celltypes": [1],
+            "ds_proportions": [0.1, 0],
+            "num_batches": [0, 1],
+            "repetitions": 200
+        }
+    },
+    "int_ti_datasets": {},
+    "query_to_reference": "No",
+    "celltype_list": "No"
+}
diff --git a/workflow/configs/config_umap.json b/workflow/configs/config_umap.json
@@ -0,0 +1,50 @@
+{
+    "config_name": "umap",
+    "int_datasets": {
+        "pbmc_2_batch_base_balanced" : {
+            "data_folder": "pbmc_2_batch_base_balanced",
+            "ds_celltypes": [1],
+            "ds_proportions": [0.1, 0],
+            "num_batches": [0, 1],
+            "repetitions": 1
+        },
+        "pbmc_2_batch_hierarchical_balanced": {
+            "data_folder": "pbmc_2_batch_hierarchical_balanced",
+            "ds_celltypes": [1],
+            "ds_proportions": [0.1, 0],
+            "num_batches": [0, 1],
+            "repetitions": 1
+        },
+        "pbmc_2_batch" : {
+            "data_folder": "pbmc_2_batch",
+            "ds_celltypes": [0],
+            "ds_proportions": [0],
+            "num_batches": [0],
+            "repetitions": 1
+        },
+        "pbmc_4_batch" : {
+            "data_folder": "pbmc_4_batch",
+            "ds_celltypes": [0],
+            "ds_proportions": [0],
+            "num_batches": [0],
+            "repetitions": 1
+        },
+        "mouse_hindbrain_6_batch": {
+            "data_folder": "mouse_hindbrain_6_batch",
+            "ds_celltypes": [0],
+            "ds_proportions": [0],
+            "num_batches": [0],
+            "repetitions": 1
+        },
+        "peng_pdac_8_batch": {
+            "data_folder": "peng_pdac_tumor_annot_8_batch",
+            "ds_celltypes": [0],
+            "ds_proportions": [0],
+            "num_batches": [0],
+            "repetitions": 1
+        }
+    },
+    "int_ti_datasets": {},
+    "query_to_reference": "No",
+    "celltype_list": "No"
+}
diff --git a/workflow/scripts/R/install_packages.R b/workflow/scripts/R/install_packages.R
@@ -0,0 +1,19 @@
+# Install the required packages
+# All CRAN installs go through one HTTPS mirror so package sources are not
+# downloaded over plain HTTP.
+cran_repo <- "https://cloud.r-project.org"
+install.packages("BiocManager", repos = cran_repo)
+library(BiocManager)
+BiocManager::install("limma")
+# Archived locfit release, installed from source ahead of edgeR.
+# NOTE(review): presumably pinned because edgeR needs a locfit version that
+# builds on this R installation - confirm before changing.
+install.packages(
+    "https://cran.r-project.org/src/contrib/Archive/locfit/locfit_1.5-9.4.tar.gz", 
+    repos = NULL, 
+    type = "source"
+)
+BiocManager::install("edgeR")
+install.packages("CIDER", repos = cran_repo)
+
+# Stop if CIDER is not installed successfully
+# system.file() returns "" when the package cannot be found, which avoids
+# scanning the whole library the way installed.packages() does.
+if (!nzchar(system.file(package = "CIDER"))) {
+  stop("Package CIDER not installed successfully.", call. = FALSE)
+}
+
+# Output a dummy file to indicate that the script has completed
+# The target directory is created if needed and the result is checked, so a
+# failed write stops the script instead of leaving only a warning.
+confirmation_file <- "../results/install_confirmation.txt"
+dir.create(dirname(confirmation_file), recursive = TRUE, showWarnings = FALSE)
+if (!file.create(confirmation_file)) {
+  stop("Could not create ", confirmation_file, call. = FALSE)
+}