From 02294de81ef26236450e8da033dcfbb8786310a7 Mon Sep 17 00:00:00 2001
From: browaeysrobin <robin.browaeys@ugent.be>
Date: Wed, 14 Feb 2024 14:37:01 +0100
Subject: [PATCH] fix filtering prior to findMarkers DE

---
 R/pipeline_wrappers.R | 306 +++++++++++++++++++++++-------------------
 R/plotting.R          |   4 -
 2 files changed, 167 insertions(+), 143 deletions(-)

diff --git a/R/pipeline_wrappers.R b/R/pipeline_wrappers.R
index 4583695..56be3bb 100644
--- a/R/pipeline_wrappers.R
+++ b/R/pipeline_wrappers.R
@@ -365,11 +365,9 @@ process_abundance_expression_info = function(sce, sample_id, group_id, celltype_
 #' @export
 #'
 #'
-get_DE_info = function(sce, sample_id, group_id, celltype_id, batches, covariates, contrasts_oi, expressed_df, min_cells = 10, assay_oi_pb = "counts", fun_oi_pb = "sum", de_method_oi = "edgeR", findMarkers = FALSE, contrast_tbl = NULL){
-  
+get_DE_info = function (sce, sample_id, group_id, celltype_id, batches, covariates, contrasts_oi, expressed_df, min_cells = 10, assay_oi_pb = "counts", fun_oi_pb = "sum", de_method_oi = "edgeR", findMarkers = FALSE, contrast_tbl = NULL) {
   requireNamespace("dplyr")
   requireNamespace("ggplot2")
-  
   if (class(sce) != "SingleCellExperiment") {
     stop("sce should be a SingleCellExperiment object")
   }
@@ -391,173 +389,182 @@ get_DE_info = function(sce, sample_id, group_id, celltype_id, batches, covariate
   if (group_id != make.names(group_id)) {
     stop("group_id should be a syntactically valid R name - check make.names")
   }
-  
-  if(is.double(SummarizedExperiment::colData(sce)[,celltype_id])){
+  if (is.double(SummarizedExperiment::colData(sce)[, celltype_id])) {
     stop("SummarizedExperiment::colData(sce)[,celltype_id] should be a character vector or a factor")
   }
-  if(is.double(SummarizedExperiment::colData(sce)[,group_id])){
+  if (is.double(SummarizedExperiment::colData(sce)[, group_id])) {
     stop("SummarizedExperiment::colData(sce)[,group_id] should be a character vector or a factor")
   }
-  if(is.double(SummarizedExperiment::colData(sce)[,sample_id])){
+  if (is.double(SummarizedExperiment::colData(sce)[, sample_id])) {
     stop("SummarizedExperiment::colData(sce)[,sample_id] should be a character vector or a factor")
   }
-  
-  # if some of these are factors, and not all levels have syntactically valid names - prompt to change this
-  if(is.factor(SummarizedExperiment::colData(sce)[,celltype_id])){
-    is_make_names = levels(SummarizedExperiment::colData(sce)[,celltype_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,celltype_id]))
-    if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,celltype_id]))){
+  if (is.factor(SummarizedExperiment::colData(sce)[, celltype_id])) {
+    is_make_names = levels(SummarizedExperiment::colData(sce)[, 
+                                                              celltype_id]) == make.names(levels(SummarizedExperiment::colData(sce)[, 
+                                                                                                                                    celltype_id]))
+    if (sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[, 
+                                                                               celltype_id]))) {
       stop("The levels of the factor SummarizedExperiment::colData(sce)[,celltype_id] should be a syntactically valid R names - see make.names")
     }
-  } else{
-    is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])))
-    if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,celltype_id]))))){
+  }
+  else {
+    is_make_names = unique(sort(SummarizedExperiment::colData(sce)[, 
+                                                                   celltype_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[, 
+                                                                                                                                               celltype_id])))
+    if (sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[, 
+                                                                                     celltype_id]))))) {
       stop("All the cell type labels in SummarizedExperiment::colData(sce)[,celltype_id] should be syntactically valid R names - see make.names")
     }
   }
-  
-  if(is.factor(SummarizedExperiment::colData(sce)[,group_id])){
-    is_make_names = levels(SummarizedExperiment::colData(sce)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,group_id]))
-    if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,group_id]))){
+  if (is.factor(SummarizedExperiment::colData(sce)[, group_id])) {
+    is_make_names = levels(SummarizedExperiment::colData(sce)[, 
+                                                              group_id]) == make.names(levels(SummarizedExperiment::colData(sce)[, 
+                                                                                                                                 group_id]))
+    if (sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[, 
+                                                                               group_id]))) {
       stop("The levels of the factor SummarizedExperiment::colData(sce)[,group_id] should be a syntactically valid R names - see make.names")
     }
-  } else{
-    is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,group_id])))
-    if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,group_id]))))){
+  }
+  else {
+    is_make_names = unique(sort(SummarizedExperiment::colData(sce)[, 
+                                                                   group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[, 
+                                                                                                                                            group_id])))
+    if (sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[, 
+                                                                                     group_id]))))) {
       stop("All the group/condition labels in SummarizedExperiment::colData(sce)[,group_id] should be syntactically valid R names - see make.names")
     }
   }
-  if(is.factor(SummarizedExperiment::colData(sce)[,sample_id])){
-    is_make_names = levels(SummarizedExperiment::colData(sce)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,sample_id]))
-    if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,sample_id]))){
+  if (is.factor(SummarizedExperiment::colData(sce)[, sample_id])) {
+    is_make_names = levels(SummarizedExperiment::colData(sce)[, 
+                                                              sample_id]) == make.names(levels(SummarizedExperiment::colData(sce)[, 
+                                                                                                                                  sample_id]))
+    if (sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[, 
+                                                                               sample_id]))) {
       stop("The levels of the factor SummarizedExperiment::colData(sce)[,sample_id] should be a syntactically valid R names - see make.names")
     }
-  } else{
-    is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,sample_id])))
-    if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,sample_id]))))){
+  }
+  else {
+    is_make_names = unique(sort(SummarizedExperiment::colData(sce)[, 
+                                                                   sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[, 
+                                                                                                                                             sample_id])))
+    if (sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[, 
+                                                                                     sample_id]))))) {
       stop("All the sample_id labels in SummarizedExperiment::colData(sce)[,sample_id] should be syntactically valid R names - see make.names")
     }
   }
-  
-  if(!is.character(contrasts_oi)){
+  if (!is.character(contrasts_oi)) {
     stop("contrasts should be a character vector")
   }
-  
-  # conditions of interest in the contrast should be present in the in the group column of the metadata
-  groups_oi = SummarizedExperiment::colData(sce)[,group_id] %>% unique()
-  conditions_oi = stringr::str_split(contrasts_oi, "'") %>% unlist() %>% unique() %>%
-    # stringr::str_split("[:digit:]") %>% unlist() %>% unique() %>%
-    stringr::str_split("\\)") %>% unlist() %>% unique() %>%
-    stringr::str_split("\\(") %>% unlist() %>% unique() %>%
-    stringr::str_split("-") %>% unlist() %>% unique() %>%
-    stringr::str_split("\\+") %>% unlist() %>% unique() %>%
-    stringr::str_split("\\*") %>% unlist() %>% unique() %>%
-    stringr::str_split("\\/") %>% unlist() %>% unique() %>% generics::setdiff(c("",","," ,", ", ")) %>% unlist() %>% unique()
+  groups_oi = SummarizedExperiment::colData(sce)[, group_id] %>% 
+    unique()
+  conditions_oi = stringr::str_split(contrasts_oi, "'") %>% 
+    unlist() %>% unique() %>% stringr::str_split("\\)") %>% 
+    unlist() %>% unique() %>% stringr::str_split("\\(") %>% 
+    unlist() %>% unique() %>% stringr::str_split("-") %>% 
+    unlist() %>% unique() %>% stringr::str_split("\\+") %>% 
+    unlist() %>% unique() %>% stringr::str_split("\\*") %>% 
+    unlist() %>% unique() %>% stringr::str_split("\\/") %>% 
+    unlist() %>% unique() %>% generics::setdiff(c("", ",", 
+                                                  " ,", ", ")) %>% unlist() %>% unique()
   conditions_oi = conditions_oi[is.na(suppressWarnings(as.numeric(conditions_oi)))]
-  
-  if(length(contrasts_oi) != 1 | !is.character(contrasts_oi)){
+  if (length(contrasts_oi) != 1 | !is.character(contrasts_oi)) {
     stop("contrasts_oi should be a character vector of length 1. See the documentation of the function for having an idea of the right format of setting your contrasts.")
   }
-  
-  # conditions of interest in the contrast should be present in the in the contrast_tbl
-  contrasts_simplified = stringr::str_split(contrasts_oi, "'") %>% unlist() %>% unique() %>%
-    stringr::str_split(",") %>% unlist() %>% unique() %>% generics::setdiff(c("",",")) %>% unlist() %>% unique()
-  
+  contrasts_simplified = stringr::str_split(contrasts_oi, "'") %>% 
+    unlist() %>% unique() %>% stringr::str_split(",") %>% 
+    unlist() %>% unique() %>% generics::setdiff(c("", ",")) %>% 
+    unlist() %>% unique()
   if (sum(conditions_oi %in% groups_oi) != length(conditions_oi)) {
     stop("conditions written in contrasts should be in the condition-indicating column! This is not the case, which can lead to errors downstream.")
   }
-  
-  if(!is.na(batches)){
-    if (sum(batches %in% colnames(SummarizedExperiment::colData(sce))) != length(batches) ) {
+  if (!is.na(batches)) {
+    if (sum(batches %in% colnames(SummarizedExperiment::colData(sce))) != 
+        length(batches)) {
       stop("batches should be NA or all present as column name(s) in the metadata dataframe of sce")
     }
   }
-  
-  if(length(covariates) > 1){
+  if (length(covariates) > 1) {
     covariates_present = TRUE
-    if (sum(covariates %in% colnames(SummarizedExperiment::colData(sce))) != length(covariates) ) {
+    if (sum(covariates %in% colnames(SummarizedExperiment::colData(sce))) != 
+        length(covariates)) {
       stop("covariates should be NA or all present as column name(s) in the metadata dataframe of sce")
     }
-  } else {
-    if(!is.na(covariates)){
+  }
+  else {
+    if (!is.na(covariates)) {
       covariates_present = TRUE
-      if (sum(covariates %in% colnames(SummarizedExperiment::colData(sce))) != length(covariates) ) {
+      if (sum(covariates %in% colnames(SummarizedExperiment::colData(sce))) != 
+          length(covariates)) {
         stop("covariates should be NA or all present as column name(s) in the metadata dataframe of sce")
       }
-    } else {
+    }
+    else {
       covariates_present = FALSE
     }
   }
-  
-  if(!is.character(assay_oi_pb)){
+  if (!is.character(assay_oi_pb)) {
     stop("assay_oi_pb should be a character vector")
-  } else {
-    if(assay_oi_pb != "counts"){
+  }
+  else {
+    if (assay_oi_pb != "counts") {
       warning("are you sure you don't want to use the counts assay?")
     }
   }
-  if(!is.character(fun_oi_pb)){
+  if (!is.character(fun_oi_pb)) {
     stop("fun_oi_pb should be a character vector")
   }
-  if(!is.character(de_method_oi)){
+  if (!is.character(de_method_oi)) {
     stop("de_method_oi should be a character vector")
   }
-  
-  if(!is.double(min_cells)){
+  if (!is.double(min_cells)) {
     stop("min_cells should be numeric")
-  } else {
-    if(min_cells <= 0) {
+  }
+  else {
+    if (min_cells <= 0) {
       warning("min_cells is now 0 or smaller. We recommend having a positive, non-zero value for this parameter")
     }
   }
-  if(findMarkers == TRUE){
-    if(is.null(contrast_tbl)){
+  if (findMarkers == TRUE) {
+    if (is.null(contrast_tbl)) {
       stop("Please provide an input to the argument `contrast_tbl` -- see documentation")
     }
   }
-
-  celltypes = SummarizedExperiment::colData(sce)[,celltype_id] %>% unique()
-  
-  DE_list = celltypes %>% lapply(function(celltype_oi, sce){
-    sce_oi = sce[, SummarizedExperiment::colData(sce)[,celltype_id] == celltype_oi]
-    DE_result = tryCatch(
-      {perform_muscat_de_analysis(sce = sce_oi, 
-                                  sample_id = sample_id, 
-                                  celltype_id = celltype_id, 
-                                  group_id = group_id, 
-                                  batches = batches, 
-                                  covariates = covariates, 
-                                  contrasts = contrasts_oi, 
-                                  expressed_df = expressed_df, 
-                                  assay_oi_pb = assay_oi_pb, 
-                                  fun_oi_pb = fun_oi_pb, 
-                                  de_method_oi = de_method_oi, 
-                                  min_cells = min_cells)
-        }, 
-      error = function(cond){
-        message(paste0("perform_muscat_de_analysis errored for celltype: ", celltype_oi))
-        message("Here's the original error message:")
-        message(cond)
-        message("")
-        print(cond)
-        message(paste0("perform_muscat_de_analysis errored for celltype: ", celltype_oi))
-        message("")
-        print("In case: Error in x[[1]]: subscript out of bounds: this likely means that there are not enough samples per group with sufficient cells of this cell type. This cell type will thus be ignored for further analyses, other cell types will still be considered.")
-        return(NA) # occurs when not enough samples per group with sufficient cells in most cases, can also be due to other error messages
-      })
+  celltypes = SummarizedExperiment::colData(sce)[, celltype_id] %>% 
+    unique()
+  DE_list = celltypes %>% lapply(function(celltype_oi, sce) {
+    sce_oi = sce[, SummarizedExperiment::colData(sce)[, celltype_id] == 
+                   celltype_oi]
+    DE_result = tryCatch({
+      perform_muscat_de_analysis(sce = sce_oi, sample_id = sample_id, 
+                                 celltype_id = celltype_id, group_id = group_id, 
+                                 batches = batches, covariates = covariates, contrasts = contrasts_oi, 
+                                 expressed_df = expressed_df, assay_oi_pb = assay_oi_pb, 
+                                 fun_oi_pb = fun_oi_pb, de_method_oi = de_method_oi, 
+                                 min_cells = min_cells)
+    }, error = function(cond) {
+      message(paste0("perform_muscat_de_analysis errored for celltype: ", 
+                     celltype_oi))
+      message("Here's the original error message:")
+      message(cond)
+      message("")
+      print(cond)
+      message(paste0("perform_muscat_de_analysis errored for celltype: ", 
+                     celltype_oi))
+      message("")
+      print("In case: Error in x[[1]]: subscript out of bounds: this likely means that there are not enough samples per group with sufficient cells of this cell type. This cell type will thus be ignored for further analyses, other cell types will still be considered.")
+      return(NA)
+    })
   }, sce)
-  
-  celltype_de = list(
-    de_output = c(DE_list %>% purrr::map("de_output")),
-    de_output_tidy = DE_list %>% purrr::map("de_output_tidy") %>% bind_rows()
-    )
-  
+  celltype_de = list(de_output = c(DE_list %>% purrr::map("de_output")), 
+                     de_output_tidy = DE_list %>% purrr::map("de_output_tidy") %>% 
+                       bind_rows())
   print("DE analysis is done:")
-  
   print("included cell types are:")
-  included_celltypes = celltypes %>% generics::intersect(celltype_de$de_output_tidy$cluster_id) %>% unique()
+  included_celltypes = celltypes %>% generics::intersect(celltype_de$de_output_tidy$cluster_id) %>% 
+    unique()
   print(included_celltypes)
-  
-  excluded_celltypes = celltypes %>% generics::setdiff(celltype_de$de_output_tidy$cluster_id) %>% unique()
+  excluded_celltypes = celltypes %>% generics::setdiff(celltype_de$de_output_tidy$cluster_id) %>% 
+    unique()
   if (length(excluded_celltypes) > 0) {
     print("excluded cell types are:")
     print(excluded_celltypes)
@@ -566,41 +573,62 @@ get_DE_info = function(sce, sample_id, group_id, celltype_id, batches, covariate
   if (length(excluded_celltypes) == length(celltypes)) {
     print("DE analysis did error for all cell types. This might be because of several reasons - check the original error message for this. Here are 2 common reasons in case no cell type past the filtering criteria: 1) no cell type has enough cells in >=2 samples per group. 2) problem in batch definition: not all levels of your batch are in each group - Also for groups not included in your contrasts!")
   }
-  
-  hist_pvals = celltype_de$de_output_tidy %>% dplyr::inner_join(celltype_de$de_output_tidy %>% dplyr::group_by(contrast,cluster_id) %>% dplyr::count(), by = c("cluster_id","contrast")) %>% 
-    dplyr::mutate(cluster_id = paste0(cluster_id, "\nnr of genes: ", n)) %>% dplyr::mutate(`p-value <= 0.05` = p_val <= 0.05) %>% 
-    ggplot(aes(x = p_val, fill = `p-value <= 0.05`)) + 
-    geom_histogram(binwidth = 0.05,boundary=0, color = "grey35") + scale_fill_manual(values = c("grey90", "lightsteelblue1")) + 
-    facet_grid(contrast~cluster_id) + ggtitle("P-value histograms") + theme_bw() 
-  
-  if(findMarkers == TRUE){
-    celltypes = celltype_de$de_output_tidy %>% dplyr::pull(cluster_id) %>% unique()
-    
-    celltype_de_findmarkers = celltypes %>% lapply(function(celltype_oi, sce){
-      genes_expressed = rownames(sce) ## change later if necessary for having a more decent filtering
-      sce_oi = sce[intersect(rownames(sce), genes_expressed), SummarizedExperiment::colData(sce)[,celltype_id] == celltype_oi]
-      DE_tables_list = scran::findMarkers(sce_oi, test.type="t", groups = SummarizedExperiment::colData(sce_oi)[,group_id])
+  hist_pvals = celltype_de$de_output_tidy %>% dplyr::inner_join(celltype_de$de_output_tidy %>% 
+                                                                  dplyr::group_by(contrast, cluster_id) %>% dplyr::count(), 
+                                                                by = c("cluster_id", "contrast")) %>% dplyr::mutate(cluster_id = paste0(cluster_id, 
+                                                                                                                                        "\nnr of genes: ", n)) %>% dplyr::mutate(`p-value <= 0.05` = p_val <= 
+                                                                                                                                                                                   0.05) %>% ggplot(aes(x = p_val, fill = `p-value <= 0.05`)) + 
+    geom_histogram(binwidth = 0.05, boundary = 0, color = "grey35") + 
+    scale_fill_manual(values = c("grey90", "lightsteelblue1")) + 
+    facet_grid(contrast ~ cluster_id) + ggtitle("P-value histograms") + 
+    theme_bw()
+  if (findMarkers == TRUE) {
+    celltypes = celltype_de$de_output_tidy %>% dplyr::pull(cluster_id) %>% 
+      unique()
+    celltype_de_findmarkers = celltypes %>% lapply(function(celltype_oi, 
+                                                            sce) {
+      genes_expressed = expressed_df %>% filter(celltype == celltype_oi & 
+                                                  expressed == TRUE) %>% pull(gene) %>% unique()
+      sce_oi = sce[intersect(rownames(sce), genes_expressed), 
+                   SummarizedExperiment::colData(sce)[, celltype_id] == 
+                     celltype_oi]
+      #DE_tables_list = scran::findMarkers(sce_oi@assays@data$counts, test.type = "binom", 
+      #   groups = SummarizedExperiment::colData(sce_oi)[, 
+      #    group_id])
+      DE_tables_list = scran::findMarkers(sce_oi@assays@data$logcounts, test.type = "t", 
+                                          groups = SummarizedExperiment::colData(sce_oi)[, 
+                                                                                         group_id])
       conditions = names(DE_tables_list)
-      DE_tables_df = conditions %>% lapply(function(condition_oi, DE_tables_list){
+      DE_tables_df = conditions %>% lapply(function(condition_oi, 
+                                                    DE_tables_list) {
         DE_table_oi = DE_tables_list[[condition_oi]]
-        DE_table_oi = DE_table_oi %>% data.frame() %>% tibble::rownames_to_column("gene") %>% tibble::as_tibble() %>% dplyr::mutate(cluster_id = celltype_oi, group = condition_oi) %>% dplyr::select(gene, p.value, FDR, summary.logFC, cluster_id, group)  
+        DE_table_oi = DE_table_oi %>% data.frame() %>% 
+          tibble::rownames_to_column("gene") %>% tibble::as_tibble() %>% 
+          dplyr::mutate(cluster_id = celltype_oi, group = condition_oi) %>% 
+          dplyr::select(gene, p.value, FDR, summary.logFC, 
+                        cluster_id, group)
       }, DE_tables_list) %>% dplyr::bind_rows()
-    }, sce) %>% dplyr::bind_rows() %>% dplyr::rename(logFC = summary.logFC, p_val = p.value, p_adj = FDR) %>% dplyr::inner_join(contrast_tbl, by = "group") %>% dplyr::select(gene, cluster_id, logFC, p_val, p_adj, contrast)
-    
-    hist_pvals_findmarkers = celltype_de_findmarkers %>% dplyr::inner_join(celltype_de_findmarkers %>% dplyr::group_by(contrast,cluster_id) %>% dplyr::count(), by = c("cluster_id","contrast")) %>% 
-      dplyr::mutate(cluster_id = paste0(cluster_id, "\nnr of genes: ", n)) %>% dplyr::mutate(`p-value <= 0.05` = p_val <= 0.05) %>% 
-      ggplot(aes(x = p_val, fill = `p-value <= 0.05`)) + 
-      geom_histogram(binwidth = 0.05,boundary=0, color = "grey35") + scale_fill_manual(values = c("grey90", "lightsteelblue1")) + 
-      facet_grid(contrast~cluster_id) + ggtitle("findMarker P-value histograms") + theme_bw() 
-    
-    
-  } else {
+    }, sce) %>% dplyr::bind_rows() %>% dplyr::rename(logFC = summary.logFC, 
+                                                     p_val = p.value, p_adj = FDR) %>% dplyr::inner_join(contrast_tbl, 
+                                                                                                         by = "group") %>% dplyr::select(gene, cluster_id, 
+                                                                                                                                         logFC, p_val, p_adj, contrast)
+    hist_pvals_findmarkers = celltype_de_findmarkers %>% 
+      dplyr::inner_join(celltype_de_findmarkers %>% dplyr::group_by(contrast, 
+                                                                    cluster_id) %>% dplyr::count(), by = c("cluster_id", 
+                                                                                                           "contrast")) %>% dplyr::mutate(cluster_id = paste0(cluster_id, 
+                                                                                                                                                              "\nnr of genes: ", n)) %>% dplyr::mutate(`p-value <= 0.05` = p_adj <= 
+                                                                                                                                                                                                         0.05) %>% ggplot(aes(x = p_val, fill = `p-value <= 0.05`)) + 
+      geom_histogram(binwidth = 0.05, boundary = 0, color = "grey35") + 
+      scale_fill_manual(values = c("grey90", "lightsteelblue1")) + 
+      facet_grid(contrast ~ cluster_id) + ggtitle("findMarker adj P-value histograms") + 
+      theme_bw()
+  }
+  else {
     celltype_de_findmarkers = NA
     hist_pvals_findmarkers = NA
-    
   }
-  return(list(celltype_de = celltype_de, hist_pvals = hist_pvals, celltype_de_findmarkers = celltype_de_findmarkers, hist_pvals_findmarkers = hist_pvals_findmarkers))
-  
+  return(list(celltype_de = celltype_de, hist_pvals = hist_pvals, 
+              celltype_de_findmarkers = celltype_de_findmarkers, hist_pvals_findmarkers = hist_pvals_findmarkers))
 }
 #' @title get_empirical_pvals
 #'
diff --git a/R/plotting.R b/R/plotting.R
index a542d13..24a7e90 100644
--- a/R/plotting.R
+++ b/R/plotting.R
@@ -784,7 +784,6 @@ make_DEgene_dotplot_pseudobulk = function(genes_oi, celltype_info, prioritizatio
       strip.text.y = element_text(size = 9, color = "black", face = "bold", angle = 0),
       strip.background = element_rect(color="darkgrey", fill="whitesmoke", size=1.5, linetype="solid")
     )  +
-    # labs(color = "Scaled gene\navg expression")
     labs(color = "Scaled pseudobulk\nexpression", size= "Fraction of\nexpressing cells") + xlab("Samples") +ylab("Genes") 
   max_lfc = abs(plot_data$scaled_gene_exprs) %>% max()
   custom_scale_fill = scale_color_gradientn(colours = RColorBrewer::brewer.pal(n = 7, name = "RdBu") %>% rev(),values = c(0, 0.35, 0.465, 0.5, 0.535, 0.65, 1),  limits = c(-1*max_lfc, max_lfc))
@@ -917,7 +916,6 @@ make_DEgene_dotplot_pseudobulk_reversed = function(genes_oi, celltype_info, prio
         strip.text.x = element_text(size = 9, color = "black", face = "bold"),
         strip.background = element_rect(color="darkgrey", fill="whitesmoke", size=1.5, linetype="solid")
       )  +
-      # labs(color = "Scaled gene\navg expression")
       labs(color = "Scaled pseudobulk\nexpression", size= "Fraction of\nexpressing cells") + xlab("Genes") +ylab("Samples") 
     max_lfc = abs(plot_data$scaled_gene_exprs) %>% max()
     custom_scale_fill = scale_color_gradientn(colours = RColorBrewer::brewer.pal(n = 7, name = "RdBu") %>% rev(),values = c(0, 0.35, 0.465, 0.5, 0.535, 0.65, 1),  limits = c(-1*max_lfc, max_lfc))
@@ -996,7 +994,6 @@ make_DEgene_dotplot_pseudobulk_reversed = function(genes_oi, celltype_info, prio
         strip.text.x = element_text(size = 9, color = "black", face = "bold"),
         strip.background = element_rect(color="darkgrey", fill="whitesmoke", size=1.5, linetype="solid")
       )  +
-      # labs(color = "Scaled gene\navg expression")
       labs(color = "Scaled pseudobulk\nexpression", size= "Fraction of\nexpressing cells") + xlab("Genes") +ylab("Samples")
     max_lfc = abs(plot_data$scaled_gene_exprs) %>% max()
     custom_scale_fill = scale_color_gradientn(colours = RColorBrewer::brewer.pal(n = 7, name = "RdBu") %>% rev(),values = c(0, 0.35, 0.465, 0.5, 0.535, 0.65, 1),  limits = c(-1*max_lfc, max_lfc))
@@ -1137,7 +1134,6 @@ make_DEgene_dotplot_pseudobulk_batch = function(genes_oi, celltype_info, priorit
       strip.text.y = element_text(size = 9, color = "black", face = "bold", angle = 0),
       strip.background = element_rect(color="darkgrey", fill="whitesmoke", size=1.5, linetype="solid")
     )  +
-    # labs(color = "Scaled gene\navg expression")
     labs(color = "Scaled pseudobulk\nexpression", size= "Fraction of\nexpressing cells") + xlab("") +ylab("Genes") 
   max_lfc = abs(plot_data$scaled_gene_exprs) %>% max()
   custom_scale_fill = scale_color_gradientn(colours = RColorBrewer::brewer.pal(n = 7, name = "RdBu") %>% rev(),values = c(0, 0.35, 0.465, 0.5, 0.535, 0.65, 1),  limits = c(-1*max_lfc, max_lfc))