Merge pull request #387 from AlexsLemonade/development

Merge in `development` for `v0.5.3` release
AlexsLemonade · Jul 19, 2023 · bed9eff · bed9eff
2 parents 4b4de3c + 45fb456
commit bed9eff
Show file tree

Hide file tree

Showing 6 changed files with 90 additions and 52 deletions.
diff --git a/bin/post_process_sce.R b/bin/post_process_sce.R
@@ -96,18 +96,37 @@ metadata(sce)$min_gene_cutoff <- opt$gene_cutoff
 # create adt_scpca_filter column, if CITEseq ----------
 alt_exp <- opt$adt_name
 if (alt_exp %in% altExpNames(sce)) {
-  sce$adt_scpca_filter <-  ifelse(
-    altExp(sce, alt_exp)$discard,
-    "Remove",
-    "Keep"
-  )
 
-  # assign `adt_scpca_filter_method` metadata based on colData contents
-  if ("sum.controls" %in% names(colData(altExp(sce, alt_exp)))) {
-    metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts with isotype controls"
-  } else if ("ambient.scale" %in% names(colData(altExp(sce, alt_exp)))) {
-    metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts without isotype controls"
+  # set up filter with all Keep, and then override to Remove where necessary
+  sce$adt_scpca_filter <- "Keep"
+  sce$adt_scpca_filter[which(altExp(sce, alt_exp)$discard)] <- "Remove"
+
+  # Warnings for different types of failure:
+  fail_all_removed <- all(sce$adt_scpca_filter == "Remove")
+  fail_all_na <- all(is.na(altExp(sce, alt_exp)$discard)) 
+
+  # handle failures - warnings and assign method as "No filter"
+  if (fail_all_removed | fail_all_na) {
+    metadata(sce)$adt_scpca_filter_method <- "No filter"   
+    if (fail_all_removed) {
+      sce$adt_scpca_filter <- "Keep"
+      warning("Filtering on ADTs attempted to remove all cells. No cells will be removed.")
+    } else {
+      warning("ADT filtering failed. No cells will be removed.")
+    }
   } else {
+    # Handle successes - assign `adt_scpca_filter_method` metadata based on colData contents
+    if ("sum.controls" %in% names(colData(altExp(sce, alt_exp)))) {
+      metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts with isotype controls"
+    } else if ("ambient.scale" %in% names(colData(altExp(sce, alt_exp)))) {
+      metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts without isotype controls"
+    } else {
+      stop("Error in ADT filtering.")
+    }
+  }
+
+  # make extra sure there are no NAs in `adt_scpca_filter`
+  if (any(is.na(sce$adt_scpca_filter))) {
     stop("Error in ADT filtering.")
   }
 }
@@ -161,6 +180,7 @@ processed_sce <- scuttle::logNormCounts(processed_sce)
 # Try to normalize ADT counts, if present
 if (alt_exp %in% altExpNames(processed_sce)) {
 
+
   # need `all()` since,if present, this is an array
   if( !all(is.null(metadata(altExp(processed_sce, alt_exp))$ambient_profile))){
     # Calculate median size factors from the ambient profile
@@ -173,38 +193,40 @@ if (alt_exp %in% altExpNames(processed_sce)) {
     altExp(processed_sce, alt_exp)$sizeFactor <- 0
   }
 
-  # Perform filtering specifically to allow for normalization
   adt_sce <- altExp(processed_sce, alt_exp)
-  adt_sce <- adt_sce[,adt_sce$discard == FALSE]
-
-  # If any size factors are not positive, simply use log1p
-  if ( any( adt_sce$sizeFactor <= 0 ) ) {
+  # Perform filtering specifically to allow for normalization
+  if (!is.null(processed_sce$adt_scpca_filter)) {
+    adt_sce <- adt_sce[,processed_sce$adt_scpca_filter == "Keep"]
+  }
+
+  # If any size factors are not positive or there was no filtering, simply use log1p
+  if ( any( adt_sce$sizeFactor <= 0 )  | metadata(processed_sce)$adt_scpca_filter_method == "No filter") {
     metadata(processed_sce)$adt_normalization <- "log-normalization"
     logcounts(adt_sce) <- log1p(counts(adt_sce))
   } else {
     # Apply normalization using size factors
     metadata(processed_sce)$adt_normalization <- "median-based"
     adt_sce <- scuttle::logNormCounts(adt_sce)    
-
-    # Add this logcounts matrix with NA values added for cells not included in normalization
-
-    # first, get the counts matrix and make it NA
-    result_matrix <- counts(altExp(processed_sce, alt_exp))
-    result_matrix[,] <- NA
-
-    # now get the computed logcounts & fill them in
-    result_matrix[, colnames(adt_sce)] <- logcounts(adt_sce)
-
-    # Check correct number of NAs:
-    observed_na_count <- sum(is.na(result_matrix))
-    expected_na_count <- nrow(adt_sce) * (ncol(altExp(processed_sce, alt_exp)) - ncol(adt_sce))
-    if (observed_na_count < expected_na_count) {
-      stop("Incorrect number of NAs recovered during ADT normalization.")
-    }
-
-    # Add result_matrix back into correct SCE as logcounts assay
-    logcounts(altExp(processed_sce, alt_exp)) <- result_matrix
   }
+  # Now that we have logcounts, add back to `processed_sce` but
+  #   with NA values for cells not included in normalization
+
+  # first, get the counts matrix and make it NA
+  result_matrix <- counts(altExp(processed_sce, alt_exp))
+  result_matrix[,] <- NA
+
+  # now get the computed logcounts & fill them in
+  result_matrix[, colnames(adt_sce)] <- logcounts(adt_sce)
+
+  # Check correct number of NAs:
+  observed_na_count <- sum(is.na(result_matrix))
+  expected_na_count <- nrow(adt_sce) * (ncol(altExp(processed_sce, alt_exp)) - ncol(adt_sce))
+  if (observed_na_count < expected_na_count) {
+    stop("Incorrect number of NAs recovered during ADT normalization.")
+  }
+
+  # Add result_matrix back into correct SCE as logcounts assay
+  logcounts(altExp(processed_sce, alt_exp)) <- result_matrix
 }
 
 

diff --git a/external-instructions.md b/external-instructions.md
@@ -65,7 +65,7 @@ nextflow run AlexsLemonade/scpca-nf \
 ```
 
 Where `<path to config file>` is the **relative** path to the [configuration file](#configuration-files) that you have setup and `<name of profile>` is the name of the profile that you chose when [creating a profile](#setting-up-a-profile-in-the-configuration-file).
-This command will pull the `scpca-nf` workflow directly from Github, using the `v0.5.2` version, and run it based on the settings in the configuration file that you have defined.
+This command will pull the `scpca-nf` workflow directly from Github, using the `v0.5.3` version, and run it based on the settings in the configuration file that you have defined.
 
 **Note:** `scpca-nf` is under active development.
 Using the above command will run the workflow from the `main` branch of the workflow repository.
@@ -76,7 +76,7 @@ Released versions can be found on the [`scpca-nf` repo releases page](https://gi
 
 ```sh
 nextflow run AlexsLemonade/scpca-nf \
-  -r v0.5.2 \
+  -r v0.5.3 \
   -config <path to config file>  \
   -profile <name of profile>
 ```
@@ -280,8 +280,8 @@ If you will be analyzing spatial expression data, you will also need the Cell Ra
 
 If your compute nodes do not have internet access, you will likely have to pre-pull the required container images as well.
 When doing this, it is important to be sure that you also specify the revision (version tag) of the `scpca-nf` workflow that you are using.
-For example, if you would run `nextflow run AlexsLemonade/scpca-nf -r v0.5.2`, then you will want to set `-r v0.5.2` for `get_refs.py` as well to be sure you have the correct containers.
-Be default,  `get_refs.py` will download files and images associated with the latest release.
+For example, if you would run `nextflow run AlexsLemonade/scpca-nf -r v0.5.3`, then you will want to set `-r v0.5.3` for `get_refs.py` as well to be sure you have the correct containers.
+By default, `get_refs.py` will download files and images associated with the latest release.
 
 If your system uses Docker, you can add the `--docker` flag:
 

diff --git a/internal-instructions.md b/internal-instructions.md
@@ -33,7 +33,7 @@ nextflow run AlexsLemonade/scpca-nf -profile ccdl,batch
 When running the workflow for a project or group of samples that is ready to be released on ScPCA portal, please use the tag for the latest release:
 
 ```
-nextflow run AlexsLemonade/scpca-nf -r v0.5.2 -profile ccdl,batch --project SCPCP000000
+nextflow run AlexsLemonade/scpca-nf -r v0.5.3 -profile ccdl,batch --project SCPCP000000
 ```
 
 ### Processing example data

diff --git a/modules/qc-report.nf b/modules/qc-report.nf
@@ -3,6 +3,7 @@
 
 process sce_qc_report{
     container params.SCPCATOOLS_CONTAINER
+    label 'mem_8'
     tag "${meta.library_id}"
     publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
     input:

diff --git a/nextflow.config b/nextflow.config
@@ -5,7 +5,7 @@ manifest{
   homePage = 'https://github.com/AlexsLemonade/scpca-nf'
   mainScript = 'main.nf'
   defaultBranch = 'main'
-  version = 'v0.5.2'
+  version = 'v0.5.3'
 }
 
 // global parameters for workflows

diff --git a/templates/qc_report/cite_qc.rmd b/templates/qc_report/cite_qc.rmd
@@ -56,22 +56,25 @@ knitr::kable(antibody_tags, digits = 2) |>
 
 ## ADT Post-processing Statistics
 
-Note that low-quality cells as identified by ADT counts are not actually filtered from the SCE object.
-Instead, cells that passed the filter threshold are labeled as `"Keep"` within the SCE object, and conversely cells that failed to pass the filtered are labeled as `"Remove"`.
-
 ```{r}
 
 if (has_processed) {
-  filtered_cell_count <- sum(processed_sce$adt_scpca_filter == "Keep")
   
   basic_statistics <- tibble::tibble(
-    # Note that the adt_scpca_filter_method column is only present in the processed_sce object
-    "Method used to identify cells to filter" = format(processed_meta$adt_scpca_filter_method),
-    "Number of cells that pass filtering threshold"  = format(filtered_cell_count),
-    "Percent of cells that pass filtering threshold" =
-        paste0(round( filtered_cell_count/ncol(processed_sce) * 100, digits = 2), "%"), 
-    "Normalization method" = format(processed_meta$adt_normalization) 
-    ) |>
+      "Method used to identify cells to filter" = format(processed_meta$adt_scpca_filter_method)
+    )
+  
+  if (!(processed_meta$adt_scpca_filter_method == "No filter")) {
+    filtered_cell_count <- sum(processed_sce$adt_scpca_filter == "Keep")
+  
+    basic_statistics <- basic_statistics |>
+      # Note that the adt_scpca_filter_method column is only present in the processed_sce object
+      mutate("Number of cells that pass filtering threshold"  = format(filtered_cell_count),
+             "Percent of cells that pass filtering threshold" = paste0(round( filtered_cell_count/ncol(processed_sce) * 100, digits = 2), "%")
+      )
+  }
+  basic_statistics <- basic_statistics |>
+    mutate("Normalization method" = format(processed_meta$adt_normalization)) |>
     reformat_nulls() |>
     t()
   
@@ -96,9 +99,13 @@ if (has_processed) {
 
 
 ```{r fig.alt="Cell filtering based on both ADT and RNA counts", results='asis'}
-if (has_processed) { 
+if (has_processed & !(processed_meta$adt_scpca_filter_method == "No filter")) { 
   
   glue::glue('
+    
+    Note that low-quality cells as identified by ADT counts are not actually filtered from the SCE object.
+    Instead, cells that passed the filter threshold are labeled as `"Keep"` within the SCE object, and conversely cells that failed to pass the filtered are labeled as `"Remove"`.
+
     The plot below displays an overall view of cell filtering based on both ADT and RNA counts. Cells are labeled as one of the following:
   
     - "Keep": This cell is retained based on both RNA and ADT counts.
@@ -144,6 +151,14 @@ if (has_processed) {
     facet_wrap(vars(filter_summary)) +
     theme(plot.title = element_text(hjust = 0.5), 
           legend.position = "none")
+} else {
+  glue::glue("
+    <div class=\"alert alert-warning\">
+
+    No ADT filtering was performed on this library.
+
+    </div>
+  ")
 }
 ```