From 737ca588959d8bfc3d843e60795a766c801a0eac Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Fri, 13 Oct 2023 14:59:12 -0500
Subject: [PATCH 01/14] only return the SingleR model, not annotations

---
 bin/classify_SingleR.R        | 41 -----------------------------------
 modules/classify-celltypes.nf |  5 ++---
 2 files changed, 2 insertions(+), 44 deletions(-)
diff --git a/bin/classify_SingleR.R b/bin/classify_SingleR.R
index 39ab1755..cfb45202 100755
--- a/bin/classify_SingleR.R
+++ b/bin/classify_SingleR.R
@@ -21,11 +21,6 @@ option_list <- list(
     help = "path to file containing a single model generated for SingleR annotation. 
             File name is expected to be in form: <model name>_model.rds."
   ),
-  make_option(
-    opt_str = c("--output_singler_annotations_file"),
-    type = "character",
-    help = "path to output TSV file that will store the SingleR annotations. Must end in .tsv"
-  ),
   make_option(
     opt_str = c("--output_singler_results_file"),
     type = "character",
@@ -60,9 +55,6 @@ if (!file.exists(opt$sce_file)) {
 if (!(stringr::str_ends(opt$output_singler_results_file, ".rds"))) {
   stop("output SingleR result file name must end in .rds")
 }
-if (!(stringr::str_ends(opt$output_singler_annotations_file, ".tsv"))) {
-  stop("output SingleR annotations file name must end in .tsv")
-}
 
 # check that reference exists and filename is properly formatted
 singler_model_file <- opt$singler_model_file
@@ -103,41 +95,8 @@ singler_results <- SingleR::classifySingleR(
 # add reference name to singler_results DataFrame metadata
 metadata(singler_results)$reference_name <- reference_name
 
-
-# create data frame of annotations
-annotations_df <- tibble::tibble(
-  barcode = rownames(singler_results),
-  singler_celltype_annotation = singler_results$pruned.labels,
-)
-
-# map ontology labels to cell type names, as needed
-# we can tell if ontologies were used because this will exist:
-if ("cell_ontology_df" %in% names(singler_model)) {
-
-  # end up with columns: barcode, singler_celltype_annotation, singler_celltype_ontology
-  annotations_df <- annotations_df |>
-    dplyr::left_join(
-      # column names: ontology_id, ontology_cell_names
-      singler_model$cell_ontology_df, 
-      by = c("singler_celltype_annotation" = "ontology_id")
-    ) |> 
-    # rename columns
-    dplyr::rename(
-      singler_celltype_ontology = singler_celltype_annotation,
-      singler_celltype_annotation = ontology_cell_names
-    )
-  
-  # add cell_ontology_df to singler_results DataFrame metadata
-  metadata(singler_results)$cell_ontology_df <- singler_model$cell_ontology_df
-} 
-
-
-
 # export results ---------------
 
-# first, a stand-alone tsv of annotations
-readr::write_tsv(annotations_df, opt$output_singler_annotations_file)
-
 # next, the full result to a compressed rds
 readr::write_rds(
   singler_results,
diff --git a/modules/classify-celltypes.nf b/modules/classify-celltypes.nf
index f9c3c8b2..eb1a6fb2 100644
--- a/modules/classify-celltypes.nf
+++ b/modules/classify-celltypes.nf
@@ -22,7 +22,6 @@ process classify_singler {
       classify_SingleR.R \
         --sce_file "${processed_rds}" \
         --singler_model_file "${singler_model_file}" \
-        --output_singler_annotations_file "${singler_dir}/singler_annotations.tsv" \
         --output_singler_results_file "${singler_dir}/singler_results.rds" \
         --seed ${params.seed} \
         --threads ${task.cpus}
@@ -211,12 +210,12 @@ workflow annotate_celltypes {
 
       // incorporate annotations into SCE object
       add_celltypes_to_sce(assignment_input_ch.add_celltypes)
-      
+
       // mix in libraries without new celltypes
       // result is [meta, proccessed rds]
       celltyped_ch = assignment_input_ch.no_celltypes
         .map{[it[0], it[1]]}
-        .mix(add_celltypes_to_sce.out) 
+        .mix(add_celltypes_to_sce.out)
 
       // add back in the unchanged sce files to the results
       export_channel = celltyped_ch

From 3a8fec7afc5b87124b8ab2e2ea69c05f7495458e Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Fri, 13 Oct 2023 16:10:25 -0500
Subject: [PATCH 02/14] add repeat celltyping param

---
 modules/classify-celltypes.nf | 33 +++++++++++++++++++++++----------
 nextflow.config               |  1 +
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/modules/classify-celltypes.nf b/modules/classify-celltypes.nf
index eb1a6fb2..09d2537e 100644
--- a/modules/classify-celltypes.nf
+++ b/modules/classify-celltypes.nf
@@ -97,7 +97,7 @@ process add_celltypes_to_sce {
     annotated_rds = "${meta.library_id}_processed_annotated.rds"
     singler_present = "${singler_dir.name}" != "NO_FILE"
     singler_results = "${singler_dir}/singler_results.rds"
-    cellassign_present = "${cellassign_dir}.name" != "NO_FILE"
+    cellassign_present = "${cellassign_dir.name}" != "NO_FILE"
     cellassign_predictions = "${cellassign_dir}/cellassign_predictions.tsv"
     cellassign_ref_name = file("${meta.cellassign_reference_file}").baseName
     """
@@ -148,6 +148,8 @@ workflow annotate_celltypes {
           meta.cellassign_dir = "${meta.celltype_publish_dir}/${meta.library_id}_cellassign";
           meta.singler_model_file = singler_model_file;
           meta.cellassign_reference_file = cellassign_reference_file;
+          meta.singler_results_file = "${meta.singler_dir}/singler_results.rds";
+          meta.cellassign_predictions_file = "${meta.cellassign_dir}/cellassign_predictions.tsv"
           // return simplified input:
           [meta, processed_sce]
         }
@@ -157,18 +159,24 @@ workflow annotate_celltypes {
       singler_input_ch = celltype_input_ch
         // add in singler model or empty file
         .map{it.toList() + [file(it[0].singler_model_file ?: empty_file)]}
-        // skip if no singleR model file
+        // skip if no singleR model file or if singleR results are already present
         .branch{
-          missing_ref: it[2].name == "NO_FILE"
+          skip_singler: (!params.repeat_celltyping && file(it[0].singler_results_file).exists())
+                        || it[2].name == "NO_FILE"
           do_singler: true
         }
 
 
       // perform singleR celltyping and export results
       classify_singler(singler_input_ch.do_singler)
+
       // singleR output channel: [library_id, singler_results]
-      singler_output_ch = singler_input_ch.missing_ref
-        .map{[it[0]["library_id"], file(empty_file)]}
+      singler_output_ch = singler_input_ch.skip_singler
+        // provide existing singler results dir for those we skipped and empty file for those missing reference
+        .map{[
+          it[0]["library_id"],
+          file(it[0].singler_results_file).exists() ? file(it[0].singler_dir, type: 'dir') : file(empty_file)
+          ]}
         // add in channel outputs
         .mix(classify_singler.out)
 
@@ -178,7 +186,8 @@ workflow annotate_celltypes {
         .map{it.toList() + [file(it[0].cellassign_reference_file ?: empty_file)]}
         // skip if no cellassign reference file or reference name is not defined
         .branch{
-          missing_ref: it[2].name == "NO_FILE"
+          skip_cellassign: (!params.repeat_celltyping && file(it[0].cellassign_predictions_file).exists())
+                           || it[2].name == "NO_FILE"
           do_cellassign: true
         }
 
@@ -187,14 +196,18 @@ workflow annotate_celltypes {
       classify_cellassign(cellassign_input_ch.do_cellassign)
 
       // cellassign output channel: [library_id, cellassign_dir]
-      cellassign_output_ch = cellassign_input_ch.missing_ref
-        .map{[it[0]["library_id"], file(empty_file)]}
+      cellassign_output_ch = cellassign_input_ch.skip_cellassign
+        // provide existing cellassign predictions dir for those we skipped and empty file for those missing reference
+        .map{[
+          it[0]["library_id"],
+          file(it[0].cellassign_predictions_file).exists() ? file(it[0].cellassign_dir, type: 'dir') : file(empty_file)
+          ]}
         // add in channel outputs
         .mix(classify_cellassign.out)
 
       // prepare input for process to add celltypes to the processed SCE
       // result is [meta, processed rds, singler dir, cellassign dir]
-      assignment_input_ch = processed_sce_channel
+      assignment_input_ch = celltype_input_ch
         .map{[it[0]["library_id"]] + it}
         // add in singler results
         .join(singler_output_ch, by: 0, failOnMismatch: true, failOnDuplicate: true)
@@ -202,7 +215,7 @@ workflow annotate_celltypes {
         .join(cellassign_output_ch, by: 0, failOnMismatch: true, failOnDuplicate: true)
         .map{it.drop(1)} // remove library_id
         .branch{
-          // pull out libraries that actually have at least 1 type of annotations
+          // pull out libraries that actually have at least 1 type of annotation
           add_celltypes: (it[2].baseName != "NO_FILE") || (it[3].baseName != "NO_FILE")
           no_celltypes: true
         }
diff --git a/nextflow.config b/nextflow.config
index 0aa7ef79..6bb35e83 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -39,6 +39,7 @@ params {
   publish_fry_outs = false // alevin-fry outputs are not published by default. Use `--publish_fry_outs` to publish these files to the `checkpoints` folder.
   spliced_only = false // include only spliced reads in counts matrix, by default both unspliced and spliced reads are totaled and found in `counts` asasy of returned SingleCellExperiment object
   perform_celltyping = false // specify whether or not to incorporate cell type annotations
+  repeat_celltyping = false // if cell type annotations alread exist, skip cell type classification with SingleR and CellAssign
 
   seed = 2021   // random number seed for filtering and post-processing (0 means use system seed)
 

From 6034e8b2126cbd6520c51ca0539b0ae6acca8c40 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Fri, 13 Oct 2023 16:10:38 -0500
Subject: [PATCH 03/14] use correct variable for setting the seed

---
 bin/classify_SingleR.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/classify_SingleR.R b/bin/classify_SingleR.R
index cfb45202..54862131 100755
--- a/bin/classify_SingleR.R
+++ b/bin/classify_SingleR.R
@@ -18,7 +18,7 @@ option_list <- list(
   make_option(
     opt_str = c("--singler_model_file"),
     type = "character",
-    help = "path to file containing a single model generated for SingleR annotation. 
+    help = "path to file containing a single model generated for SingleR annotation.
             File name is expected to be in form: <model name>_model.rds."
   ),
   make_option(
@@ -44,7 +44,7 @@ opt <- parse_args(OptionParser(option_list = option_list))
 # Set up -----------------------------------------------------------------------
 
 # set seed
-set.seed(opt$random_seed)
+set.seed(opt$seed)
 
 # check that input file file exists
 if (!file.exists(opt$sce_file)) {

From 8f94751fbcd24de9937745bd6417dc2147587e38 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Mon, 16 Oct 2023 14:47:59 -0500
Subject: [PATCH 04/14] join singler annotations by barcodes

---
 bin/add_celltypes_to_sce.R | 60 +++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 26 deletions(-)

diff --git a/bin/add_celltypes_to_sce.R b/bin/add_celltypes_to_sce.R
index 4531f78e..f21c55f7 100755
--- a/bin/add_celltypes_to_sce.R
+++ b/bin/add_celltypes_to_sce.R
@@ -56,59 +56,67 @@ sce <- readr::read_rds(opt$input_sce_file)
 # SingleR results --------------------------------------------------------------
 
 if(!is.null(opt$singler_results)){
-  
+
   if(!file.exists(opt$singler_results)){
     stop("Missing singleR results file")
   }
-  
+
   singler_results <- readr::read_rds(opt$singler_results)
-  
-  # first just add in labels as annotations
-  sce$singler_celltype_annotation = singler_results$pruned.labels
-  
+
+  # create a tibble with annotations and barcode
+  # later we'll add the annotations into colData by joining on barcodes column
+  annotations_df <- tibble::tibble(
+    barcodes = rownames(singler_results),
+    singler_celltype_annotation = singler_results$pruned.labels,
+  )
+
   # map ontology labels to cell type names, as needed
   # we can tell if ontologies were used because this will exist:
   if ("cell_ontology_df" %in% names(singler_results)) {
-    
+
     # end up with columns: barcode, singler_celltype_annotation, singler_celltype_ontology
-    colData(sce) <- colData(sce) |>
-      as.data.frame() |> 
+    annotations_df <- annotations_df |>
       dplyr::left_join(
         # column names: ontology_id, ontology_cell_names
-        singler_results$cell_ontology_df, 
+        singler_results$cell_ontology_df,
         by = c("singler_celltype_annotation" = "ontology_id")
-      ) |> 
+      ) |>
       # rename columns
       dplyr::rename(
         # ontology should contain the original pruned labels
         singler_celltype_ontology = singler_celltype_annotation,
         # annotation contains the cell names associated with the ontology
         singler_celltype_annotation = ontology_cell_names
-      ) |>
-      DataFrame(row.names = colData(sce)$barcodes)
+      )
+
+  }
 
-  } 
+  # add annotations to colData
+  colData(sce) <- colData(sce) |>
+    as.data.frame() |>
+    dplyr::left_join(annotations_df, by = c("barcodes")) |>
+    DataFrame(row.names = colData(sce)$barcodes)
 
- # add singler info to metadata
+  # add singler info to metadata
   metadata(sce)$singler_results <- singler_results
   metadata(sce)$reference_name <- metadata(singler_results)$reference_name
-  
-  # add note about cell type method to metadata 
+
+  # add note about cell type method to metadata
   metadata(sce)$celltype_methods <- c(metadata(sce)$celltype_methods, "singler")
-  
+
 }
 
 # CellAssign results -----------------------------------------------------------
 
 if(!is.null(opt$cellassign_predictions)){
- 
+
   # check that cellassign predictions file was provided
   if (!file.exists(opt$cellassign_predictions)) {
     stop("Missing CellAssign predictions file")
   }
-  
+
   predictions <- readr::read_tsv(opt$cellassign_predictions)
-  
+
   # get cell type with maximum prediction value for each cell
   celltype_assignments <- predictions |>
     tidyr::pivot_longer(
@@ -119,23 +127,23 @@ if(!is.null(opt$cellassign_predictions)){
     dplyr::group_by(barcode) |>
     dplyr::slice_max(prediction, n = 1) |>
     dplyr::ungroup()
-  
+
   # join by barcode to make sure assignments are in the right order
   celltype_assignments <- data.frame(barcode = sce$barcodes) |>
     dplyr::left_join(celltype_assignments, by = "barcode")
-  
+
   # add cell type and prediction to colData
   sce$cellassign_celltype_annotation <- celltype_assignments$celltype
   sce$cellassign_max_prediction <- celltype_assignments$prediction
-  
+
   # add entire predictions matrix and ref name to metadata
   metadata(sce)$cellassign_predictions <- predictions
   metadata(sce)$cellassign_reference <- opt$cellassign_ref_name
-  
+
   # add cellassign as celltype method
   # note that if `metadata(sce)$celltype_methods` doesn't exist yet, this will
   #  come out to just the string "cellassign"
-  metadata(sce)$celltype_methods <- c(metadata(sce)$celltype_methods, "cellassign") 
+  metadata(sce)$celltype_methods <- c(metadata(sce)$celltype_methods, "cellassign")
 }
 
 # export annotated object with cellassign assignments

From 11b930b652fc5b654645a202300161cb3e4178c3 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Tue, 17 Oct 2023 14:20:17 -0500
Subject: [PATCH 05/14] Apply suggestions from code review

Co-authored-by: Joshua Shapiro <josh.shapiro@ccdatalab.org>
---
 modules/classify-celltypes.nf | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/modules/classify-celltypes.nf b/modules/classify-celltypes.nf
index 09d2537e..3c29a76a 100644
--- a/modules/classify-celltypes.nf
+++ b/modules/classify-celltypes.nf
@@ -161,8 +161,8 @@ workflow annotate_celltypes {
         .map{it.toList() + [file(it[0].singler_model_file ?: empty_file)]}
         // skip if no singleR model file or if singleR results are already present
         .branch{
-          skip_singler: (!params.repeat_celltyping && file(it[0].singler_results_file).exists())
-                        || it[2].name == "NO_FILE"
+          skip_singler: !params.repeat_celltyping && file(it[0].singler_results_file).exists()
+          missing_ref: it[2].name == "NO_FILE"
           do_singler: true
         }
 
@@ -172,11 +172,10 @@ workflow annotate_celltypes {
 
       // singleR output channel: [library_id, singler_results]
       singler_output_ch = singler_input_ch.skip_singler
-        // provide existing singler results dir for those we skipped and empty file for those missing reference
-        .map{[
-          it[0]["library_id"],
-          file(it[0].singler_results_file).exists() ? file(it[0].singler_dir, type: 'dir') : file(empty_file)
-          ]}
+        // provide existing singler results dir for those we skipped
+        .map{[it[0]["library_id"], file(it[0].singler_dir, type: 'dir')}
+        // add empty file for missing ref samples
+        .mix(singler_input_ch.missing_ref.map{[it[0]["library_id"], file(empty_file)]} )
         // add in channel outputs
         .mix(classify_singler.out)
 

From adec1fbcf75d30e8e55210224fc68a45bdb47b0d Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Tue, 17 Oct 2023 14:26:25 -0500
Subject: [PATCH 06/14] use three branches for cellassign

---
 modules/classify-celltypes.nf | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/modules/classify-celltypes.nf b/modules/classify-celltypes.nf
index 3c29a76a..d98f9d44 100644
--- a/modules/classify-celltypes.nf
+++ b/modules/classify-celltypes.nf
@@ -173,7 +173,7 @@ workflow annotate_celltypes {
       // singleR output channel: [library_id, singler_results]
       singler_output_ch = singler_input_ch.skip_singler
         // provide existing singler results dir for those we skipped
-        .map{[it[0]["library_id"], file(it[0].singler_dir, type: 'dir')}
+        .map{[it[0]["library_id"], file(it[0].singler_dir, type: 'dir')]}
         // add empty file for missing ref samples
         .mix(singler_input_ch.missing_ref.map{[it[0]["library_id"], file(empty_file)]} )
         // add in channel outputs
@@ -185,8 +185,8 @@ workflow annotate_celltypes {
         .map{it.toList() + [file(it[0].cellassign_reference_file ?: empty_file)]}
         // skip if no cellassign reference file or reference name is not defined
         .branch{
-          skip_cellassign: (!params.repeat_celltyping && file(it[0].cellassign_predictions_file).exists())
-                           || it[2].name == "NO_FILE"
+          skip_cellassign: !params.repeat_celltyping && file(it[0].cellassign_predictions_file).exists()
+          missing_ref: it[2].name == "NO_FILE"
           do_cellassign: true
         }
 
@@ -196,11 +196,10 @@ workflow annotate_celltypes {
 
       // cellassign output channel: [library_id, cellassign_dir]
       cellassign_output_ch = cellassign_input_ch.skip_cellassign
-        // provide existing cellassign predictions dir for those we skipped and empty file for those missing reference
-        .map{[
-          it[0]["library_id"],
-          file(it[0].cellassign_predictions_file).exists() ? file(it[0].cellassign_dir, type: 'dir') : file(empty_file)
-          ]}
+        // provide existing cellassign predictions dir for those we skipped
+        .map{[it[0]["library_id"], file(it[0].cellassign_dir, type: 'dir')]}
+        // add empty file for missing ref samples
+        .mix(cellassign_input_ch.missing_ref.map{[it[0]["library_id"], file(empty_file)]} )
         // add in channel outputs
         .mix(classify_cellassign.out)
 

From b2501f88b340b942c4d4dc41afbefe3a679deb7c Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 18 Oct 2023 08:16:53 -0500
Subject: [PATCH 07/14] spelling

Co-authored-by: Stephanie Spielman <stephanie.spielman@gmail.com>
---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 6bb35e83..2f46ecbf 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -39,7 +39,7 @@ params {
   publish_fry_outs = false // alevin-fry outputs are not published by default. Use `--publish_fry_outs` to publish these files to the `checkpoints` folder.
   spliced_only = false // include only spliced reads in counts matrix, by default both unspliced and spliced reads are totaled and found in `counts` asasy of returned SingleCellExperiment object
   perform_celltyping = false // specify whether or not to incorporate cell type annotations
-  repeat_celltyping = false // if cell type annotations alread exist, skip cell type classification with SingleR and CellAssign
+  repeat_celltyping = false // if true, rerun SingleR and CellAssign classification even when results already exist; by default, existing cell type results are reused and classification is skipped
 
   seed = 2021   // random number seed for filtering and post-processing (0 means use system seed)
 

From d8544a51e9c1cfc2a718de71544dc8fd7bb0f759 Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Wed, 18 Oct 2023 11:46:16 -0500
Subject: [PATCH 08/14] use scpcaTools edge

---
 config/containers.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/containers.config b/config/containers.config
index af1c0aa5..e01373a3 100644
--- a/config/containers.config
+++ b/config/containers.config
@@ -1,5 +1,5 @@
 // Docker container images
-SCPCATOOLS_CONTAINER = 'ghcr.io/alexslemonade/scpca-tools:v0.3.0'
+SCPCATOOLS_CONTAINER = 'ghcr.io/alexslemonade/scpca-tools:edge'
 
 ALEVINFRY_CONTAINER = 'quay.io/biocontainers/alevin-fry:0.7.0--h9f5acd7_1'
 BCFTOOLS_CONTAINER = 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0'

From 9cb8b6dc8a84f99ca6168608a8880be651067ebf Mon Sep 17 00:00:00 2001
From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
Date: Thu, 19 Oct 2023 11:54:07 -0500
Subject: [PATCH 09/14] name singler reference

---
 bin/add_celltypes_to_sce.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/add_celltypes_to_sce.R b/bin/add_celltypes_to_sce.R
index f21c55f7..fb3cf0a3 100755
--- a/bin/add_celltypes_to_sce.R
+++ b/bin/add_celltypes_to_sce.R
@@ -99,7 +99,7 @@ if(!is.null(opt$singler_results)){
 
   # add singler info to metadata
   metadata(sce)$singler_results <- singler_results
-  metadata(sce)$reference_name <- metadata(singler_results)$reference_name
+  metadata(sce)$singler_reference <- metadata(singler_results)$reference_name
 
   # add note about cell type method to metadata
   metadata(sce)$celltype_methods <- c(metadata(sce)$celltype_methods, "singler")

From 3c0b65c349a5365f14bbf39f28ec3076082f0f74 Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Thu, 19 Oct 2023 13:49:10 -0400
Subject: [PATCH 10/14] Apply suggestions from code review

Co-authored-by: Joshua Shapiro <josh.shapiro@ccdatalab.org>
---
 templates/qc_report/celltypes_supplemental_report.rmd | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index bee630c8..956bf37b 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -70,22 +70,21 @@ In this section, we assess the reliability of cell type annotations using diagno
 knitr::asis_output("
 ### `SingleR` assessment
 
-Here, we evaluate the quality of `SingleR` cell type annotations by comparing the scores for the assigned cell type to the median score of all other cell type labels.
+To assess the quality of the `SingleR`-assigned cell types, we use the _delta median_ statistic.
 
-- This quantity is the _delta median_ statistic. 
-- _Delta median_ is calculated for each cell by subtracting the median score of all other cell type labels from the score of the assigned cell type label.
+- _Delta median_ is calculated for each cell as the difference between the score of the assigned cell type label and  the median score of the other cell type labels in the model.
 - Higher _delta median_ values indicate higher quality cell type annotations.
   - However, there is no universal threshold for calling absolute high vs. low quality, as described in the [`SingleR` book section on 'Annotation diagnostics'](https://bioconductor.org/books/release/SingleRBook/annotation-diagnostics.html#annotation-diagnostics).
 
 You can interpret this plot as follows:
 
-- Each point represents the _delta median_ statistic of a given cell whose final `SingleR` annotation is shown on the y-axis.
+- Each point represents the _delta median_ statistic of a given cell whose assigned `SingleR` annotation is shown on the y-axis.
 - The color of the points indicates how confident `SingleR` is in the cell type annotation: 
   - High-quality cell annotations are shown as closed points.
   - Low-quality cell annotations are shown as open points. 
   In other sections of this report, these cells are referred to as `Unknown cell types`.
   - For more information on how `SingleR` calculates annotation quality, please refer to [this `SingleR` documentation](https://rdrr.io/bioc/SingleR/man/pruneScores.html).
-- Red diamonds represent the median _delta median_ statistic among high-quality points for the given annotation.
+- Red diamonds represent the median _delta median_ statistic among high-quality annotations for the given cell type label.
 ")
 ```
 

From fc03ace8853cc63c06552a36790955b81211d44d Mon Sep 17 00:00:00 2001
From: Stephanie <stephanie.spielman@ccdatalab.org>
Date: Thu, 19 Oct 2023 14:42:40 -0400
Subject: [PATCH 11/14] range 0-1

---
 templates/qc_report/celltypes_supplemental_report.rmd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index 956bf37b..8244a924 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -74,7 +74,8 @@ To assess the quality of the `SingleR`-assigned cell types, we use the _delta me
 
 - _Delta median_ is calculated for each cell as the difference between the score of the assigned cell type label and  the median score of the other cell type labels in the model.
 - Higher _delta median_ values indicate higher quality cell type annotations.
-  - However, there is no universal threshold for calling absolute high vs. low quality, as described in the [`SingleR` book section on 'Annotation diagnostics'](https://bioconductor.org/books/release/SingleRBook/annotation-diagnostics.html#annotation-diagnostics).
+  - Values can range from 0-1. 
+  - Note that there is no universal threshold for calling absolute high vs. low quality, as described in the [`SingleR` book section on 'Annotation diagnostics'](https://bioconductor.org/books/release/SingleRBook/annotation-diagnostics.html#annotation-diagnostics).
 
 You can interpret this plot as follows:
 

From df363014315279dfa5a20f7b78f171c2714831ea Mon Sep 17 00:00:00 2001
From: Stephanie <stephanie.spielman@ccdatalab.org>
Date: Thu, 19 Oct 2023 14:44:44 -0400
Subject: [PATCH 12/14] some wording - scores are from singler, remove extra
 space, reference dataset not model.

---
 templates/qc_report/celltypes_supplemental_report.rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index 8244a924..2a2351d8 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -72,7 +72,7 @@ knitr::asis_output("
 
 To assess the quality of the `SingleR`-assigned cell types, we use the _delta median_ statistic.
 
-- _Delta median_ is calculated for each cell as the difference between the score of the assigned cell type label and  the median score of the other cell type labels in the model.
+- _Delta median_ is calculated for each cell as the difference between the `SingleR` score of the assigned cell type label and the median score of the other cell type labels in the reference dataset.
 - Higher _delta median_ values indicate higher quality cell type annotations.
   - Values can range from 0-1. 
   - Note that there is no universal threshold for calling absolute high vs. low quality, as described in the [`SingleR` book section on 'Annotation diagnostics'](https://bioconductor.org/books/release/SingleRBook/annotation-diagnostics.html#annotation-diagnostics).

From 1bd11cd0668a784ba475a4acc47424908f6590d9 Mon Sep 17 00:00:00 2001
From: Stephanie <stephanie.spielman@ccdatalab.org>
Date: Thu, 19 Oct 2023 16:45:44 -0400
Subject: [PATCH 13/14] use my sentence but also make it active

---
 .../celltypes_supplemental_report.rmd         | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index 2a2351d8..1f9b0b96 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -74,15 +74,15 @@ To assess the quality of the `SingleR`-assigned cell types, we use the _delta me
 
 - _Delta median_ is calculated for each cell as the difference between the `SingleR` score of the assigned cell type label and the median score of the other cell type labels in the reference dataset.
 - Higher _delta median_ values indicate higher quality cell type annotations.
-  - Values can range from 0-1. 
+  - Values can range from 0-1.
   - Note that there is no universal threshold for calling absolute high vs. low quality, as described in the [`SingleR` book section on 'Annotation diagnostics'](https://bioconductor.org/books/release/SingleRBook/annotation-diagnostics.html#annotation-diagnostics).
 
 You can interpret this plot as follows:
 
 - Each point represents the _delta median_ statistic of a given cell whose assigned `SingleR` annotation is shown on the y-axis.
-- The color of the points indicates how confident `SingleR` is in the cell type annotation: 
+- The point color indicates `SingleR`'s quality assessment of the annotation::
   - High-quality cell annotations are shown as closed points.
-  - Low-quality cell annotations are shown as open points. 
+  - Low-quality cell annotations are shown as open points.
   In other sections of this report, these cells are referred to as `Unknown cell types`.
   - For more information on how `SingleR` calculates annotation quality, please refer to [this `SingleR` documentation](https://rdrr.io/bioc/SingleR/man/pruneScores.html).
 - Red diamonds represent the median _delta median_ statistic among high-quality annotations for the given cell type label.
@@ -105,7 +105,7 @@ delta_median_df <- tibble::tibble(
   # so, negate for this variable:
   confident = !is.na(metadata(processed_sce)$singler_result$pruned.labels)
 ) |>
-  dplyr::mutate(confident = 
+  dplyr::mutate(confident =
     ifelse(confident, "High-quality", "Low-quality")
   )
 
@@ -158,7 +158,7 @@ ggplot(delta_median_df) +
   aes(
     x = delta_median,
     y = annotation_wrapped,
-    shape = confident, 
+    shape = confident,
     alpha = confident
   ) +
   ggforce::geom_sina(
@@ -167,7 +167,7 @@ ggplot(delta_median_df) +
     fill = "white", # will apply to non-confident fill only
     position = position_dodge(width = 0.05) # Keep both types of points mostly in line
   ) +
-  # Handle points aesthetics: 
+  # Handle points aesthetics:
   #  confident are closed black with alpha = 0.5
   #  not confident are open black with alpha = 1
   scale_shape_manual(values = c(19, 21)) +
@@ -179,16 +179,16 @@ ggplot(delta_median_df) +
   ) +
   # add median diamond for confident points only
   stat_summary(
-    data = delta_median_confident_df, 
+    data = delta_median_confident_df,
     color = "red",
-    geom = "point", 
-    fun = "median", 
-    shape = 18, 
-    size = 2.25, 
+    geom = "point",
+    fun = "median",
+    shape = 18,
+    size = 2.25,
     alpha = 0.9
   ) +
   guides(
-    alpha = FALSE, 
+    alpha = "none",
     shape = guide_legend(override.aes = list(size = 1.5, alpha = 0.55))
   ) +
   theme(

From 5e66ceca129d33c871d0ceaeb0795f49846174be Mon Sep 17 00:00:00 2001
From: Stephanie Spielman <stephanie.spielman@gmail.com>
Date: Thu, 19 Oct 2023 16:46:44 -0400
Subject: [PATCH 14/14] Update
 templates/qc_report/celltypes_supplemental_report.rmd

remove extra colon i introduced
---
 templates/qc_report/celltypes_supplemental_report.rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
index 1f9b0b96..72a3932c 100644
--- a/templates/qc_report/celltypes_supplemental_report.rmd
+++ b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -80,7 +80,7 @@ To assess the quality of the `SingleR`-assigned cell types, we use the _delta me
 You can interpret this plot as follows:
 
 - Each point represents the _delta median_ statistic of a given cell whose assigned `SingleR` annotation is shown on the y-axis.
-- The point color indicates `SingleR`'s quality assessment of the annotation::
+- The point style indicates `SingleR`'s quality assessment of the annotation:
   - High-quality cell annotations are shown as closed points.
   - Low-quality cell annotations are shown as open points.
   In other sections of this report, these cells are referred to as `Unknown cell types`.