aifimmunology · lokada-aifi · May 25, 2023 · May 26, 2023 · May 26, 2023 · Jul 11, 2023
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,6 @@
 .Rhistory
 .RData
 .Ruserdata
+dev/*
+.ipynb_checkpoints/
+*/.ipynb_checkpoints/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,7 @@ Description: Provides data, tests, and markdowns for pipeline use.
 License: Allen Institute Software License
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 6.1.1
+RoxygenNote: 7.1.1
 Depends:
   Seurat,
   Matrix

diff --git a/R/format_query_so.R b/R/format_query_so.R
@@ -0,0 +1,46 @@
+#' Format input H5 as a Seurat object 
+#'
+#' Formats the .H5 file as a Seurat object with option to
+#' bootstrap if there are fewer than 200 cells. 
+#'
+#' For bootstrapping, the entire dataset will be duplicated 
+#' enough times to obtain 200 cells prior to labeling. Duplicates
+#' will be removed downstream.
+#'
+#' @param in_h5 Filepath to input h5 file
+#' @param allow_few_cells Logical value, default FALSE. If TRUE allows proceeding when
+#' input dataset has fewer thatn 200 cells but bootstrapping up to >=200 cells. If FALSE,
+#' errors out if too few cells
+#' @param dup_pattern String value prefix for bootstrapped cellnames when allow_few_cells
+#' behavior is triggered. Default '__dup__'
+format_query_so <- function(in_h5, allow_few_cells = FALSE, dup_pattern = "__dup__"){
+    # Read in counts
+    query_data <- H5weaver::read_h5_dgCMatrix(
+        h5_file = in_h5,
+        feature_names = "name"
+    )
+    rownames(query_data) <- as.vector(rhdf5::h5read(in_h5, "/matrix/features/name"))
+
+    # Handle low cell count option
+    if(allow_few_cells & ncol(query_data) < 200) {
+      stm("WARNING: < 200 Cells as Input. Attempting compensation")
+      stm("WARNING: Labels may be inaccurate")
+
+      ndup <- ceiling(200 / ncol(query_data))
+
+      query_data_orig <- query_data
+      for(i in 1:(ndup-1)) {
+        dup_data <- query_data_orig
+        colnames(dup_data) <- paste0(dup_pattern, i, "_", colnames(dup_data))
+        query_data <- cbind(query_data, dup_data)
+      }
+      rm(query_data_orig)
+    } else if (ncol(query_data) < 200) {
+      stm("ERROR: < 200 Cells as Input. Too Few for Labeling")
+      stop("ERROR: < 200 Cells as Input. Too Few for Labeling")
+    } 
+
+    query_so <- Seurat::CreateSeuratObject(counts = query_data)
+
+    return(query_so)
+}
diff --git a/R/label_seurat4_bmmc.R b/R/label_seurat4_bmmc.R
@@ -0,0 +1,64 @@
+#' Label Seurat with BMMC Reference
+#'
+#' @param query_so The query seurat object
+#' @param ref_so The reference seurat object
+#' @param ref_reduction Reduction value passed to Seurat::FindTransferAnchors(). Defualt 'spca'.
+#' @param ref_data List of reference data passed to Seurat::TransferData(). Default celltype.l1,
+#' @param dims Dimensionsfor FindTransferAnchors, defualt 1:50
+#' @param verbose Default FALSE
+#' @param ncores Number of cores for parallel processing via Seurat's future::plan implementation.
+#' @param sct_method Method argument passed to Seurat::SCTransform, default 'glmGamPoi'
+label_seurat4_bmmc <- function(
+    query_so,
+    ref_so,
+    ref_reduction = "spca",
+    ref_data = list(
+      celltype.l1 = "celltype.l1",
+      celltype.l2 = "celltype.l2",
+      predicted_ADT = "ADT"
+    ),
+    dims = 1:50,
+    verbose = FALSE
+  )
+  {
+
+  if(!(ref_reduction %in% names(ref_so@reductions))) {
+    stop(sprintf("Reference dataset reduction '%s' not found in reference data object",
+                 ref_reduction))
+  }
+
+  # validate and group the ref_data types
+  ref_data_types <- .check_transfer_values(reference_so = ref_so, ref_data = ref_data)
+
+  if(verbose){message("Finding anchors")}
+  anchors <- Seurat::FindTransferAnchors(
+    reference = ref_so,
+    query = query_so,
+    reference.assay = 'RNA',
+    normalization.method = 'LogNormalize',
+    reference.reduction = ref_reduction,
+    recompute.residuals = TRUE,  # function default
+    dims = dims,
+    verbose = verbose
+  )
+
+  query_so <- Seurat::TransferData(
+    anchorset = anchors,
+    query = query_so,
+    reference = ref_so,
+    refdata = ref_data,
+    weight.reduction = 'pcaproject',
+    verbose = verbose,
+    l2.norm = FALSE,
+    dims = NULL,
+    k.weight = 50,
+    sd.weight = 1,
+    eps = 0,
+    n.trees = 50,
+    slot = "data",
+    prediction.assay = FALSE,
+    store.weights = TRUE
+  )
+
+  return(query_so)
+}
diff --git a/R/label_seurat4_pbmc.R b/R/label_seurat4_pbmc.R
@@ -0,0 +1,114 @@
+.validate_future_cores <- function(ncores, verbose = TRUE){
+  if (ncores > 1){
+    if(ncores > (future::availableCores() - 1)){
+      if(verbose){
+        message(
+          sprintf("%s available cores. Reducing future cores from %s to %s",
+                  future::availableCores(), ncores,  future::availableCores()-1)
+          )
+      }
+      ncores <- future::availableCores() - 1
+    }
+  }
+  return(ncores)
+}
+
+.check_transfer_values <- function(reference_so, ref_data){
+  imeta <- which(unlist(ref_data) %in% names([email protected]))
+  if(length(imeta) > 0){
+    meta = unlist(ref_data)[imeta]
+  } else {
+    meta <- NULL
+  }
+
+  idata <- which(unlist(ref_data) %in% names(reference_so@assays))
+  if(length(idata) > 0){
+    dat = unlist(ref_data)[idata]
+  } else {
+    dat = NULL
+  }
+
+  missing <- setdiff(unlist(ref_data), c(meta, dat))
+  if(length(missing) > 0){
+    stop(sprintf("Reference data to transfer not found in reference object: %s",
+                 paste(missing, collapse = ", ")))
+  }
+
+  return(list(meta = meta, data = dat))
+}
+
+#' Label Seurat with PBMC Reference
+#'
+#' @param query_so The query seurat object
+#' @param ref_so The reference seurat object
+#' @param ref_reduction Reduction value passed to Seurat::FindTransferAnchors(). Defualt 'spca'.
+#' @param ref_data List of reference data passed to Seurat::TransferData(). Default celltype.l1,
+#' @param dims Dimensionsfor FindTransferAnchors, defualt 1:50
+#' @param verbose Default FALSE
+#' @param ncores Number of cores for parallel processing via Seurat's future::plan implementation.
+#' @param sct_method Method argument passed to Seurat::SCTransform, default 'glmGamPoi'
+label_seurat4_pbmc <- function(
+    query_so,
+    ref_so,
+    ref_reduction = "spca",
+    ref_data = list(
+      celltype.l1 = "celltype.l1",
+      celltype.l2 = "celltype.l2",
+      celltype.l2.5 = "celltype.l2.5",
+      celltype.l3 = "celltype.l3",
+      predicted_ADT = "ADT"
+    ),
+    dims = 1:50,
+    verbose = FALSE,
+    ncores = 1,
+    sct_method = "glmGamPoi"
+  )
+  {
+
+  if(!(ref_reduction %in% names(ref_so@reductions))) {
+    stop(sprintf("Reference dataset reduction '%s' not found in reference data object",
+                 ref_reduction))
+  }
+
+  # validate and group the ref_data types
+  ref_data_types <- .check_transfer_values(reference_so = ref_so, ref_data = ref_data)
+
+  # Set Cores
+  if (verbose){ message("Setting multiple cores for SCTransform") }
+  ncores <- .validate_future_cores(ncores = ncores, verbose = verbose)
+  if (ncores > 1){
+    future::plan(strategy = 'multicore', workers = ncores)
+  }
+
+  if(verbose){message("Finding anchors")}
+  anchors <- Seurat::FindTransferAnchors(
+    reference = ref_so,
+    query = query_so,
+    reference.assay = 'SCT',
+    normalization.method = 'SCT',
+    reference.reduction = ref_reduction,
+    recompute.residuals = TRUE,  # function default
+    dims = dims,
+    verbose = verbose
+  )
+
+  query_so <- Seurat::TransferData(
+    anchorset = anchors,
+    query = query_so,
+    reference = ref_so,
+    refdata = ref_data,
+    weight.reduction = 'pcaproject',
+    verbose = verbose,
+    l2.norm = FALSE,
+    dims = NULL,
+    k.weight = 50,
+    sd.weight = 1,
+    eps = 0,
+    n.trees = 50,
+    slot = "data",
+    prediction.assay = FALSE,
+    store.weights = TRUE
+  )
+
+  return(query_so)
+}
diff --git a/R/read_reference.R b/R/read_reference.R
@@ -0,0 +1,33 @@
+#' Read in and validate input reference object file
+#'
+#' Currently supports .rds or .h5seurat input file and enforces read in
+#' object as type 'Seurat'.
+#'
+#' @param ref_fp File path to reference object, an .rds or .h5seurat object
+#' @return Seurat object contained in input file
+#' @export 
+read_reference <- function(ref_fp){
+    valid_extensions <- c("rds", "h5seurat")
+
+    extension <- tolower(gsub(".*[.]", "", ref_fp))
+    if (!(extension %in% valid_extensions)) {
+        stop(sprintf("Reference file extension [%s] not in valid_extensions [%s]",
+                    extension,
+                    paste(valid_extensions, collapse = ", ")))
+    }
+
+    if(extension == "rds"){
+        ref_so <- readRDS(ref_fp)
+        if(!inherits(ref_so, 'Seurat')){
+            stop(sprintf("Input reference RDS file does not contain a Seurat object. Object in file '%s' is '%s'",
+                 ref_fp,
+                 class(ref_so)))
+        }
+    } else if (extension == "h5seurat") {
+        ref_so <- SeuratDisk::LoadH5Seurat(ref_fp)
+    } else {
+        stop(sprintf("Need to define a read method for valid reference extension '%s'", extension))
+    }
+
+    return(ref_so)
+}
diff --git a/R/util.R b/R/util.R
@@ -10,3 +10,113 @@ stm <- function(x) {
 
   write(paste0("[",Sys.time(),"] ",x), stderr())
 }
+
+#' Suppress selected messages and/or warnings from function output
+#' 
+#' Returns the result slot of the quietly call. Prints all output in the outputs slot.
+#' Outputs messages for all messages in the messages slot that do not match expected
+#' values or patterns. Outputs warnings for all items in warnings slot that do not
+#' match expected values or patterns.
+#'
+#' @param A function call result generated using \code{purrr::quietly()}
+#' @param values_suppress Named list of character vectors. Names should be be 'messages' and 'warnings'.
+#' Vectors are values of expected messages and warnings that should be suppressed (exact matches). For 
+#' example, \code{list(messages=c('Expected message'), warnings = c("Expected warning"))}
+#' @param patterns_suppress Named list of character vectors. Names should be be 'messages' and 'warnings'.
+#' Vectors are patterns of expected messages and warnings that should be suppressed (pattern matched)
+#' @returns The $result value of the input quietly call result.
+#' @examples
+#' testfun <- function(){
+#'     print('testprint1')
+#'     print('asdfsdaf')
+#'     print('sdfasdfas')
+#'     message('expected_message_1')
+#'     message('exp_message_2')
+#'     message('unexpected message1')
+#'     warning('exp_warn_2')
+#'     warning('expected warn')
+#'     warning('unexpected warn')
+#'     return(NULL)
+#' }
+#' vlist <- list(messages=c('expected_message_1'), warnings = c("exp_warn_2"))
+#' plist <- list(messages=c('test'), warnings = c("^expected"))
+#' # # See result of filtered quietly call
+#' # filter_quietly(purrr::quietly(testfun)(), values_suppress=vlist, plist)
+#' # # Compare to result of original function call
+#' # testfun()
+filter_quietly <- function(
+    quietly_res, 
+    values_suppress = list(messages = character(), 
+                           warnings = character()),
+    patterns_suppress = list(messages = character(), 
+                             warnings = character())
+){
+    default_list <- list(
+        messages = character(), 
+        warnings = character()
+    )
+
+    # Validate values_suppress
+    missing_val <- setdiff(names(default_list), names(values_suppress))
+    extra_val <- setdiff(names(values_suppress), names(default_list))
+    if (length(missing_val) > 0){
+        values_suppress <- c(values_suppress, default_list[missing_val])
+    }
+    if (length(extra_val) > 0){
+        warning(sprintf("Unexpected list item names in values_suppress: %s. Expected labels are messages, output, or warnings",
+                       paste(extra_val, collapse = ',')))
+    }
+
+    # Validate patterns_suppress
+    missing_pat <- setdiff(names(default_list), names(patterns_suppress))
+    extra_pat <- setdiff(names(patterns_suppress), names(default_list))
+    if (length(missing_pat) > 0){
+        patterns_suppress <- c(patterns_suppress, default_list[missing_pat])
+    }
+    if (length(extra_pat) > 0){
+        warning(sprintf("Unexpected list item names in patterns_suppress: %s. Expected labels are messages, output, or warnings",
+                       paste(extra_pat, collapse = ',')))
+    }
+
+    .match_listwise = function(query_list, pattern_list){
+        matchvals <- unlist(sapply(pattern_list, function(x){grep(x, query_list, value = T)}))
+        if(length(matchvals)>0){
+            return(matchvals)
+        } 
+
+    }
+    .filter_values <- function(fieldname, patterns_suppress, values_suppress){
+        ignore_match <- .match_listwise(quietly_res[[fieldname]], patterns_suppress[[fieldname]])
+        ignore_value_i <- which(gsub("\n","", quietly_res[[fieldname]]) %in% values_suppress[[fieldname]])
+        ignore_value <- quietly_res[[fieldname]][ignore_value_i]
+        ignore_all <- unique(c(ignore_match, ignore_value))
+        output_vals <- setdiff(quietly_res[[fieldname]], ignore_all)
+        return(output_vals)
+    }
+
+    # Print all output
+    if(length(quietly_res$output) > 0){
+        for (outmsg in quietly_res$output) {
+            cat(outmsg)
+        }
+    }
+
+    # Handle messages
+    message_vals <- .filter_values('messages', patterns_suppress, values_suppress)
+    if(length(message_vals) > 0){
+        for (msg in message_vals) {
+            message(msg)
+        }
+    }
+
+    # Handle warnings
+    warn_vals <- .filter_values('warnings', patterns_suppress, values_suppress)
+    if(length(warn_vals) > 0){
+        for (warnval in warn_vals) {
+            warning(warnval, call.= FALSE)
+        }
+    }    
+
+    return(quietly_res$result)
+
+}