Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V4 labels #11

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
.Rhistory
.RData
.Ruserdata
dev/*
.ipynb_checkpoints/
*/.ipynb_checkpoints/
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: Provides data, tests, and markdowns for pipeline use.
License: Allen Institute Software License
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.1
RoxygenNote: 7.1.1
Depends:
Seurat,
Matrix
Expand Down
46 changes: 46 additions & 0 deletions R/format_query_so.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Format input H5 as a Seurat object
#'
#' Formats the .H5 file as a Seurat object with option to
#' bootstrap if there are fewer than 200 cells.
#'
#' For bootstrapping, the entire dataset will be duplicated
#' enough times to obtain 200 cells prior to labeling. Duplicates
#' will be removed downstream.
#'
#' @param in_h5 Filepath to input h5 file
#' @param allow_few_cells Logical value, default FALSE. If TRUE allows proceeding when
#' input dataset has fewer thatn 200 cells but bootstrapping up to >=200 cells. If FALSE,
#' errors out if too few cells
#' @param dup_pattern String value prefix for bootstrapped cellnames when allow_few_cells
#' behavior is triggered. Default '__dup__'
format_query_so <- function(in_h5, allow_few_cells = FALSE, dup_pattern = "__dup__"){
# Read in counts
query_data <- H5weaver::read_h5_dgCMatrix(
h5_file = in_h5,
feature_names = "name"
)
rownames(query_data) <- as.vector(rhdf5::h5read(in_h5, "/matrix/features/name"))

# Handle low cell count option
if(allow_few_cells & ncol(query_data) < 200) {
stm("WARNING: < 200 Cells as Input. Attempting compensation")
stm("WARNING: Labels may be inaccurate")

ndup <- ceiling(200 / ncol(query_data))

query_data_orig <- query_data
for(i in 1:(ndup-1)) {
dup_data <- query_data_orig
colnames(dup_data) <- paste0(dup_pattern, i, "_", colnames(dup_data))
query_data <- cbind(query_data, dup_data)
}
rm(query_data_orig)
} else if (ncol(query_data) < 200) {
stm("ERROR: < 200 Cells as Input. Too Few for Labeling")
stop("ERROR: < 200 Cells as Input. Too Few for Labeling")
}

query_so <- Seurat::CreateSeuratObject(counts = query_data)

return(query_so)
}
64 changes: 64 additions & 0 deletions R/label_seurat4_bmmc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#' Label Seurat with BMMC Reference
#'
#' @param query_so The query seurat object
#' @param ref_so The reference seurat object
#' @param ref_reduction Reduction value passed to Seurat::FindTransferAnchors(). Defualt 'spca'.
#' @param ref_data List of reference data passed to Seurat::TransferData(). Default celltype.l1,
#' @param dims Dimensionsfor FindTransferAnchors, defualt 1:50
#' @param verbose Default FALSE
#' @param ncores Number of cores for parallel processing via Seurat's future::plan implementation.
#' @param sct_method Method argument passed to Seurat::SCTransform, default 'glmGamPoi'
label_seurat4_bmmc <- function(
query_so,
ref_so,
ref_reduction = "spca",
ref_data = list(
celltype.l1 = "celltype.l1",
celltype.l2 = "celltype.l2",
predicted_ADT = "ADT"
),
dims = 1:50,
verbose = FALSE
)
{

if(!(ref_reduction %in% names(ref_so@reductions))) {
stop(sprintf("Reference dataset reduction '%s' not found in reference data object",
ref_reduction))
}

# validate and group the ref_data types
ref_data_types <- .check_transfer_values(reference_so = ref_so, ref_data = ref_data)

if(verbose){message("Finding anchors")}
anchors <- Seurat::FindTransferAnchors(
reference = ref_so,
query = query_so,
reference.assay = 'RNA',
normalization.method = 'LogNormalize',
reference.reduction = ref_reduction,
recompute.residuals = TRUE, # function default
dims = dims,
verbose = verbose
)

query_so <- Seurat::TransferData(
anchorset = anchors,
query = query_so,
reference = ref_so,
refdata = ref_data,
weight.reduction = 'pcaproject',
verbose = verbose,
l2.norm = FALSE,
dims = NULL,
k.weight = 50,
sd.weight = 1,
eps = 0,
n.trees = 50,
slot = "data",
prediction.assay = FALSE,
store.weights = TRUE
)

return(query_so)
}
114 changes: 114 additions & 0 deletions R/label_seurat4_pbmc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
.validate_future_cores <- function(ncores, verbose = TRUE){
if (ncores > 1){
if(ncores > (future::availableCores() - 1)){
if(verbose){
message(
sprintf("%s available cores. Reducing future cores from %s to %s",
future::availableCores(), ncores, future::availableCores()-1)
)
}
ncores <- future::availableCores() - 1
}
}
return(ncores)
}

.check_transfer_values <- function(reference_so, ref_data){
imeta <- which(unlist(ref_data) %in% names([email protected]))
if(length(imeta) > 0){
meta = unlist(ref_data)[imeta]
} else {
meta <- NULL
}

idata <- which(unlist(ref_data) %in% names(reference_so@assays))
if(length(idata) > 0){
dat = unlist(ref_data)[idata]
} else {
dat = NULL
}

missing <- setdiff(unlist(ref_data), c(meta, dat))
if(length(missing) > 0){
stop(sprintf("Reference data to transfer not found in reference object: %s",
paste(missing, collapse = ", ")))
}

return(list(meta = meta, data = dat))
}

#' Label Seurat with PBMC Reference
#'
#' @param query_so The query seurat object
#' @param ref_so The reference seurat object
#' @param ref_reduction Reduction value passed to Seurat::FindTransferAnchors(). Defualt 'spca'.
#' @param ref_data List of reference data passed to Seurat::TransferData(). Default celltype.l1,
#' @param dims Dimensionsfor FindTransferAnchors, defualt 1:50
#' @param verbose Default FALSE
#' @param ncores Number of cores for parallel processing via Seurat's future::plan implementation.
#' @param sct_method Method argument passed to Seurat::SCTransform, default 'glmGamPoi'
label_seurat4_pbmc <- function(
query_so,
ref_so,
ref_reduction = "spca",
ref_data = list(
celltype.l1 = "celltype.l1",
celltype.l2 = "celltype.l2",
celltype.l2.5 = "celltype.l2.5",
celltype.l3 = "celltype.l3",
predicted_ADT = "ADT"
),
dims = 1:50,
verbose = FALSE,
ncores = 1,
sct_method = "glmGamPoi"
)
{

if(!(ref_reduction %in% names(ref_so@reductions))) {
stop(sprintf("Reference dataset reduction '%s' not found in reference data object",
ref_reduction))
}

# validate and group the ref_data types
ref_data_types <- .check_transfer_values(reference_so = ref_so, ref_data = ref_data)

# Set Cores
if (verbose){ message("Setting multiple cores for SCTransform") }
ncores <- .validate_future_cores(ncores = ncores, verbose = verbose)
if (ncores > 1){
future::plan(strategy = 'multicore', workers = ncores)
}

if(verbose){message("Finding anchors")}
anchors <- Seurat::FindTransferAnchors(
reference = ref_so,
query = query_so,
reference.assay = 'SCT',
normalization.method = 'SCT',
reference.reduction = ref_reduction,
recompute.residuals = TRUE, # function default
dims = dims,
verbose = verbose
)

query_so <- Seurat::TransferData(
anchorset = anchors,
query = query_so,
reference = ref_so,
refdata = ref_data,
weight.reduction = 'pcaproject',
verbose = verbose,
l2.norm = FALSE,
dims = NULL,
k.weight = 50,
sd.weight = 1,
eps = 0,
n.trees = 50,
slot = "data",
prediction.assay = FALSE,
store.weights = TRUE
)

return(query_so)
}
33 changes: 33 additions & 0 deletions R/read_reference.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#' Read in and validate input reference object file
#'
#' Currently supports .rds or .h5seurat input file and enforces read in
#' object as type 'Seurat'.
#'
#' @param ref_fp File path to reference object, an .rds or .h5seurat object
#' @return Seurat object contained in input file
#' @export
read_reference <- function(ref_fp){
valid_extensions <- c("rds", "h5seurat")

extension <- tolower(gsub(".*[.]", "", ref_fp))
if (!(extension %in% valid_extensions)) {
stop(sprintf("Reference file extension [%s] not in valid_extensions [%s]",
extension,
paste(valid_extensions, collapse = ", ")))
}

if(extension == "rds"){
ref_so <- readRDS(ref_fp)
if(!inherits(ref_so, 'Seurat')){
stop(sprintf("Input reference RDS file does not contain a Seurat object. Object in file '%s' is '%s'",
ref_fp,
class(ref_so)))
}
} else if (extension == "h5seurat") {
ref_so <- SeuratDisk::LoadH5Seurat(ref_fp)
} else {
stop(sprintf("Need to define a read method for valid reference extension '%s'", extension))
}

return(ref_so)
}
110 changes: 110 additions & 0 deletions R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,113 @@ stm <- function(x) {

write(paste0("[",Sys.time(),"] ",x), stderr())
}

#' Suppress selected messages and/or warnings from function output
#'
#' Returns the result slot of the quietly call. Prints all output in the outputs slot.
#' Outputs messages for all messages in the messages slot that do not match expected
#' values or patterns. Outputs warnings for all items in warnings slot that do not
#' match expected values or patterns.
#'
#' @param A function call result generated using \code{purrr::quietly()}
#' @param values_suppress Named list of character vectors. Names should be be 'messages' and 'warnings'.
#' Vectors are values of expected messages and warnings that should be suppressed (exact matches). For
#' example, \code{list(messages=c('Expected message'), warnings = c("Expected warning"))}
#' @param patterns_suppress Named list of character vectors. Names should be be 'messages' and 'warnings'.
#' Vectors are patterns of expected messages and warnings that should be suppressed (pattern matched)
#' @returns The $result value of the input quietly call result.
#' @examples
#' testfun <- function(){
#' print('testprint1')
#' print('asdfsdaf')
#' print('sdfasdfas')
#' message('expected_message_1')
#' message('exp_message_2')
#' message('unexpected message1')
#' warning('exp_warn_2')
#' warning('expected warn')
#' warning('unexpected warn')
#' return(NULL)
#' }
#' vlist <- list(messages=c('expected_message_1'), warnings = c("exp_warn_2"))
#' plist <- list(messages=c('test'), warnings = c("^expected"))
#' # # See result of filtered quietly call
#' # filter_quietly(purrr::quietly(testfun)(), values_suppress=vlist, plist)
#' # # Compare to result of original function call
#' # testfun()
filter_quietly <- function(
quietly_res,
values_suppress = list(messages = character(),
warnings = character()),
patterns_suppress = list(messages = character(),
warnings = character())
){
default_list <- list(
messages = character(),
warnings = character()
)

# Validate values_suppress
missing_val <- setdiff(names(default_list), names(values_suppress))
extra_val <- setdiff(names(values_suppress), names(default_list))
if (length(missing_val) > 0){
values_suppress <- c(values_suppress, default_list[missing_val])
}
if (length(extra_val) > 0){
warning(sprintf("Unexpected list item names in values_suppress: %s. Expected labels are messages, output, or warnings",
paste(extra_val, collapse = ',')))
}

# Validate patterns_suppress
missing_pat <- setdiff(names(default_list), names(patterns_suppress))
extra_pat <- setdiff(names(patterns_suppress), names(default_list))
if (length(missing_pat) > 0){
patterns_suppress <- c(patterns_suppress, default_list[missing_pat])
}
if (length(extra_pat) > 0){
warning(sprintf("Unexpected list item names in patterns_suppress: %s. Expected labels are messages, output, or warnings",
paste(extra_pat, collapse = ',')))
}

.match_listwise = function(query_list, pattern_list){
matchvals <- unlist(sapply(pattern_list, function(x){grep(x, query_list, value = T)}))
if(length(matchvals)>0){
return(matchvals)
}

}
.filter_values <- function(fieldname, patterns_suppress, values_suppress){
ignore_match <- .match_listwise(quietly_res[[fieldname]], patterns_suppress[[fieldname]])
ignore_value_i <- which(gsub("\n","", quietly_res[[fieldname]]) %in% values_suppress[[fieldname]])
ignore_value <- quietly_res[[fieldname]][ignore_value_i]
ignore_all <- unique(c(ignore_match, ignore_value))
output_vals <- setdiff(quietly_res[[fieldname]], ignore_all)
return(output_vals)
}

# Print all output
if(length(quietly_res$output) > 0){
for (outmsg in quietly_res$output) {
cat(outmsg)
}
}

# Handle messages
message_vals <- .filter_values('messages', patterns_suppress, values_suppress)
if(length(message_vals) > 0){
for (msg in message_vals) {
message(msg)
}
}

# Handle warnings
warn_vals <- .filter_values('warnings', patterns_suppress, values_suppress)
if(length(warn_vals) > 0){
for (warnval in warn_vals) {
warning(warnval, call.= FALSE)
}
}

return(quietly_res$result)

}
Loading