Skip to content

Commit

Permalink
Support filtering of HTOs during CallAndGenerateReport (#119)
Browse files Browse the repository at this point in the history
  • Loading branch information
bbimber authored Feb 14, 2023
1 parent 1f681bc commit 35b8dd9
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 6 deletions.
3 changes: 2 additions & 1 deletion R/CellHashing.R
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,7 @@ GetExampleMarkdown <- function(dest) {
#' @param callFile The file to which the table of calls will be written
#' @param rawFeatureMatrixH5 Both demuxEM and demuxmix require the 10x h5 gene expression count file. This is only required when either demuxEM or demuxmix are used.
#' @param barcodeWhitelist A vector of barcode names to retain.
#' @param barcodeBlacklist A vector of barcode names to discard. For example, if the input library was generated with both CITE-seq and cell hashing, it may make sense to discard the CITE-seq markers.
#' @param cellbarcodeWhitelist Either a vector of expected barcodes (such as all cells with passing gene expression data), a file with one cellbarcode per line, or the string 'inputMatrix'. If the latter is provided, the set of cellbarcodes present in the original unfiltered count matrix will be stored and used for reporting. This allows the report to count cells that were filtered due to low counts separately from negative/non-callable cells.
#' @param methods The set of methods to use for calling. See GenerateCellHashingCalls for options.
#' @param methodsForConsensus By default, a consensus call will be generated using all methods; however, if this parameter is provided, all algorithms specified by methods will be run, but only the list here will be used for the final consensus call. This allows one to see the results of a given caller without using it for the final calls.
Expand All @@ -705,7 +706,7 @@ GetExampleMarkdown <- function(dest) {
#' @param title A title for the HTML report
#' @importFrom rmdformats html_clean
#' @export
CallAndGenerateReport <- function(rawCountData, reportFile, callFile, rawFeatureMatrixH5 = NULL, barcodeWhitelist = NULL, cellbarcodeWhitelist = 'inputMatrix', methods = c('bff_cluster', 'gmm_demux', 'dropletutils'), methodsForConsensus = NULL, minCountPerCell = 5, title = NULL, metricsFile = NULL, rawCountsExport = NULL, skipNormalizationQc = FALSE, keepMarkdown = FALSE, molInfoFile = NULL, majorityConsensusThreshold = NULL, callerDisagreementThreshold = NULL, doTSNE = TRUE) {
CallAndGenerateReport <- function(rawCountData, reportFile, callFile, rawFeatureMatrixH5 = NULL, barcodeWhitelist = NULL, barcodeBlacklist = c('no_match', 'total_reads', 'unmapped'), cellbarcodeWhitelist = 'inputMatrix', methods = c('bff_cluster', 'gmm_demux', 'dropletutils'), methodsForConsensus = NULL, minCountPerCell = 5, title = NULL, metricsFile = NULL, rawCountsExport = NULL, skipNormalizationQc = FALSE, keepMarkdown = FALSE, molInfoFile = NULL, majorityConsensusThreshold = NULL, callerDisagreementThreshold = NULL, doTSNE = TRUE) {
rmd <- system.file("rmd/cellhashR.rmd", package = "cellhashR")
if (!file.exists(rmd)) {
stop(paste0('Unable to find file: ', rmd))
Expand Down
4 changes: 2 additions & 2 deletions R/Preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @param rawCountData The input barcode file or umi_count folder
#' @param minCountPerCell Cells (columns) will be dropped if their total count is less than this value.
#' @param barcodeWhitelist A vector of barcode names to retain.
#' @param barcodeBlacklist A vector of barcodes names to discard.
#' @param barcodeBlacklist A vector of barcode names to discard. For example, if the input library was generated with both CITE-seq and cell hashing, it may make sense to discard the CITE-seq markers.
#' @param cellbarcodeWhitelist If provided, the raw count matrix will be subset to include only these cells. This allows one to use the cellranger unfiltered matrix as an input, but filter based on target cells, such as those with GEX data. This can either be a character vector of barcodes, or a file with one cell barcode per line.
#' @param doPlot If true, QC plots will be generated
#' @param simplifyBarcodeNames If true, the sequence tag portion will be removed from the barcode names (i.e. HTO-1-ATGTGTGA -> HTO-1)
Expand Down Expand Up @@ -103,7 +103,7 @@ ProcessCountMatrix <- function(rawCountData=NA, minCountPerCell = 5, barcodeWhit
stop("Need to provide a directory or file for rawCountData")
}

if (all(is.na(barcodeBlacklist)) || all(is.null(barcodeBlacklist))) {
if (all(is.na(barcodeBlacklist) | is.null(barcodeBlacklist))) {
barcodeBlacklist <- character()
}

Expand Down
4 changes: 2 additions & 2 deletions inst/rmd/cellhashR.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ if (!file.exists(rawCountData)) {
stop(paste0('Could not find rawCountData: ', rawCountData))
}
optionalVars <- c('barcodeWhitelist', 'cellbarcodeWhitelist', 'minCountPerCell', 'metricsFile', 'rawCountsExport', 'molInfoFile', 'rawFeatureMatrixH5', 'methodsForConsensus', 'majorityConsensusThreshold', 'callerDisagreementThreshold', 'doTSNE')
optionalVars <- c('barcodeWhitelist', 'barcodeBlacklist', 'cellbarcodeWhitelist', 'minCountPerCell', 'metricsFile', 'rawCountsExport', 'molInfoFile', 'rawFeatureMatrixH5', 'methodsForConsensus', 'majorityConsensusThreshold', 'callerDisagreementThreshold', 'doTSNE')
for (v in optionalVars) {
if (!exists(v)) {
if (v == 'minCountPerCell') {
Expand Down Expand Up @@ -72,7 +72,7 @@ if (!is.null(metricsFile)) {

```{r QC}
barcodeData <- ProcessCountMatrix(rawCountData = rawCountData, minCountPerCell = minCountPerCell, barcodeWhitelist = barcodeWhitelist, cellbarcodeWhitelist = cellbarcodeWhitelist, saveOriginalCellBarcodeFile = saveOriginalCellBarcodeFile, metricsFile = metricsFile)
barcodeData <- ProcessCountMatrix(rawCountData = rawCountData, minCountPerCell = minCountPerCell, barcodeWhitelist = barcodeWhitelist, barcodeBlacklist = barcodeBlacklist, cellbarcodeWhitelist = cellbarcodeWhitelist, saveOriginalCellBarcodeFile = saveOriginalCellBarcodeFile, metricsFile = metricsFile)
if (nrow(barcodeData) == 0) {
stop('No passing barcodes')
}
Expand Down
3 changes: 3 additions & 0 deletions man/CallAndGenerateReport.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/ProcessCountMatrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 35b8dd9

Please sign in to comment.