Skip to content

Commit

Permalink
Merge pull request #269 from AlexsLemonade/jashapiro/reduce-merge-memory
Browse files Browse the repository at this point in the history
Update to Bioc3.18 and reduce merge memory
  • Loading branch information
jashapiro authored Apr 18, 2024
2 parents 20c9029 + ff19180 commit 951b569
Show file tree
Hide file tree
Showing 23 changed files with 900 additions and 812 deletions.
5 changes: 3 additions & 2 deletions .Rprofile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
options(BioC_mirror = "https://packagemanager.posit.co/bioconductor")
options(BIOCONDUCTOR_CONFIG_FILE = "https://packagemanager.posit.co/bioconductor/config.yaml")
# Configure BioCManager to use Posit Public Package Manager:
options(BioC_mirror = "https://p3m.dev/bioconductor")
options(BIOCONDUCTOR_CONFIG_FILE = "https://p3m.dev/bioconductor/config.yaml")

# activate renv
source("renv/activate.R")
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Description: Tools for processing single cell data associated with the
License: BSD_3_clause + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Depends:
R (>= 4.3.0)
Imports:
Expand Down
15 changes: 8 additions & 7 deletions R/build_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
#' with intron counts marked by "-U" and ambiguous counts "-A".
#' @param include_unspliced Whether or not to include the unspliced reads in the counts matrix.
#' If TRUE, the main "counts" assay will contain unspliced reads and spliced reads and an additional "spliced"
#' assay will contain spliced reads only. If TRUE, requires that data has been aligned to a reference contianing
#' assay will contain spliced reads only. If TRUE, requires that data has been aligned to a reference containing
#' spliced and unspliced reads.
#' Default is TRUE.
#' @param round_counts Logical indicating if the count matrix should be rounded to integers on import.
#' Default is TRUE.
#'
#' @return SingleCellExperiment object containing either just a counts assay with spliced cDNA only if
#' `include_unspliced` is FALSE. If `include_unspliced` is TRUE, the counts assay will contain both spliced and unspliced
#' counts and the spliced assay will contain the counts for just the spliced cDNA.
#' @return SingleCellExperiment object. If `include_unspliced` is TRUE (default), the counts assay will contain
#' both spliced and unspliced counts and the spliced assay will contain the counts for just the spliced cDNA.
#' If `include_unspliced` is FALSE, the counts assay will contain spliced cDNA counts only.
#'
#'
#' @examples
#' \dontrun{
Expand Down Expand Up @@ -41,13 +42,13 @@ build_sce <- function(counts,
# define if counts matrix has any unspliced reads
has_unspliced <- any(grep("-[IU]$", rownames(counts)))

if (include_unspliced & !has_unspliced) {
if (include_unspliced && !has_unspliced) {
stop("No counts corresponding to intronic reads detected.
If `include_unspliced` is TRUE a reference with spliced and unspliced reads must be used.")
}

# if has unspliced data get counts for both unspliced and spliced
if (include_unspliced & has_unspliced) {
if (include_unspliced && has_unspliced) {
total <- collapse_intron_counts(counts, which_counts = c("total"))
spliced <- collapse_intron_counts(counts, which_counts = c("spliced"))

Expand Down Expand Up @@ -88,7 +89,7 @@ build_sce <- function(counts,
counts = total,
spliced = spliced
)
} else if (!include_unspliced & has_unspliced) {
} else if (!include_unspliced && has_unspliced) {
# still aligned to introns, but want to collapse and just return spliced
spliced <- collapse_intron_counts(counts, which_counts = c("spliced"))
assay_list <- list(counts = spliced)
Expand Down
53 changes: 18 additions & 35 deletions R/import_quant_data.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
#' Import Gene Expression Quantification Data for Single-Cell RNA-Seq
#'
#' Imports the gene x cell matrix output from either Alevin, Alevin-fry, Cellranger, or Kallisto and returns a SingleCellExperiment.
#' Imports the gene x cell matrix output from either Alevin, Alevin-fry, Cell Ranger, or Kallisto
#' and returns a SingleCellExperiment.
#'
#' @param quant_dir Path to directory where output files are located.
#' @param tool Type of tool used to create files (alevin, alevin-fry, cellranger, or kallisto).
#' @param include_unspliced Whether or not to include the unspliced reads in the counts matrix.
#' If TRUE, the main "counts" assay will contain unspliced reads and spliced reads and an additional "spliced"
#' assay will contain spliced reads only. If TRUE, requires that data has been aligned to a reference contianing
#' assay will contain spliced reads only. If TRUE, requires that data has been aligned to a reference containing
#' spliced and unspliced reads.
#' Default is TRUE.
#' @param usa_mode Logical indicating whether USA mode was invoked when Alevin-fry was used.
#' Default is FALSE.
#' @param filter Logical indicating whether or not to filter the counts matrix.
#' Filtering is performed using DropletUtils::emptyDrops and cannot be performed with Cellranger.
#' Filtering is performed using DropletUtils::emptyDrops and cannot be performed with Cell Ranger.
#' @param fdr_cutoff FDR cutoff to use for DropletUtils::emptyDrops.
#' Default is 0.01.
#' @param tech_version Technology or kit used to process library (e.g., 10Xv3, 10Xv3.1).
Expand Down Expand Up @@ -64,40 +65,22 @@ import_quant_data <- function(quant_dir,
...) {
which_counts <- match.arg(which_counts)

if (!(tool %in% c("cellranger", "alevin", "alevin-fry", "kallisto"))) {
stop("Tool must be either cellranger, alevin, alevin-fry, or kallisto.")
}

# checks for intron_mode and usa_mode
if (!is.logical(include_unspliced)) {
stop("include_unspliced must be set as TRUE or FALSE")
}
if (!is.logical(usa_mode)) {
stop("usa_mode must be set as TRUE or FALSE")
}
if (!is.logical(filter)) {
stop("filter must be set as TRUE or FALSE")
}
stopifnot(
"Tool must be one of cellranger, alevin, alevin-fry, or kallisto." =
tool %in% c("cellranger", "alevin", "alevin-fry", "kallisto"),
"include_unspliced must be set as TRUE or FALSE" = is.logical(include_unspliced),
"usa_mode must be set as TRUE or FALSE" = is.logical(usa_mode),
"filter must be set as TRUE or FALSE" = is.logical(filter),
"USA mode only compatible with alevin-fry." = !(usa_mode && tool %in% c("cellranger", "alevin", "kallisto")),
"Include unspliced not compatible with cellranger." = !(include_unspliced && tool %in% c("cellranger")),
"Cannot perform emptyDrops filtering on cellranger output." = !(filter && tool == "cellranger")
)

# check that usa_mode and intron_mode are used with the proper tools
if (usa_mode & tool %in% c("cellranger", "alevin", "kallisto")) {
stop("USA mode only compatible with alevin-fry.")
}
if (include_unspliced & tool %in% c("cellranger")) {
stop("Include unspliced not compatible with cellranger.")
}

# check that filter is not used with cellranger
if (filter & tool == "cellranger") {
stop("Cannot perform emptyDrops filtering on cellranger output.")
}
if (filter) {
if (!(is.numeric(fdr_cutoff))) {
stop("fdr_cutoff is not a number.")
}
if (fdr_cutoff < 0 | fdr_cutoff > 1) {
stop("fdr_cutoff must be a number between 0 - 1.")
}
stopifnot(
"fdr_cutoff must be a number." = is.numeric(fdr_cutoff),
"fdr_cutoff must be a number between 0 - 1." = fdr_cutoff >= 0 && fdr_cutoff <= 1
)
}

if (tool %in% c("alevin-fry", "alevin")) {
Expand Down
51 changes: 27 additions & 24 deletions R/merge_sce_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,13 @@ merge_sce_list <- function(
purrr::walk(
\(altexp_name) {
sce_list |>
purrr::keep(\(sce) altexp_name %in% altExpNames(sce)) |>
purrr::map(altExp, altexp_name) |>
purrr::map(\(sce) {
if (altexp_name %in% altExpNames(sce)) {
altExp(sce, altexp_name)
}
}) |>
# remove nulls
purrr::keep(\(sce) is(sce, "SingleCellExperiment")) |>
check_metadata()
}
)
Expand Down Expand Up @@ -244,24 +249,9 @@ merge_sce_list <- function(

if (include_altexp) {
for (altexp_name in names(altexp_attributes)) {
has_altexp_name <- sce_list |>
purrr::map_lgl(\(sce) (altexp_name %in% altExpNames(sce)))

# For any SCEs without this altExp, create the library_id and sample_id metadata
additional_metadata <- sce_list |>
purrr::discard(has_altexp_name) |>
purrr::map(extract_metadata_for_altexp)

# Update metadata in altExps that were originally present
altexp_metadata_list <- sce_list |>
purrr::keep(has_altexp_name) |>
purrr::map(altExp, altexp_name) |>
purrr::map(metadata) |>
# Tack on the metadata we created for libraries without altExps
c(additional_metadata)

# Ensure correct order
altexp_metadata_list <- altexp_metadata_list[names(sce_list)]
purrr::map(get_altexp_metadata, altexp_name = altexp_name)

metadata(altExp(merged_sce, altexp_name)) <- prepare_merged_metadata(altexp_metadata_list)
}
Expand Down Expand Up @@ -572,14 +562,27 @@ check_metadata <- function(sce_list, expected_fields = c("library_id", "sample_i
}
}

#' Helper function to extract main SCE metadata for inclusion in an altExp


#' Helper function to get altExp metadata from an SCE that may not have the altExp
#'
#' Returns partial main experiment metadata if the altExp is not present in the SCE.
#'
#' @param sce SCE object to extract metadata from
#' @param altexp_name Name of the altExp to extract metadata for
#'
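#' @return The metadata list of the named altExp if it is present in the SCE; otherwise a
#'   list with fields `library_id` and `sample_id` taken from the main SCE metadata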
extract_metadata_for_altexp <- function(sce) {
list(
library_id = metadata(sce)$library_id,
sample_id = metadata(sce)$sample_id
)
get_altexp_metadata <- function(sce, altexp_name) {
  # When the SCE carries the requested altExp, its own metadata is used as-is
  has_altexp <- altexp_name %in% altExpNames(sce)
  if (has_altexp) {
    return(metadata(altExp(sce, altexp_name)))
  }

  # Otherwise fall back to the identifying fields of the main experiment only
  main_metadata <- metadata(sce)
  list(
    library_id = main_metadata$library_id,
    sample_id = main_metadata$sample_id
  )
}
2 changes: 1 addition & 1 deletion R/read_alevin.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' Default is FALSE.
#' @param include_unspliced Whether or not to include the unspliced reads in the counts matrix.
#' If TRUE, the main "counts" assay will contain unspliced reads and spliced reads and an additional "spliced"
#' assay will contain spliced reads only. If TRUE, requires that data has been aligned to a reference contianing
#' assay will contain spliced reads only. If TRUE, requires that data has been aligned to a reference containing
#' spliced and unspliced reads.
#' Default is TRUE.
#' @param feature_data Logical indicating if the data being read in contains feature data.
Expand Down
2 changes: 1 addition & 1 deletion R/read_cellranger.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Read in counts data processed with Cellranger
#' Read in counts data processed with Cell Ranger
#'
#' @param quant_dir Path to directory where output files are located.
#'
Expand Down
2 changes: 1 addition & 1 deletion R/read_kallisto.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' @param quant_dir Path to directory where output files are located.
#' @param include_unspliced Whether or not to include the unspliced reads in the counts matrix.
#' If TRUE, the main "counts" assay will contain unspliced reads and spliced reads and an additional "spliced"
#' assay will contain spliced reads only. If TRUE, requires that data has been aligned to a reference contianing
#' assay will contain spliced reads only. If TRUE, requires that data has been aligned to a reference containing
#' spliced and unspliced reads.
#' Default is TRUE.
#' @param round_counts Logical indicating in the count matrix should be rounded to integers on import.
Expand Down
7 changes: 3 additions & 4 deletions R/scpcaTools-package.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
#' scpcaTools: Useful tools for analysis of single-cell RNA seq counts data
#'
#' The scpcaTools package contains a set of tools for working with single-cell and single-nuclei RNA-seq counts data.
#' Mainly, this package can work with data that has been produced using Alevin, Alevin-Fry, Cellranger, or Kallisto.
#' Mainly, this package can work with data that has been produced using Alevin, Alevin-Fry, Cell Ranger, or Kallisto.
#'
#'
#' @docType package
#' @name scpcaTools-package
#' @aliases scpcaTools-package
#'
#' @import methods
#' @import SingleCellExperiment
#' @import stringr
NULL
"_PACKAGE"
11 changes: 5 additions & 6 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rocker/r-ver:4.3.2
FROM rocker/r-ver:4.3.3
LABEL maintainer="[email protected]"
LABEL org.opencontainers.image.source https://github.com/AlexsLemonade/scpcaTools

Expand All @@ -13,10 +13,9 @@ RUN Rscript -e "install.packages(c('remotes', 'renv'))"

WORKDIR /usr/local/renv
COPY renv.lock renv.lock
# restore with PPM repo set for binary installs
RUN Rscript -e "renv::consent(provided = TRUE); \
renv::restore(repos = 'https://packagemanager.posit.co/cran/latest')" && \
rm -rf ~/.local/share/renv && \
# restore renv and remove cache files
RUN Rscript -e "renv::restore()" && \
rm -rf ~/.cache/R/renv && \
rm -rf /tmp/downloaded_packages && \
rm -rf /tmp/Rtmp*

Expand All @@ -27,7 +26,7 @@ RUN Rscript -e "proc <- basilisk::basiliskStart(env = zellkonverter::zellkonvert

#### Python packages
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt && pip cache purge
RUN pip install --no-cache-dir -r requirements.txt


##########################
Expand Down
Loading

0 comments on commit 951b569

Please sign in to comment.