From 40c4c96e78980576f7970601e55751a112c87718 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Mon, 9 Dec 2024 19:24:33 -0500 Subject: [PATCH] hvg fixes for summing --- R/sum-duplicate-genes.R | 13 ++++++++++--- tests/testthat/test-sum-duplicate-genes.R | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/R/sum-duplicate-genes.R b/R/sum-duplicate-genes.R index cb2cfcb..405bd7b 100644 --- a/R/sum-duplicate-genes.R +++ b/R/sum-duplicate-genes.R @@ -76,8 +76,10 @@ sum_duplicate_genes <- function(sce, normalize = TRUE, recalculate_reduced_dims try({ # try to cluster similar cells # clustering may fail if < 100 cells in dataset - qclust <- suppressWarnings(scran::quickCluster(summed_sce)) - summed_sce <- scran::computeSumFactors(summed_sce, clusters = qclust) + suppressWarnings({ + qclust <- scran::quickCluster(summed_sce) + summed_sce <- scran::computeSumFactors(summed_sce, clusters = qclust) + }) }) summed_sce <- scuttle::logNormCounts(summed_sce) } @@ -90,8 +92,13 @@ sum_duplicate_genes <- function(sce, normalize = TRUE, recalculate_reduced_dims pca_dim <- min(50, ncol(sce) - 1) # always reduce dimensions at least 1! } + if (is.null(metadata(sce)$highly_variable_genes)) { + n_hvg <- 2000 # same default as ScPCA + } else { + n_hvg <- length(metadata(sce)$highly_variable_genes) + } hv_genes <- scran::modelGeneVar(summed_sce) |> - scran::getTopHVGs(n = length(metadata(sce)$highly_variable_genes)) + scran::getTopHVGs(n = n_hvg) metadata(summed_sce)$highly_variable_genes <- hv_genes # store the new HVGs summed_sce <- scater::runPCA(summed_sce, subset_row = hv_genes, ncomponents = pca_dim) |> diff --git a/tests/testthat/test-sum-duplicate-genes.R b/tests/testthat/test-sum-duplicate-genes.R index 4fa7769..127c098 100644 --- a/tests/testthat/test-sum-duplicate-genes.R +++ b/tests/testthat/test-sum-duplicate-genes.R @@ -8,6 +8,26 @@ test_that("merging works as expected", { expect_setequal(rownames(deduped_sce), rownames(sce)) + expect_equal( + counts(deduped_sce)[rownames(sce), ], # need to make the row order identical + 2 * counts(sce) |> Matrix::drop0() # original matrices may have explicit 0s + ) + expect_equal( + assay(deduped_sce, "spliced")[rownames(sce), ], + 2 * assay(sce, "spliced") |> Matrix::drop0() + ) +}) + +test_that("merging works as expected with unprocessed SCE", { + sce <- readRDS(test_path("data", "filtered_sce.rds")) + double_sce <- rbind(sce, sce) + deduped_sce <- sum_duplicate_genes(double_sce) + + expect_equal(dim(deduped_sce), dim(sce)) + + expect_setequal(rownames(deduped_sce), rownames(sce)) + + expect_equal( counts(deduped_sce)[rownames(sce), ], # need to make the row order identical 2 * counts(sce) |> Matrix::drop0() # original matrices may have explicit 0s