From 1d324c9dc241b00d29c70ac12bd5cc70d7dd0ca3 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Mon, 18 Dec 2023 09:09:59 -0500 Subject: [PATCH 01/30] Fail when altExps of the same name have a different set of features --- R/merge_sce_list.R | 48 ++++++++++++++++++++-- tests/testthat/test-merge_sce_list.R | 60 ++++++++++------------------ 2 files changed, 66 insertions(+), 42 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 29f885a6..8361f637 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -62,7 +62,7 @@ merge_sce_list <- function( cell_id_column = "cell_id", include_altexp = TRUE) { - # Check `sce_list`---------------------- + ## Checks -------------------------- if (is.null(names(sce_list))) { warning( glue::glue( @@ -79,13 +79,53 @@ merge_sce_list <- function( return(sce_list) } - # Check `retain_coldata_cols` ---------------- + # Check `retain_coldata_cols` if (length(retain_coldata_cols) == 0) { warning("All pre-existing colData will be removed from the the merged SCE. Please check that `retain_coldata_cols` was correctly specified.") } - # Subset SCEs to shared features and ensure appropriate naming ------------------ + # Check altExp compatibility, if we are including them + if (include_altexp) { + + # Find all altExp names present in the SCE objects. + altexp_names <- sce_list |> + purrr::map( + \(sce) altExpNames(sce) + ) |> + purrr::reduce(union) + + # For each in altexp_names (if present), do they have the same features? + # If not, error out + for (altexp_name in altexp_names) { + + # all altExps for this name + altexp_list <- sce_list |> + purrr::keep(\(sce) altexp_name %in% altExpNames(sce)) |> + purrr::map(altExp, altexp_name) + + # find their union of features + altexp_name_features <- altexp_list |> + purrr::map(rownames) |> + purrr::reduce(union) |> + sort() + + # create logical vector for presence of all features + features_present <- altexp_list |> + purrr::map_lgl( + \(alt_sce) identical(altexp_name_features, sort(rownames(alt_sce))) + ) + + if (!all(features_present)) { + stop( + glue::glue("The {altexp_name} alternative experiments do not share the same set of features.") + ) + } + } + } + + + ## Subset SCEs to shared features and ensure appropriate naming ------------------ # First, obtain intersection among all SCE objects shared_features <- sce_list |> @@ -123,7 +163,7 @@ merge_sce_list <- function( stop("The metadata for each SCE object must contain `library_id` and `sample_id`.") } - # Prepare main experiment of SCEs for merging -------------------- + ## Prepare main experiment of SCEs for merging -------------------- sce_list <- sce_list |> purrr::imap( prepare_sce_for_merge, diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index cc0c8cf3..f411619a 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -370,8 +370,13 @@ sce_list_with_altexp <- sce_list |> # vector of all expected names full_altexp_features <- rownames(altExp(sce_list_with_altexp[[1]])) +# second list where 1 is missing all +sce_list_with_altexp_features_mismatch <- sce_list_with_altexp +altExp(sce_list_with_altexp_features_mismatch[[1]]) <- altExp(sce_list_with_altexp_features_mismatch[[1]])[1:3,] -test_that("merging SCEs with same altExp features works as expected, with altexps", { + + +test_that("merging SCEs with altExps works as expected when include_altexps = FALSE", { merged_sce <- merge_sce_list( sce_list_with_altexp, @@ -379,34 +384,16 @@ test_that("merging SCEs with same altExp features works as expected, with altexp # "total" should get removed retain_coldata_cols = retain_coldata_cols, # this row name should not be modified: - preserve_rowdata_cols = c("gene_names") + preserve_rowdata_cols = c("gene_names"), + include_altexp = FALSE ) - merged_altexp <- altExp(merged_sce) - - - expect_true(altExpNames(merged_sce) == altexp_name) - expect_equal(dim(merged_altexp), c(num_altexp_features, total_cells)) - expect_equal(rownames(merged_altexp), full_altexp_features) - - expected_colnames <- sce_list_with_altexp |> - purrr::imap( - \(sce, sce_name) glue::glue("{sce_name}-{colnames(sce)}") - ) |> - unlist() |> - unname() - expect_equal(colnames(merged_altexp), expected_colnames) + # there should not be any altExps + expect_length(altExpNames(merged_sce), 0) }) - - - -test_that("merging SCEs with different altExp features works as expected, with altexps", { - - # keep only the first 3 features from the first SCE - altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3] - +test_that("merging SCEs with 1 altexp and same features works as expected, with altexps", { merged_sce <- merge_sce_list( sce_list_with_altexp, @@ -435,21 +422,18 @@ test_that("merging SCEs with different altExp features works as expected, with a }) -test_that("merging SCEs with altExps works as expected when include_altexps = FALSE", { - merged_sce <- merge_sce_list( - sce_list_with_altexp, - batch_column = batch_column, - # "total" should get removed - retain_coldata_cols = retain_coldata_cols, - # this row name should not be modified: - preserve_rowdata_cols = c("gene_names"), - include_altexp = FALSE - ) - # there should not be any altExps - expect_length(altExpNames(merged_sce), 0) +test_that("merging SCEs with 1 altexp but different features fails as expected, with altexps", { + expect_error( + merge_sce_list( + sce_list_with_altexp_features_mismatch, + batch_column = batch_column, + # "total" should get removed + retain_coldata_cols = retain_coldata_cols, + # this row name should not be modified: + preserve_rowdata_cols = c("gene_names") + ) + ) }) - - From 84b9d3c85fe275d81db7d3e252dd55d49f63379a Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 19 Dec 2023 09:43:23 -0500 Subject: [PATCH 02/30] Move altexp check code into helper function for more modularity. This function checks for feature and assay compatibility and currently dies if either condition is not met. Function also returns a list of features & assays for each altexp_name for later use when preparing altexps --- R/merge_sce_list.R | 118 ++++++++++++++++++++++++++++++++------------- 1 file changed, 85 insertions(+), 33 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 8361f637..08dd7d47 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -86,42 +86,13 @@ merge_sce_list <- function( } # Check altExp compatibility, if we are including them + # altExps for a given name must all have the same features and the same assays if (include_altexp) { - # Find all altExp names present in the SCE objects. - altexp_names <- sce_list |> - purrr::map( - \(sce) altExpNames(sce) - ) |> - purrr::reduce(union) - - # For each in altexp_names (if present), do they have the same features? - # If not, error out - for (altexp_name in altexp_names) { + # This is a list of lists of altexp information for later use: + # (altexp_name = list( features = c(features), assays = c(assays) )) + altexp_attributes <- check_altexps(sce_list) - # all altExps for this name - altexp_list <- sce_list |> - purrr::keep(\(sce) altexp_name %in% altExpNames(sce)) |> - purrr::map(altExp, altexp_name) - - # find their union of features - altexp_name_features <- altexp_list |> - purrr::map(rownames) |> - purrr::reduce(union) |> - sort() - - # create logical vector for presence of all features - features_present <- altexp_list |> - purrr::map_lgl( - \(alt_sce) identical(altexp_name_features, sort(rownames(alt_sce))) - ) - - if (!all(features_present)) { - stop( - glue::glue("The {altexp_name} alternative experiments do not share the same set of features.") - ) - } - } } @@ -464,3 +435,84 @@ build_new_altexp_assay <- function( return(new_matrix) } + + + +#' Helper function to check altExp compatibility +#' +#' @param sce_list List of SCEs with altExps to check +#' +#' @return List of named lists with altExp information for use when preparing to merge, +#' with each sublist formatted as: +#' altexp_name = list(features = c(features), assays = c(assays)) +check_altexps <- function(sce_list) { + + # Attribute list to save for later use + altexp_attributes <- list() + + # Find all altExp names present in the SCE objects. + altexp_names <- sce_list |> + purrr::map( + \(sce) altExpNames(sce) + ) |> + purrr::reduce(union) + + # For each in altexp_names (if present), do they have the same features? + # If not, error out + for (altexp_name in altexp_names) { + + # all altExps for this name + altexp_list <- sce_list |> + purrr::keep(\(sce) altexp_name %in% altExpNames(sce)) |> + purrr::map(altExp, altexp_name) + + # find their union of features + all_features <- altexp_list |> + purrr::map(rownames) |> + purrr::reduce(union) |> + sort() + + # create logical vector for presence of all features + features_present <- altexp_list |> + purrr::map_lgl( + \(alt_sce) identical(all_features, sort(rownames(alt_sce))) + ) + + if (!all(features_present)) { + stop( + glue::glue("The {altexp_name} alternative experiments do not share the same set of features.") + ) + } + + ################################################################################ + # TODO: ALTERNATIVELY, we can just return the union of assays and make dummy + # matrices if any are missing, rather that requiring that all be present. + ################################################################################ + + # check for same assays + all_assays <- altexp_list |> + purrr::map(assayNames) |> + purrr::reduce(union)|> + sort() + + # create logical vector for presence of all assays + assays_present <- altexp_list |> + purrr::map_lgl( + \(alt_sce) identical(all_assays, sort(assayNames(alt_sce))) + ) + + if (!all(assays_present)) { + stop( + glue::glue("The {altexp_name} alternative experiments do not share the same set of assays.") + ) + } + + # Save to altexp_attributes for later use + altexp_attributes[[altexp_name]] <- list( + "features" = all_features, + "assays" = all_assays + ) + + } + return(altexp_attributes) +} From 7f7c7917ea7d57d921d8711d7189cee0d8632edb Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 19 Dec 2023 10:16:24 -0500 Subject: [PATCH 03/30] remove a TODO and add a new one based on discussion --- R/merge_sce_list.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 08dd7d47..63afbbe0 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -484,11 +484,6 @@ check_altexps <- function(sce_list) { ) } - ################################################################################ - # TODO: ALTERNATIVELY, we can just return the union of assays and make dummy - # matrices if any are missing, rather that requiring that all be present. - ################################################################################ - # check for same assays all_assays <- altexp_list |> purrr::map(assayNames) |> @@ -501,6 +496,7 @@ check_altexps <- function(sce_list) { \(alt_sce) identical(all_assays, sort(assayNames(alt_sce))) ) + # TODO: we may want to drop assays that aren't present in all altexps, rather than dying. if (!all(assays_present)) { stop( glue::glue("The {altexp_name} alternative experiments do not share the same set of assays.") From 06fb9afaa8d930961bcdd3c783ddd7d5ebb00148 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 19 Dec 2023 10:20:07 -0500 Subject: [PATCH 04/30] A little recreational revision to split out metadata processing for improved script readability/modularity, as the script is getting long --- R/merge_sce_list.R | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 63afbbe0..affb0f60 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -315,8 +315,22 @@ prepare_sce_for_merge <- function( # Add `sce_name` to colnames so cell ids can be mapped to originating SCE colnames(sce) <- glue::glue("{sce_name}-{colnames(sce)}") - # get metadata list for updating it - metadata_list <- metadata(sce) + # Update metadata list + metadata(sce) <- update_sce_metadata( + metadata(sce) + ) + + # return the processed SCE + return(sce) +} + + +#' Helper function to update SCE metadata for merging +#' +#' @param metadata_list Current SCE metadata before modification +#' +#' @return Updated metadata list to store in the SCE +update_sce_metadata <- function(metadata_list) { # first check that this library hasn't already been merged if ("library_metadata" %in% names(metadata_list)) { @@ -330,21 +344,17 @@ prepare_sce_for_merge <- function( # combine into one list metadata_list <- list( - library_id = metadata(sce)[["library_id"]], - sample_id = metadata(sce)[["sample_id"]], + library_id = metadata_list[["library_id"]], + sample_id = metadata_list[["sample_id"]], library_metadata = library_metadata, # this will be all previous metadata for the given library sample_metadata = sample_metadata # this will be the same as the previous sample_metadata ) - # replace existing metadata - metadata(sce) <- metadata_list - - # return the processed SCE - return(sce) + # return updated metadata + return(metadata_list) } - #' Prepare altExps for merge and create a list of merged altExps for each altExp name #' #' From 576077a95ff05a9bb006d6f608fab40458d08ecf Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 19 Dec 2023 10:23:28 -0500 Subject: [PATCH 05/30] add unit test for new function update_sce_metadata --- tests/testthat/test-merge_sce_list.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index f411619a..7b0dcef1 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -59,6 +59,24 @@ sce_list <- list( # Tests without altexps ---------------------------------------------- +test_that("`update_sce_metadata()` returns the expected list", { + + metadata_list <- metadata(sce_list[[1]]) + + new_metadata <- update_sce_metadata(metadata_list) + + expect_equal( + names(new_metadata), + c("library_id", "sample_id", "library_metadata", "sample_metadata") + ) + + expect_equal( + names(new_metadata$library_metadata), + c("library_id", "sample_id", "total_reads") + ) +}) + + test_that("`prepare_sce_for_merge` works as expected when all columns are present, no altexps", { result_sce <- prepare_sce_for_merge( sce, From 33c58accca2f31c2c92a0b4c2a239b1d0571dbcd Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 19 Dec 2023 11:47:15 -0500 Subject: [PATCH 06/30] Overhauled altexp approach with some new functions and removed old functions --- R/merge_sce_list.R | 223 +++++++++++---------------- tests/testthat/test-merge_sce_list.R | 31 +++- 2 files changed, 114 insertions(+), 140 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index affb0f60..f7a30780 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -88,11 +88,13 @@ merge_sce_list <- function( # Check altExp compatibility, if we are including them # altExps for a given name must all have the same features and the same assays if (include_altexp) { - # This is a list of lists of altexp information for later use: # (altexp_name = list( features = c(features), assays = c(assays) )) altexp_attributes <- check_altexps(sce_list) - + } else { + # Remove altexps if we are not including them + sce_list <- sce_list |> + purrr::map(removeAltExps) } @@ -181,68 +183,33 @@ merge_sce_list <- function( ## Handle altExps ------------------------------------------------------ # If we are including altExps, process them and save to list to add to merged SCE - merged_altexps <- list() if (include_altexp) { - # First we need to determine the final column names of the merged_sce (not yet made) - # for use in altExp code. Later we'll apply this order to the merged_sce itself. - # These values are cell ids: `{sce_name}-{barcode}` - all_merged_barcodes <- sce_list |> - purrr::map(colnames) |> - purrr::reduce(union) + for (altexp_name in names(altexp_attributes)) { - # Find all altExp names present in the SCE objects. - # We will prepare a merged altExp for each of these. - altexp_names <- sce_list |> - purrr::map( - \(sce) altExpNames(sce) - ) |> - purrr::reduce(union) - - for (altexp_name in altexp_names) { - # Determine which SCEs contain this altExp, and create list of those altExps - altexp_list <- sce_list |> - purrr::keep(\(sce) altexp_name %in% altExpNames(sce)) |> - purrr::map(altExp) - - # Create and save the merged altExp for this altexp_name - merged_altexps[[altexp_name]] <- create_merged_altexp( - altexp_list, - all_merged_barcodes - ) + expected_assays <- altexp_attributes[[altexp_name]][["assays"]] + expected_features <- altexp_attributes[[altexp_name]][["features"]] + sce_list <- sce_list |> + purrr::imap( + prepare_altexp_for_merge, + altexp_name, + expected_assays, + expected_features + ) } } - # Remove altExps from SCEs prior to main experiment merge - # If none are present, this code has no effect. - sce_list <- sce_list |> - purrr::map(removeAltExps) - # Create the merged SCE from the processed list merged_sce <- do.call(cbind, sce_list) # Replace existing metadata list with merged metadata metadata(merged_sce) <- metadata_list - # Add the merged altE into the main merged_sce - if (include_altexp) { - - # Ensure compatible column names - # (this is probably not necessary but doesn't hurt...) - merged_sce <- merged_sce[,all_merged_barcodes] - - # Add the merged altexps into the merged sce - for (altexp_name in names(merged_altexps)) { - altExp(merged_sce, altexp_name) <- merged_altexps[[altexp_name]] - } - } - return(merged_sce) } - #' Helper function to prepare an SCE object for merging #' #' @param sce The SCE object to be prepared @@ -259,6 +226,8 @@ merge_sce_list <- function( #' populated with respective information. #' @param preserve_rowdata_cols A vector of rowData columns which should not be #' renamed +#' @param is_altexp Boolean if we are preparing an altExp or not. +#' Default is `FALSE`. #' #' @return An updated SCE that is prepared for merging prepare_sce_for_merge <- function( @@ -268,7 +237,8 @@ prepare_sce_for_merge <- function( cell_id_column, shared_features, retain_coldata_cols, - preserve_rowdata_cols) { + preserve_rowdata_cols, + is_altexp = FALSE) { # Subset to shared features sce <- sce[shared_features, ] @@ -306,14 +276,17 @@ prepare_sce_for_merge <- function( # Use drop=FALSE to ensure result is a DataFrame colData(sce) <- colData(sce)[, retain_coldata_cols, drop = FALSE] - # Add batch column - sce[[batch_column]] <- sce_name + # Only make these changes if we are not working with an altExp + if (!is_altexp) { + # Add batch column + sce[[batch_column]] <- sce_name - # Add cell_id column - sce[[cell_id_column]] <- colnames(sce) + # Add cell_id column + sce[[cell_id_column]] <- colnames(sce) - # Add `sce_name` to colnames so cell ids can be mapped to originating SCE - colnames(sce) <- glue::glue("{sce_name}-{colnames(sce)}") + # Add `sce_name` to colnames so cell ids can be mapped to originating SCE + colnames(sce) <- glue::glue("{sce_name}-{colnames(sce)}") + } # Update metadata list metadata(sce) <- update_sce_metadata( @@ -325,6 +298,57 @@ prepare_sce_for_merge <- function( } + +#' Prepare an alternative experiment for merging +#' +#' If the given `altexp_name` name is missing from the SCE, a new one with all +#' NA assays will be created. +#' @param sce SCE with altExp to prepare +#' @param sce_name SCE name +#' @param altexp_name Name of altExp of interest +#' @param expected_assays Vector of assays that should be in each altExp +#' @param expected_features Vector of features that should be in each altExp +#' @param preserve_rowdata_cols altExp rowData columns which should not be renamed +#' +#' @return An SCE with an updated altEcp +prepare_altexp_for_merge <- function( + sce, + sce_name, + altexp_name, + expected_assays, + expected_features, + preserve_rowdata_cols = c("target_type")) { + + if (!altexp_name %in% altExpNames(sce) ) { + + na_assays <- expected_assays |> + purrr::set_names() |> + purrr::map( + build_na_matrix, + expected_features, + colnames(sce) + ) + + altExp(sce, altexp_name) <- SingleCellExperiment(assays = na_assays) + } + + # Now, prepare this altexp for merge + altExp(sce, altexp_name) <- prepare_sce_for_merge( + altExp(sce, altexp_name), + sce_name, + batch_column = NULL, + cell_id_column = NULL, + shared_features = expected_features, + retain_coldata_cols = NULL, + preserve_rowdata_cols = preserve_rowdata_cols, + is_altexp = TRUE + ) + + return(sce) +} + + + #' Helper function to update SCE metadata for merging #' #' @param metadata_list Current SCE metadata before modification @@ -355,99 +379,32 @@ update_sce_metadata <- function(metadata_list) { } -#' Prepare altExps for merge and create a list of merged altExps for each altExp name -#' -#' -#' @param altexp_list List of altexps to merge -#' @param all_merged_barcodes Vector of column names (`{sce_name}-{barcode}`) to include -#' in the final merged altExp. This vector includes _all_ SCEs, not only those -#' with this altExp name. -#' -#' @return A list of merged altExps to include the final merged SCE object -create_merged_altexp <- function( - altexp_list, - all_merged_barcodes) { - - # Create vector of all features - # this order will be used for the final assay matrix/ces - altexp_features <- altexp_list |> - purrr::map(rownames) |> - purrr::reduce(union) - - # Determine which assays are present for this altexp_name. We'll need a matrix - # for each of these in the final merged object - altexp_assay_names <- altexp_list |> - purrr::map(assayNames) |> - purrr::reduce(union) - - # Create new merged assay matrices - new_assays <- altexp_assay_names |> - purrr::map( - build_new_altexp_assay, - altexp_list, - altexp_features, - all_merged_barcodes - ) - names(new_assays) <- altexp_assay_names - - - # Create merged altExp - merged_altexp <- SingleCellExperiment(assays = new_assays) - - # TODO: Add rowData and colData to merged_altexp - - return(merged_altexp) - -} - - -#' Build a new sparse matrix for merging altExps +#' Create a sparse matrix with all NA values #' -#' @param assay_name Name of assay of interest (e.g., "counts") -#' @param altexp_list List of altExps which should be included in the new matrix -#' @param all_merged_features Vector of matrix row names, corresponding to the full -#' set of features for this altExp -#' @param all_merged_barcodes Vector of matrix column names, corresponding to all cells -#' which will be in the final merged altExp +#' @param assay_name Intended assay name for this matrix (e.g., "counts") +#' @param matrix_rownames Vector of matrix rown ames +#' @param matrix_colnames Vector of matrix column names #' #' @return Sparse matrix -build_new_altexp_assay <- function( +build_na_matrix <- function( assay_name, - altexp_list, - all_merged_features, - all_merged_barcodes) { + matrix_rownames, + matrix_colnames) { - # Establish new matrix with all NA values - new_matrix <- matrix( + Matrix::Matrix( data = NA, - nrow = length(all_merged_features), - ncol = length(all_merged_barcodes), + nrow = length(matrix_rownames), + ncol = length(matrix_colnames), dimnames = list( - all_merged_features, - all_merged_barcodes + matrix_rownames, + matrix_colnames ) ) - - # Substitute existing assays into the matrix, if they exist - for (altexp in altexp_list) { - # Note that column names were already formatted as `{sce_name}-{barcode}` by - # the main SCE merging code - if (assay_name %in% assayNames(altexp)) { - # as.matrix() is needed here - new_matrix[rownames(altexp), colnames(altexp)] <- as.matrix( assay(altexp, assay_name) ) - } - - } - # sparsify - new_matrix <- as(new_matrix, "CsparseMatrix") - - return(new_matrix) } - #' Helper function to check altExp compatibility #' #' @param sce_list List of SCEs with altExps to check diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 7b0dcef1..940d8c66 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -384,15 +384,9 @@ sce_list_with_altexp <- sce_list |> num_altexp_features, total_cells / 3 ) - # vector of all expected names full_altexp_features <- rownames(altExp(sce_list_with_altexp[[1]])) -# second list where 1 is missing all -sce_list_with_altexp_features_mismatch <- sce_list_with_altexp -altExp(sce_list_with_altexp_features_mismatch[[1]]) <- altExp(sce_list_with_altexp_features_mismatch[[1]])[1:3,] - - test_that("merging SCEs with altExps works as expected when include_altexps = FALSE", { @@ -444,9 +438,12 @@ test_that("merging SCEs with 1 altexp and same features works as expected, with test_that("merging SCEs with 1 altexp but different features fails as expected, with altexps", { + # second list where 1 is missing all + altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3,] + expect_error( merge_sce_list( - sce_list_with_altexp_features_mismatch, + sce_list_with_altexp, batch_column = batch_column, # "total" should get removed retain_coldata_cols = retain_coldata_cols, @@ -455,3 +452,23 @@ test_that("merging SCEs with 1 altexp but different features fails as expected, ) ) }) + + + + +test_that("merging SCEs where 1 altExp is missing works as expected, with altexps", { + + sce_list_with_altexp[["sce4"]] <- removeAltExps(sce_list_with_altexp[[1]]) + + merged_sce <- merge_sce_list( + sce_list_with_altexp, + batch_column = batch_column, + # "total" should get removed + retain_coldata_cols = retain_coldata_cols, + # this row name should not be modified: + preserve_rowdata_cols = c("gene_names") + ) + + expect_true(altExpNames(merged_sce) == "adt") + +}) From cbdf216cd17f03aedbc91b6346c5e54efbcd4efb Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 19 Dec 2023 11:47:55 -0500 Subject: [PATCH 07/30] run document --- man/build_na_matrix.Rd | 21 ++++++++++++++++++++ man/build_new_altexp_assay.Rd | 30 ---------------------------- man/check_altexps.Rd | 19 ++++++++++++++++++ man/create_merged_altexp.Rd | 21 -------------------- man/prepare_altexp_for_merge.Rd | 35 +++++++++++++++++++++++++++++++++ man/prepare_sce_for_merge.Rd | 6 +++++- man/update_sce_metadata.Rd | 17 ++++++++++++++++ 7 files changed, 97 insertions(+), 52 deletions(-) create mode 100644 man/build_na_matrix.Rd delete mode 100644 man/build_new_altexp_assay.Rd create mode 100644 man/check_altexps.Rd delete mode 100644 man/create_merged_altexp.Rd create mode 100644 man/prepare_altexp_for_merge.Rd create mode 100644 man/update_sce_metadata.Rd diff --git a/man/build_na_matrix.Rd b/man/build_na_matrix.Rd new file mode 100644 index 00000000..45fc6b9b --- /dev/null +++ b/man/build_na_matrix.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge_sce_list.R +\name{build_na_matrix} +\alias{build_na_matrix} +\title{Create a sparse matrix with all NA values} +\usage{ +build_na_matrix(assay_name, matrix_rownames, matrix_colnames) +} +\arguments{ +\item{assay_name}{Intended assay name for this matrix (e.g., "counts")} + +\item{matrix_rownames}{Vector of matrix rown ames} + +\item{matrix_colnames}{Vector of matrix column names} +} +\value{ +Sparse matrix +} +\description{ +Create a sparse matrix with all NA values +} diff --git a/man/build_new_altexp_assay.Rd b/man/build_new_altexp_assay.Rd deleted file mode 100644 index efcd50e1..00000000 --- a/man/build_new_altexp_assay.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/merge_sce_list.R -\name{build_new_altexp_assay} -\alias{build_new_altexp_assay} -\title{Build a new sparse matrix for merging altExps} -\usage{ -build_new_altexp_assay( - assay_name, - altexp_list, - all_merged_features, - all_merged_barcodes -) -} -\arguments{ -\item{assay_name}{Name of assay of interest (e.g., "counts")} - -\item{altexp_list}{List of altExps which should be included in the new matrix} - -\item{all_merged_features}{Vector of matrix row names, corresponding to the full -set of features for this altExp} - -\item{all_merged_barcodes}{Vector of matrix column names, corresponding to all cells -which will be in the final merged altExp} -} -\value{ -Sparse matrix -} -\description{ -Build a new sparse matrix for merging altExps -} diff --git a/man/check_altexps.Rd b/man/check_altexps.Rd new file mode 100644 index 00000000..60bb7bc2 --- /dev/null +++ b/man/check_altexps.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge_sce_list.R +\name{check_altexps} +\alias{check_altexps} +\title{Helper function to check altExp compatibility} +\usage{ +check_altexps(sce_list) +} +\arguments{ +\item{sce_list}{List of SCEs with altExps to check} +} +\value{ +List of named lists with altExp information for use when preparing to merge, + with each sublist formatted as: + altexp_name = list(features = c(features), assays = c(assays)) +} +\description{ +Helper function to check altExp compatibility +} diff --git a/man/create_merged_altexp.Rd b/man/create_merged_altexp.Rd deleted file mode 100644 index 4e2a5876..00000000 --- a/man/create_merged_altexp.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/merge_sce_list.R -\name{create_merged_altexp} -\alias{create_merged_altexp} -\title{Prepare altExps for merge and create a list of merged altExps for each altExp name} -\usage{ -create_merged_altexp(altexp_list, all_merged_barcodes) -} -\arguments{ -\item{altexp_list}{List of altexps to merge} - -\item{all_merged_barcodes}{Vector of column names (`{sce_name}-{barcode}`) to include -in the final merged altExp. This vector includes _all_ SCEs, not only those -with this altExp name.} -} -\value{ -A list of merged altExps to include the final merged SCE object -} -\description{ -Prepare altExps for merge and create a list of merged altExps for each altExp name -} diff --git a/man/prepare_altexp_for_merge.Rd b/man/prepare_altexp_for_merge.Rd new file mode 100644 index 00000000..0dee95d8 --- /dev/null +++ b/man/prepare_altexp_for_merge.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge_sce_list.R +\name{prepare_altexp_for_merge} +\alias{prepare_altexp_for_merge} +\title{Prepare an alternative experiment for merging} +\usage{ +prepare_altexp_for_merge( + sce, + sce_name, + altexp_name, + expected_assays, + expected_features, + preserve_rowdata_cols = c("target_type") +) +} +\arguments{ +\item{sce}{SCE with altExp to prepare} + +\item{sce_name}{SCE name} + +\item{altexp_name}{Name of altExp of interest} + +\item{expected_assays}{Vector of assays that should be in each altExp} + +\item{expected_features}{Vector of features that should be in each altExp} + +\item{preserve_rowdata_cols}{altExp rowData columns which should not be renamed} +} +\value{ +An SCE with an updated altEcp +} +\description{ +If the given `altexp_name` name is missing from the SCE, a new one with all + NA assays will be created. +} diff --git a/man/prepare_sce_for_merge.Rd b/man/prepare_sce_for_merge.Rd index 5c91d2ae..195d8762 100644 --- a/man/prepare_sce_for_merge.Rd +++ b/man/prepare_sce_for_merge.Rd @@ -11,7 +11,8 @@ prepare_sce_for_merge( cell_id_column, shared_features, retain_coldata_cols, - preserve_rowdata_cols + preserve_rowdata_cols, + is_altexp = FALSE ) } \arguments{ @@ -35,6 +36,9 @@ populated with respective information.} \item{preserve_rowdata_cols}{A vector of rowData columns which should not be renamed} + +\item{is_altexp}{Boolean if we are preparing an altExp or not. +Default is `FALSE`.} } \value{ An updated SCE that is prepared for merging diff --git a/man/update_sce_metadata.Rd b/man/update_sce_metadata.Rd new file mode 100644 index 00000000..1cc58743 --- /dev/null +++ b/man/update_sce_metadata.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge_sce_list.R +\name{update_sce_metadata} +\alias{update_sce_metadata} +\title{Helper function to update SCE metadata for merging} +\usage{ +update_sce_metadata(metadata_list) +} +\arguments{ +\item{metadata_list}{Current SCE metadata before modification} +} +\value{ +Updated metadata list to store in the SCE +} +\description{ +Helper function to update SCE metadata for merging +} From 1631f6eb4cd09b6dae8d45170e2fc67e5242ce7a Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 19 Dec 2023 12:07:44 -0500 Subject: [PATCH 08/30] add some unit tests --- tests/testthat/test-merge_sce_list.R | 82 +++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 940d8c66..5cdd14fa 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -334,7 +334,6 @@ test_that("merging SCEs with library metadata fails as expected, no altexps", { # Tests with altexps ------------------------------------------------------- - ## helper function to add an altExp to a simulated SCE ---- add_sce_altexp <- function( sce, @@ -388,6 +387,32 @@ sce_list_with_altexp <- sce_list |> full_altexp_features <- rownames(altExp(sce_list_with_altexp[[1]])) + +test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { + + test_altexp <- altExp(sce_list_with_altexp[[1]]) + prepared_altexp <- prepare_sce_for_merge( + test_altexp, + "test", + batch_column = NULL, + cell_id_column = NULL, + full_altexp_features, + retain_coldata_cols = NULL, + preserve_rowdata_cols = "target_type", + is_altexp = TRUE + ) + + # rowdata names should start with test-, but NOT the overall column names + expect_true( + all(stringr::str_starts(colnames(rowData(prepared_altexp)), "test-")) + ) + + expect_true( + !all(stringr::str_starts(colnames(prepared_altexp), "test-")) + ) +}) + + test_that("merging SCEs with altExps works as expected when include_altexps = FALSE", { merged_sce <- merge_sce_list( @@ -472,3 +497,58 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp expect_true(altExpNames(merged_sce) == "adt") }) + + +## Other tests ------------------ + +test_that("build_na_matrix works as expected",{ + + rows <- letters[1:5] + cols <- letters[6:10] + sparse_mat <- build_na_matrix( + "name", + rows, + cols + ) + + expect_true( + class(sparse_mat) == "lgeMatrix" + ) + + expect_equal( + rownames(sparse_mat), rows + ) + + expect_equal( + colnames(sparse_mat), cols + ) + +}) + + +test_that("check_altexps passes when it should pass", { + + attribute_list <- check_altexps(sce_list_with_altexp) + expect_equal( + attribute_list[["adt"]][["assays"]], c("counts", "logcounts") + ) + expect_equal( + attribute_list[["adt"]][["features"]], full_altexp_features + ) + +}) + + +test_that("check_altexps throws an error as expected when assays do not match", { + + logcounts(altExp(sce_list_with_altexp[[3]])) <- NULL + expect_error(check_altexps(sce_list_with_altexp)) + +}) + +test_that("check_altexps throws an error as expected when features do not match", { + + altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3,] + expect_error(check_altexps(sce_list_with_altexp)) + +}) From 957b24fa7afabfac5bf5034cb62a1fefcff91cee Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 19 Dec 2023 12:28:02 -0500 Subject: [PATCH 09/30] We should add batch and cell_id columns to the altExp, too. Cant hurt --- R/merge_sce_list.R | 33 ++++++++++++++++++---------- tests/testthat/test-merge_sce_list.R | 8 +++++-- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index f7a30780..a06fe93d 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -195,7 +195,9 @@ merge_sce_list <- function( prepare_altexp_for_merge, altexp_name, expected_assays, - expected_features + expected_features, + batch_column = batch_column, + cell_id_column = cell_id_column ) } } @@ -215,9 +217,9 @@ merge_sce_list <- function( #' @param sce The SCE object to be prepared #' @param sce_name The name of the SCE object #' @param batch_column The name of the batch column which will be added to the -#' colData slot +#' colData slot. #' @param cell_id_column The name of the cell_id column which will be added to the -#' colData slot +#' colData slot. #' @param shared_features A vector of features (genes) that all SCEs to be merged #' have in common #' @param retain_coldata_cols A vector of columns to retain in the colData slot. @@ -276,14 +278,15 @@ prepare_sce_for_merge <- function( # Use drop=FALSE to ensure result is a DataFrame colData(sce) <- colData(sce)[, retain_coldata_cols, drop = FALSE] - # Only make these changes if we are not working with an altExp - if (!is_altexp) { - # Add batch column - sce[[batch_column]] <- sce_name + # Add batch column + sce[[batch_column]] <- sce_name - # Add cell_id column - sce[[cell_id_column]] <- colnames(sce) + # Add cell_id column + sce[[cell_id_column]] <- colnames(sce) + # Only modify column names if we are not working with an altExp. + # This avoids having double `{sce_name}-{sce_name}` prefixes + if (!is_altexp) { # Add `sce_name` to colnames so cell ids can be mapped to originating SCE colnames(sce) <- glue::glue("{sce_name}-{colnames(sce)}") } @@ -308,6 +311,10 @@ prepare_sce_for_merge <- function( #' @param altexp_name Name of altExp of interest #' @param expected_assays Vector of assays that should be in each altExp #' @param expected_features Vector of features that should be in each altExp +#' @param batch_column The name of the batch column which will be added to the +#' colData slot. +#' @param cell_id_column The name of the cell_id column which will be added to the +#' colData slot. #' @param preserve_rowdata_cols altExp rowData columns which should not be renamed #' #' @return An SCE with an updated altEcp @@ -317,6 +324,8 @@ prepare_altexp_for_merge <- function( altexp_name, expected_assays, expected_features, + batch_column, + cell_id_column, preserve_rowdata_cols = c("target_type")) { if (!altexp_name %in% altExpNames(sce) ) { @@ -336,10 +345,10 @@ prepare_altexp_for_merge <- function( altExp(sce, altexp_name) <- prepare_sce_for_merge( altExp(sce, altexp_name), sce_name, - batch_column = NULL, - cell_id_column = NULL, + batch_column = batch_column, + cell_id_column = cell_id_column, shared_features = expected_features, - retain_coldata_cols = NULL, + retain_coldata_cols = NULL, # Is this right? preserve_rowdata_cols = preserve_rowdata_cols, is_altexp = TRUE ) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 5cdd14fa..83a48d53 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -394,8 +394,8 @@ test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { prepared_altexp <- prepare_sce_for_merge( test_altexp, "test", - batch_column = NULL, - cell_id_column = NULL, + batch_column = "batch", + cell_id_column = "cell", full_altexp_features, retain_coldata_cols = NULL, preserve_rowdata_cols = "target_type", @@ -410,6 +410,10 @@ test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { expect_true( !all(stringr::str_starts(colnames(prepared_altexp), "test-")) ) + + expect_equal( + colnames(colData(prepared_altexp)), c("batch", "cell") + ) }) From 75ab574430f27412ddaa2d0b143749d4418da861 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 19 Dec 2023 12:29:16 -0500 Subject: [PATCH 10/30] doc update and redocument --- R/merge_sce_list.R | 2 +- man/prepare_altexp_for_merge.Rd | 8 ++++++++ man/prepare_sce_for_merge.Rd | 6 +++--- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index a06fe93d..42be7801 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -229,7 +229,7 @@ merge_sce_list <- function( #' @param preserve_rowdata_cols A vector of rowData columns which should not be #' renamed #' @param is_altexp Boolean if we are preparing an altExp or not. -#' Default is `FALSE`. +#' Default is `FALSE`. If FALSE, the SCE colnames will not be modified. #' #' @return An updated SCE that is prepared for merging prepare_sce_for_merge <- function( diff --git a/man/prepare_altexp_for_merge.Rd b/man/prepare_altexp_for_merge.Rd index 0dee95d8..baea7aa7 100644 --- a/man/prepare_altexp_for_merge.Rd +++ b/man/prepare_altexp_for_merge.Rd @@ -10,6 +10,8 @@ prepare_altexp_for_merge( altexp_name, expected_assays, expected_features, + batch_column, + cell_id_column, preserve_rowdata_cols = c("target_type") ) } @@ -24,6 +26,12 @@ prepare_altexp_for_merge( \item{expected_features}{Vector of features that should be in each altExp} +\item{batch_column}{The name of the batch column which will be added to the +colData slot.} + +\item{cell_id_column}{The name of the cell_id column which will be added to the +colData slot.} + \item{preserve_rowdata_cols}{altExp rowData columns which should not be renamed} } \value{ diff --git a/man/prepare_sce_for_merge.Rd b/man/prepare_sce_for_merge.Rd index 195d8762..810cf19a 100644 --- a/man/prepare_sce_for_merge.Rd +++ b/man/prepare_sce_for_merge.Rd @@ -21,10 +21,10 @@ prepare_sce_for_merge( \item{sce_name}{The name of the SCE object} \item{batch_column}{The name of the batch column which will be added to the -colData slot} +colData slot.} \item{cell_id_column}{The name of the cell_id column which will be added to the -colData slot} +colData slot.} \item{shared_features}{A vector of features (genes) that all SCEs to be merged have in common} @@ -38,7 +38,7 @@ populated with respective information.} renamed} \item{is_altexp}{Boolean if we are preparing an altExp or not. -Default is `FALSE`.} +Default is `FALSE`. If FALSE, the SCE colnames will not be modified.} } \value{ An updated SCE that is prepared for merging From 6c2c26cf768ea8f47a5944b0ec2801c71a76d1b3 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 20 Dec 2023 08:18:53 -0500 Subject: [PATCH 11/30] Apply suggestions from code review Co-authored-by: Joshua Shapiro --- R/merge_sce_list.R | 31 ++++++++++++---------------- tests/testthat/test-merge_sce_list.R | 6 +++--- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 42be7801..e17046a7 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -217,9 +217,9 @@ merge_sce_list <- function( #' @param sce The SCE object to be prepared #' @param sce_name The name of the SCE object #' @param batch_column The name of the batch column which will be added to the -#' colData slot. +#' colData slot #' @param cell_id_column The name of the cell_id column which will be added to the -#' colData slot. +#' colData slot #' @param shared_features A vector of features (genes) that all SCEs to be merged #' have in common #' @param retain_coldata_cols A vector of columns to retain in the colData slot. @@ -292,9 +292,7 @@ prepare_sce_for_merge <- function( } # Update metadata list - metadata(sce) <- update_sce_metadata( - metadata(sce) - ) + metadata(sce) <- update_sce_metadata(metadata(sce)) # return the processed SCE return(sce) @@ -317,7 +315,7 @@ prepare_sce_for_merge <- function( #' colData slot. #' @param preserve_rowdata_cols altExp rowData columns which should not be renamed #' -#' @return An SCE with an updated altEcp +#' @return An SCE with an updated altExp prepare_altexp_for_merge <- function( sce, sce_name, @@ -403,13 +401,14 @@ build_na_matrix <- function( matrix_colnames) { Matrix::Matrix( - data = NA, + data = NA_real_, nrow = length(matrix_rownames), ncol = length(matrix_colnames), dimnames = list( matrix_rownames, matrix_colnames - ) + ), + sparse = TRUE ) } @@ -421,16 +420,14 @@ build_na_matrix <- function( #' @return List of named lists with altExp information for use when preparing to merge, #' with each sublist formatted as: #' altexp_name = list(features = c(features), assays = c(assays)) -check_altexps <- function(sce_list) { +get_altexp_attributes <- function(sce_list) { # Attribute list to save for later use altexp_attributes <- list() # Find all altExp names present in the SCE objects. altexp_names <- sce_list |> - purrr::map( - \(sce) altExpNames(sce) - ) |> + purrr::map(altExpNames) |> purrr::reduce(union) # For each in altexp_names (if present), do they have the same features? @@ -445,13 +442,12 @@ check_altexps <- function(sce_list) { # find their union of features all_features <- altexp_list |> purrr::map(rownames) |> - purrr::reduce(union) |> - sort() + purrr::reduce(union) # create logical vector for presence of all features features_present <- altexp_list |> purrr::map_lgl( - \(alt_sce) identical(all_features, sort(rownames(alt_sce))) + \(alt_sce) setequal(rownames(alt_sce), all_features) ) if (!all(features_present)) { @@ -463,13 +459,12 @@ check_altexps <- function(sce_list) { # check for same assays all_assays <- altexp_list |> purrr::map(assayNames) |> - purrr::reduce(union)|> - sort() + purrr::reduce(union) # create logical vector for presence of all assays assays_present <- altexp_list |> purrr::map_lgl( - \(alt_sce) identical(all_assays, sort(assayNames(alt_sce))) + \(alt_sce) setequal(assayNames(alt_sce), all_assays) ) # TODO: we may want to drop assays that aren't present in all altexps, rather than dying. diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 83a48d53..1c3d5b83 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -467,7 +467,7 @@ test_that("merging SCEs with 1 altexp and same features works as expected, with test_that("merging SCEs with 1 altexp but different features fails as expected, with altexps", { - # second list where 1 is missing all + # keep only the first 3 features from the first SCE altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3,] expect_error( @@ -498,7 +498,7 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp preserve_rowdata_cols = c("gene_names") ) - expect_true(altExpNames(merged_sce) == "adt") + expect_equal(altExpNames(merged_sce), "adt") }) @@ -516,7 +516,7 @@ test_that("build_na_matrix works as expected",{ ) expect_true( - class(sparse_mat) == "lgeMatrix" + class(sparse_mat) == "sparseMatrix" ) expect_equal( From 3b439076f9fdc4e97a50d78699f39f5dc802fbc1 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 20 Dec 2023 08:21:04 -0500 Subject: [PATCH 12/30] Swap to get_altexp_attributes and redocument --- R/merge_sce_list.R | 2 +- man/{check_altexps.Rd => get_altexp_attributes.Rd} | 6 +++--- man/prepare_altexp_for_merge.Rd | 2 +- man/prepare_sce_for_merge.Rd | 4 ++-- tests/testthat/test-merge_sce_list.R | 12 ++++++------ 5 files changed, 13 insertions(+), 13 deletions(-) rename man/{check_altexps.Rd => get_altexp_attributes.Rd} (83%) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index e17046a7..5a887ba2 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -90,7 +90,7 @@ merge_sce_list <- function( if (include_altexp) { # This is a list of lists of altexp information for later use: # (altexp_name = list( features = c(features), assays = c(assays) )) - altexp_attributes <- check_altexps(sce_list) + altexp_attributes <- get_altexp_attributes(sce_list) } else { # Remove altexps if we are not including them sce_list <- sce_list |> diff --git a/man/check_altexps.Rd b/man/get_altexp_attributes.Rd similarity index 83% rename from man/check_altexps.Rd rename to man/get_altexp_attributes.Rd index 60bb7bc2..d844b263 100644 --- a/man/check_altexps.Rd +++ b/man/get_altexp_attributes.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/merge_sce_list.R -\name{check_altexps} -\alias{check_altexps} +\name{get_altexp_attributes} +\alias{get_altexp_attributes} \title{Helper function to check altExp compatibility} \usage{ -check_altexps(sce_list) +get_altexp_attributes(sce_list) } \arguments{ \item{sce_list}{List of SCEs with altExps to check} diff --git a/man/prepare_altexp_for_merge.Rd b/man/prepare_altexp_for_merge.Rd index baea7aa7..e978c18d 100644 --- a/man/prepare_altexp_for_merge.Rd +++ b/man/prepare_altexp_for_merge.Rd @@ -35,7 +35,7 @@ colData slot.} \item{preserve_rowdata_cols}{altExp rowData columns which should not be renamed} } \value{ -An SCE with an updated altEcp +An SCE with an updated altExp } \description{ If the given `altexp_name` name is missing from the SCE, a new one with all diff --git a/man/prepare_sce_for_merge.Rd b/man/prepare_sce_for_merge.Rd index 810cf19a..efd1cc22 100644 --- a/man/prepare_sce_for_merge.Rd +++ b/man/prepare_sce_for_merge.Rd @@ -21,10 +21,10 @@ prepare_sce_for_merge( \item{sce_name}{The name of the SCE object} \item{batch_column}{The name of the batch column which will be added to the -colData slot.} +colData slot} \item{cell_id_column}{The name of the cell_id column which will be added to the -colData slot.} +colData slot} \item{shared_features}{A vector of features (genes) that all SCEs to be merged have in common} diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 1c3d5b83..4e7a773c 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -530,9 +530,9 @@ test_that("build_na_matrix works as expected",{ }) -test_that("check_altexps passes when it should pass", { +test_that("get_altexp_attributes passes when it should pass", { - attribute_list <- check_altexps(sce_list_with_altexp) + attribute_list <- get_altexp_attributes(sce_list_with_altexp) expect_equal( attribute_list[["adt"]][["assays"]], c("counts", "logcounts") ) @@ -543,16 +543,16 @@ test_that("check_altexps passes when it should pass", { }) -test_that("check_altexps throws an error as expected when assays do not match", { +test_that("get_altexp_attributes throws an error as expected when assays do not match", { logcounts(altExp(sce_list_with_altexp[[3]])) <- NULL - expect_error(check_altexps(sce_list_with_altexp)) + expect_error(get_altexp_attributes(sce_list_with_altexp)) }) -test_that("check_altexps throws an error as expected when features do not match", { +test_that("get_altexp_attributes throws an error as expected when features do not match", { altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3,] - expect_error(check_altexps(sce_list_with_altexp)) + expect_error(get_altexp_attributes(sce_list_with_altexp)) }) From ea75b6b73ce46960ae1c6912ef3fad9955907712 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 20 Dec 2023 08:22:21 -0500 Subject: [PATCH 13/30] check for matrix class should be dgCMatrix, not sparseMatrix --- tests/testthat/test-merge_sce_list.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 4e7a773c..cebe2201 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -516,7 +516,7 @@ test_that("build_na_matrix works as expected",{ ) expect_true( - class(sparse_mat) == "sparseMatrix" + class(sparse_mat) == "dgCMatrix" ) expect_equal( From d661d12a417ebea6923d287d5b9a205631536b45 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 20 Dec 2023 08:49:13 -0500 Subject: [PATCH 14/30] Update get_altexp_attributes to also collect colData names. Update sparse matrix test --- R/merge_sce_list.R | 17 ++++++++++++++--- man/get_altexp_attributes.Rd | 5 ++++- tests/testthat/test-merge_sce_list.R | 6 +++++- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 5a887ba2..f0cdd57a 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -89,7 +89,7 @@ merge_sce_list <- function( # altExps for a given name must all have the same features and the same assays if (include_altexp) { # This is a list of lists of altexp information for later use: - # (altexp_name = list( features = c(features), assays = c(assays) )) + # (altexp_name = list( features = c(features), assays = c(assays), coldata = c(column, names) )) altexp_attributes <- get_altexp_attributes(sce_list) } else { # Remove altexps if we are not including them @@ -419,7 +419,10 @@ build_na_matrix <- function( #' #' @return List of named lists with altExp information for use when preparing to merge, #' with each sublist formatted as: -#' altexp_name = list(features = c(features), assays = c(assays)) +#' altexp_name = list(features = c(features), assays = c(assays), coldata = c(column, names)) +#' - features are the full set of altExp features +#' - assays are the set of assays for the given altExp (e.g. counts and logcounts) +#' - coldata are the colData columns associated with this altxp to retain in the main SCE get_altexp_attributes <- function(sce_list) { # Attribute list to save for later use @@ -474,10 +477,18 @@ get_altexp_attributes <- function(sce_list) { ) } + + # Add a vector of colData names we would like to retain in the main SCE + # We'll need one for each {name} + # altexps_{name}_sum, altexps_{name}_detected, and altexps_{name}_percent + coldata_suffixes <- c("sum", "detected", "percent") + coldata_names <- glue::glue("altexps_{altexp_name}_{coldata_suffixes}") + # Save to altexp_attributes for later use altexp_attributes[[altexp_name]] <- list( "features" = all_features, - "assays" = all_assays + "assays" = all_assays, + "coldata" = coldata_names ) } diff --git a/man/get_altexp_attributes.Rd b/man/get_altexp_attributes.Rd index d844b263..9f453431 100644 --- a/man/get_altexp_attributes.Rd +++ b/man/get_altexp_attributes.Rd @@ -12,7 +12,10 @@ get_altexp_attributes(sce_list) \value{ List of named lists with altExp information for use when preparing to merge, with each sublist formatted as: - altexp_name = list(features = c(features), assays = c(assays)) + altexp_name = list(features = c(features), assays = c(assays), coldata = c(column, names)) + - features are the full set of altExp features + - assays are the set of assays for the given altExp (e.g. counts and logcounts) + - coldata are the colData columns associated with this altxp to retain in the main SCE } \description{ Helper function to check altExp compatibility diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index cebe2201..e62cc89f 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -516,7 +516,7 @@ test_that("build_na_matrix works as expected",{ ) expect_true( - class(sparse_mat) == "dgCMatrix" + is(sparse_mat, "dgCMatrix") ) expect_equal( @@ -539,6 +539,10 @@ test_that("get_altexp_attributes passes when it should pass", { expect_equal( attribute_list[["adt"]][["features"]], full_altexp_features ) + expect_equal( + attribute_list[["adt"]][["coldata"]], + glue::glue("altexps_adt_{c('sum', 'detected', 'percent')}") + ) }) From 4e81e1235d1eb71690b1ab037a01f858813bdd6d Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 20 Dec 2023 08:50:45 -0500 Subject: [PATCH 15/30] Revert "Update get_altexp_attributes to also collect colData names. Update sparse matrix test" This reverts commit d661d12a417ebea6923d287d5b9a205631536b45. --- R/merge_sce_list.R | 17 +++-------------- man/get_altexp_attributes.Rd | 5 +---- tests/testthat/test-merge_sce_list.R | 6 +----- 3 files changed, 5 insertions(+), 23 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index f0cdd57a..5a887ba2 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -89,7 +89,7 @@ merge_sce_list <- function( # altExps for a given name must all have the same features and the same assays if (include_altexp) { # This is a list of lists of altexp information for later use: - # (altexp_name = list( features = c(features), assays = c(assays), coldata = c(column, names) )) + # (altexp_name = list( features = c(features), assays = c(assays) )) altexp_attributes <- get_altexp_attributes(sce_list) } else { # Remove altexps if we are not including them @@ -419,10 +419,7 @@ build_na_matrix <- function( #' #' @return List of named lists with altExp information for use when preparing to merge, #' with each sublist formatted as: -#' altexp_name = list(features = c(features), assays = c(assays), coldata = c(column, names)) -#' - features are the full set of altExp features -#' - assays are the set of assays for the given altExp (e.g. counts and logcounts) -#' - coldata are the colData columns associated with this altxp to retain in the main SCE +#' altexp_name = list(features = c(features), assays = c(assays)) get_altexp_attributes <- function(sce_list) { # Attribute list to save for later use @@ -477,18 +474,10 @@ get_altexp_attributes <- function(sce_list) { ) } - - # Add a vector of colData names we would like to retain in the main SCE - # We'll need one for each {name} - # altexps_{name}_sum, altexps_{name}_detected, and altexps_{name}_percent - coldata_suffixes <- c("sum", "detected", "percent") - coldata_names <- glue::glue("altexps_{altexp_name}_{coldata_suffixes}") - # Save to altexp_attributes for later use altexp_attributes[[altexp_name]] <- list( "features" = all_features, - "assays" = all_assays, - "coldata" = coldata_names + "assays" = all_assays ) } diff --git a/man/get_altexp_attributes.Rd b/man/get_altexp_attributes.Rd index 9f453431..d844b263 100644 --- a/man/get_altexp_attributes.Rd +++ b/man/get_altexp_attributes.Rd @@ -12,10 +12,7 @@ get_altexp_attributes(sce_list) \value{ List of named lists with altExp information for use when preparing to merge, with each sublist formatted as: - altexp_name = list(features = c(features), assays = c(assays), coldata = c(column, names)) - - features are the full set of altExp features - - assays are the set of assays for the given altExp (e.g. counts and logcounts) - - coldata are the colData columns associated with this altxp to retain in the main SCE + altexp_name = list(features = c(features), assays = c(assays)) } \description{ Helper function to check altExp compatibility diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index e62cc89f..cebe2201 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -516,7 +516,7 @@ test_that("build_na_matrix works as expected",{ ) expect_true( - is(sparse_mat, "dgCMatrix") + class(sparse_mat) == "dgCMatrix" ) expect_equal( @@ -539,10 +539,6 @@ test_that("get_altexp_attributes passes when it should pass", { expect_equal( attribute_list[["adt"]][["features"]], full_altexp_features ) - expect_equal( - attribute_list[["adt"]][["coldata"]], - glue::glue("altexps_adt_{c('sum', 'detected', 'percent')}") - ) }) From ecf94fb0f4cfa5e807a58e842e1c9d3b0eda5454 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 20 Dec 2023 08:51:20 -0500 Subject: [PATCH 16/30] revive the class test after reversion: --- tests/testthat/test-merge_sce_list.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index cebe2201..746a4ba4 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -516,7 +516,7 @@ test_that("build_na_matrix works as expected",{ ) expect_true( - class(sparse_mat) == "dgCMatrix" + is(sparse_mat, "dgCMatrix") ) expect_equal( From 4f61572b84623b6ed19f7108fc44ec447360213f Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 20 Dec 2023 08:56:28 -0500 Subject: [PATCH 17/30] final.final expect_s4_class test --- tests/testthat/test-merge_sce_list.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 746a4ba4..e7b59e9d 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -515,9 +515,7 @@ test_that("build_na_matrix works as expected",{ cols ) - expect_true( - is(sparse_mat, "dgCMatrix") - ) + expect_s4_class(sparse_mat, "sparseMatrix") expect_equal( rownames(sparse_mat), rows From 955409e1f254497e01bf825399852e1a0ee4b3c6 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 20 Dec 2023 09:05:01 -0500 Subject: [PATCH 18/30] Retain altExps colData stats columns in the main SCE --- tests/testthat/test-merge_sce_list.R | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index e7b59e9d..17a16fd7 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -354,7 +354,7 @@ add_sce_altexp <- function( rowData(sce_alt)[["feature_column"]] <- rownames(sce_alt) rowData(sce_alt)[["other_column"]] <- runif(nrow(sce_alt)) - # add a coldata columns + # add a coldata column colData(sce_alt)[["coldata_column"]] <- runif(ncol(sce_alt)) # add logcounts @@ -368,6 +368,11 @@ add_sce_altexp <- function( metadata(sce_alt)$sample_id <- sample_id metadata(sce_alt)$mapped_reads <- 100 + # Add some more columns to retain to the SCE based on this altExp + colData(sce)[[glue::glue("altexps_{altexp_name}_sum")]] <- runif(ncol(sce)) + colData(sce)[[glue::glue("altexps_{altexp_name}_percent")]] <- runif(ncol(sce)) + colData(sce)[[glue::glue("altexps_{altexp_name}_detected")]] <- runif(ncol(sce)) + # add sce_alt as sce's altExp altExp(sce, altexp_name) <- sce_alt @@ -460,6 +465,22 @@ test_that("merging SCEs with 1 altexp and same features works as expected, with unname() expect_equal(colnames(merged_altexp), expected_colnames) + # Check colData columns + expected_coldata <- c("sum", + "detected", + "altexps_adt_sum", + "altexps_adt_detected", + "altexps_adt_percent", + batch_column, + "cell_id") + + expect_true( + setequal( + colnames(colData(merged_sce)), + expected_coldata + ) + ) + }) From 8170377a012a8aa6268b89ec3aaeba758e2f41e6 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 20 Dec 2023 09:12:45 -0500 Subject: [PATCH 19/30] Update tests/testthat/test-merge_sce_list.R Co-authored-by: Joshua Shapiro --- tests/testthat/test-merge_sce_list.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 17a16fd7..5d782564 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -412,8 +412,9 @@ test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { all(stringr::str_starts(colnames(rowData(prepared_altexp)), "test-")) ) - expect_true( - !all(stringr::str_starts(colnames(prepared_altexp), "test-")) + # column names should be unchanged + expect_equal( + colnames(prepared_altexp), colnames(test_altexp) ) expect_equal( From b1a0091d73e48901756dc33c4618769ef7a74bb2 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 20 Dec 2023 09:23:28 -0500 Subject: [PATCH 20/30] need to probably also commit the R code..... --- R/merge_sce_list.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 5a887ba2..6a2b8ec9 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -91,6 +91,18 @@ merge_sce_list <- function( # This is a list of lists of altexp information for later use: # (altexp_name = list( features = c(features), assays = c(assays) )) altexp_attributes <- get_altexp_attributes(sce_list) + + # Define more main SCE colData columns to keep, based on altExp names + coldata_suffixes <- c("sum", "detected", "percent") + altexp_columns <- glue::glue("altexps_{names(altexp_attributes)}") |> + purrr::map( + \(prefix) { glue::glue("{prefix}_{coldata_suffixes}")} + ) |> + unlist() + + # Update retain_coldata_cols + retain_coldata_cols <- c(retain_coldata_cols, altexp_columns) + } else { # Remove altexps if we are not including them sce_list <- sce_list |> From bc6f82e1a2ec036f2c4c17d2fbb096fb9fda831c Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Wed, 20 Dec 2023 10:14:53 -0500 Subject: [PATCH 21/30] Update tests to handle target_type retention and add test for NA matrix. One test is now failing --- tests/testthat/test-merge_sce_list.R | 55 ++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 5d782564..069c2eb6 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -351,6 +351,7 @@ add_sce_altexp <- function( colnames(sce_alt) <- colnames(sce) # add some rowdata columns + rowData(sce_alt)[["target_type"]] <- "target" # should be retained rowData(sce_alt)[["feature_column"]] <- rownames(sce_alt) rowData(sce_alt)[["other_column"]] <- runif(nrow(sce_alt)) @@ -407,9 +408,9 @@ test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { is_altexp = TRUE ) - # rowdata names should start with test-, but NOT the overall column names - expect_true( - all(stringr::str_starts(colnames(rowData(prepared_altexp)), "test-")) + expect_equal( + colnames(rowData(prepared_altexp)), + c("target_type", "test-feature_column", "test-other_column") ) # column names should be unchanged @@ -423,6 +424,49 @@ test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { }) +test_that("prepare_sce_for_merge() works from an NA matrix", { + + + assay_names <- c("counts", "logcounts") + sce <- sce_list_with_altexp[[1]] + na_assays <- assay_names |> + purrr::set_names() |> + purrr::map( + build_na_matrix, + full_altexp_features, + colnames(sce) + ) + test_altexp <- SingleCellExperiment(assays = na_assays) + + prepared_altexp <- prepare_sce_for_merge( + test_altexp, + "test", + batch_column = batch_column, + cell_id_column = cell_id_column, + shared_features = full_altexp_features, + retain_coldata_cols = NULL, + preserve_rowdata_cols = c("target_type"), + is_altexp = TRUE + ) + + expect_equal( + assayNames(prepared_altexp), assay_names + ) + + expect_equal( + colnames(colData(prepared_altexp)), c("batch", "cell_id") + ) + + expect_equal( + colnames(prepared_altexp), colnames(sce) + ) + + expect_equal( + rownames(prepared_altexp), full_altexp_features + ) +}) + + test_that("merging SCEs with altExps works as expected when include_altexps = FALSE", { merged_sce <- merge_sce_list( @@ -492,6 +536,7 @@ test_that("merging SCEs with 1 altexp but different features fails as expected, # keep only the first 3 features from the first SCE altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3,] + expect_error( merge_sce_list( sce_list_with_altexp, @@ -511,6 +556,10 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp sce_list_with_altexp[["sce4"]] <- removeAltExps(sce_list_with_altexp[[1]]) + # from cbind docs: + #The colnames in colData(SummarizedExperiment) must match or an error is thrown. + #Duplicate columns of rowData(SummarizedExperiment) must contain the same data. + merged_sce <- merge_sce_list( sce_list_with_altexp, batch_column = batch_column, From 3fdad1e778b76175221ff6ecc332a2f844ed2b85 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Thu, 21 Dec 2023 09:40:18 -0500 Subject: [PATCH 22/30] make ADT names not genes reformatting from precommit --- tests/testthat/test-merge_sce_list.R | 62 ++++++++++++---------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 069c2eb6..0ff026de 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -60,7 +60,6 @@ sce_list <- list( # Tests without altexps ---------------------------------------------- test_that("`update_sce_metadata()` returns the expected list", { - metadata_list <- metadata(sce_list[[1]]) new_metadata <- update_sce_metadata(metadata_list) @@ -72,7 +71,7 @@ test_that("`update_sce_metadata()` returns the expected list", { expect_equal( names(new_metadata$library_metadata), - c("library_id", "sample_id", "total_reads") + c("library_id", "sample_id", "total_reads") ) }) @@ -327,7 +326,7 @@ test_that("merging SCEs with library metadata fails as expected, no altexps", { expect_error( merge_sce_list( sce_list - ) + ) ) }) @@ -341,15 +340,22 @@ add_sce_altexp <- function( altexp_name, num_altexp_features, n_cells) { - sce_alt <- sim_sce( n_genes = num_altexp_features, n_cells = n_cells, - n_empty = 0) + n_empty = 0 + ) # ensure matching barcodes colnames(sce_alt) <- colnames(sce) + # change feature names + rownames(sce_alt) <- stringr::str_replace( + rownames(sce_alt), + "^GENE", + toupper(altexp_name) + ) + # add some rowdata columns rowData(sce_alt)[["target_type"]] <- "target" # should be retained rowData(sce_alt)[["feature_column"]] <- rownames(sce_alt) @@ -395,7 +401,6 @@ full_altexp_features <- rownames(altExp(sce_list_with_altexp[[1]])) test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { - test_altexp <- altExp(sce_list_with_altexp[[1]]) prepared_altexp <- prepare_sce_for_merge( test_altexp, @@ -425,8 +430,6 @@ test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { test_that("prepare_sce_for_merge() works from an NA matrix", { - - assay_names <- c("counts", "logcounts") sce <- sce_list_with_altexp[[1]] na_assays <- assay_names |> @@ -468,7 +471,6 @@ test_that("prepare_sce_for_merge() works from an NA matrix", { test_that("merging SCEs with altExps works as expected when include_altexps = FALSE", { - merged_sce <- merge_sce_list( sce_list_with_altexp, batch_column = batch_column, @@ -481,11 +483,9 @@ test_that("merging SCEs with altExps works as expected when include_altexps = FA # there should not be any altExps expect_length(altExpNames(merged_sce), 0) - }) test_that("merging SCEs with 1 altexp and same features works as expected, with altexps", { - merged_sce <- merge_sce_list( sce_list_with_altexp, batch_column = batch_column, @@ -511,13 +511,15 @@ test_that("merging SCEs with 1 altexp and same features works as expected, with expect_equal(colnames(merged_altexp), expected_colnames) # Check colData columns - expected_coldata <- c("sum", - "detected", - "altexps_adt_sum", - "altexps_adt_detected", - "altexps_adt_percent", - batch_column, - "cell_id") + expected_coldata <- c( + "sum", + "detected", + "altexps_adt_sum", + "altexps_adt_detected", + "altexps_adt_percent", + batch_column, + "cell_id" + ) expect_true( setequal( @@ -525,16 +527,14 @@ test_that("merging SCEs with 1 altexp and same features works as expected, with expected_coldata ) ) - }) test_that("merging SCEs with 1 altexp but different features fails as expected, with altexps", { - # keep only the first 3 features from the first SCE - altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3,] + altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3, ] expect_error( @@ -553,12 +553,11 @@ test_that("merging SCEs with 1 altexp but different features fails as expected, test_that("merging SCEs where 1 altExp is missing works as expected, with altexps", { - sce_list_with_altexp[["sce4"]] <- removeAltExps(sce_list_with_altexp[[1]]) # from cbind docs: - #The colnames in colData(SummarizedExperiment) must match or an error is thrown. - #Duplicate columns of rowData(SummarizedExperiment) must contain the same data. + # The colnames in colData(SummarizedExperiment) must match or an error is thrown. + # Duplicate columns of rowData(SummarizedExperiment) must contain the same data. merged_sce <- merge_sce_list( sce_list_with_altexp, @@ -570,16 +569,14 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp ) expect_equal(altExpNames(merged_sce), "adt") - }) ## Other tests ------------------ -test_that("build_na_matrix works as expected",{ - +test_that("build_na_matrix works as expected", { rows <- letters[1:5] - cols <- letters[6:10] + cols <- letters[6:10] sparse_mat <- build_na_matrix( "name", rows, @@ -595,12 +592,10 @@ test_that("build_na_matrix works as expected",{ expect_equal( colnames(sparse_mat), cols ) - }) test_that("get_altexp_attributes passes when it should pass", { - attribute_list <- get_altexp_attributes(sce_list_with_altexp) expect_equal( attribute_list[["adt"]][["assays"]], c("counts", "logcounts") @@ -608,20 +603,15 @@ test_that("get_altexp_attributes passes when it should pass", { expect_equal( attribute_list[["adt"]][["features"]], full_altexp_features ) - }) test_that("get_altexp_attributes throws an error as expected when assays do not match", { - logcounts(altExp(sce_list_with_altexp[[3]])) <- NULL expect_error(get_altexp_attributes(sce_list_with_altexp)) - }) test_that("get_altexp_attributes throws an error as expected when features do not match", { - - altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3,] + altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3, ] expect_error(get_altexp_attributes(sce_list_with_altexp)) - }) From 5bd051a6e544596d7a5c4bc4e10827a4b48dbeae Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Thu, 21 Dec 2023 09:54:39 -0500 Subject: [PATCH 23/30] Don't fill NAs ourselves: use combineCols --- R/merge_sce_list.R | 56 +++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 6a2b8ec9..80e261c8 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -61,8 +61,6 @@ merge_sce_list <- function( preserve_rowdata_cols = NULL, cell_id_column = "cell_id", include_altexp = TRUE) { - - ## Checks -------------------------- if (is.null(names(sce_list))) { warning( glue::glue( @@ -70,7 +68,7 @@ merge_sce_list <- function( named based on their list index in the merged SCE object." ) ) - names(sce_list) <- 1:length(sce_list) + names(sce_list) <- seq_along(sce_list) } if (length(sce_list) < 2) { @@ -96,13 +94,14 @@ merge_sce_list <- function( coldata_suffixes <- c("sum", "detected", "percent") altexp_columns <- glue::glue("altexps_{names(altexp_attributes)}") |> purrr::map( - \(prefix) { glue::glue("{prefix}_{coldata_suffixes}")} + \(prefix) { + glue::glue("{prefix}_{coldata_suffixes}") + } ) |> unlist() # Update retain_coldata_cols retain_coldata_cols <- c(retain_coldata_cols, altexp_columns) - } else { # Remove altexps if we are not including them sce_list <- sce_list |> @@ -139,7 +138,7 @@ merge_sce_list <- function( # check that library id and sample id are present in metadata id_checks <- sce_list |> - purrr::map(\(sce){ + purrr::map(\(sce) { all(c("library_id", "sample_id") %in% names(metadata(sce))) }) |> unlist() @@ -196,9 +195,7 @@ merge_sce_list <- function( # If we are including altExps, process them and save to list to add to merged SCE if (include_altexp) { - for (altexp_name in names(altexp_attributes)) { - expected_assays <- altexp_attributes[[altexp_name]][["assays"]] expected_features <- altexp_attributes[[altexp_name]][["features"]] @@ -215,7 +212,7 @@ merge_sce_list <- function( } # Create the merged SCE from the processed list - merged_sce <- do.call(cbind, sce_list) + merged_sce <- do.call(combineCols, unname(sce_list)) # Replace existing metadata list with merged metadata metadata(merged_sce) <- metadata_list @@ -245,15 +242,14 @@ merge_sce_list <- function( #' #' @return An updated SCE that is prepared for merging prepare_sce_for_merge <- function( - sce, - sce_name, - batch_column, - cell_id_column, - shared_features, - retain_coldata_cols, - preserve_rowdata_cols, - is_altexp = FALSE) { - + sce, + sce_name, + batch_column, + cell_id_column, + shared_features, + retain_coldata_cols, + preserve_rowdata_cols, + is_altexp = FALSE) { # Subset to shared features sce <- sce[shared_features, ] @@ -337,18 +333,8 @@ prepare_altexp_for_merge <- function( batch_column, cell_id_column, preserve_rowdata_cols = c("target_type")) { - - if (!altexp_name %in% altExpNames(sce) ) { - - na_assays <- expected_assays |> - purrr::set_names() |> - purrr::map( - build_na_matrix, - expected_features, - colnames(sce) - ) - - altExp(sce, altexp_name) <- SingleCellExperiment(assays = na_assays) + if (!altexp_name %in% altExpNames(sce)) { + return(sce) } # Now, prepare this altexp for merge @@ -374,10 +360,12 @@ prepare_altexp_for_merge <- function( #' #' @return Updated metadata list to store in the SCE update_sce_metadata <- function(metadata_list) { - # first check that this library hasn't already been merged if ("library_metadata" %in% names(metadata_list)) { - stop("This SCE object appears to be a merged object. We do not support merging objects with objects that have already been merged.") + stop(paste( + "This SCE object appears to be a merged object", + "We do not support merging objects with objects that have already been merged." + )) } # create library and sample metadata. @@ -411,7 +399,6 @@ build_na_matrix <- function( assay_name, matrix_rownames, matrix_colnames) { - Matrix::Matrix( data = NA_real_, nrow = length(matrix_rownames), @@ -433,7 +420,6 @@ build_na_matrix <- function( #' with each sublist formatted as: #' altexp_name = list(features = c(features), assays = c(assays)) get_altexp_attributes <- function(sce_list) { - # Attribute list to save for later use altexp_attributes <- list() @@ -445,7 +431,6 @@ get_altexp_attributes <- function(sce_list) { # For each in altexp_names (if present), do they have the same features? # If not, error out for (altexp_name in altexp_names) { - # all altExps for this name altexp_list <- sce_list |> purrr::keep(\(sce) altexp_name %in% altExpNames(sce)) |> @@ -491,7 +476,6 @@ get_altexp_attributes <- function(sce_list) { "features" = all_features, "assays" = all_assays ) - } return(altexp_attributes) } From 73732572a2ad26407feb637307141e843600a66a Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Thu, 21 Dec 2023 09:58:03 -0500 Subject: [PATCH 24/30] remove tests no longer needed --- R/merge_sce_list.R | 39 ---------------- tests/testthat/test-merge_sce_list.R | 67 ---------------------------- 2 files changed, 106 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 80e261c8..5a0f3954 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -386,32 +386,6 @@ update_sce_metadata <- function(metadata_list) { } - - -#' Create a sparse matrix with all NA values -#' -#' @param assay_name Intended assay name for this matrix (e.g., "counts") -#' @param matrix_rownames Vector of matrix rown ames -#' @param matrix_colnames Vector of matrix column names -#' -#' @return Sparse matrix -build_na_matrix <- function( - assay_name, - matrix_rownames, - matrix_colnames) { - Matrix::Matrix( - data = NA_real_, - nrow = length(matrix_rownames), - ncol = length(matrix_colnames), - dimnames = list( - matrix_rownames, - matrix_colnames - ), - sparse = TRUE - ) -} - - #' Helper function to check altExp compatibility #' #' @param sce_list List of SCEs with altExps to check @@ -458,19 +432,6 @@ get_altexp_attributes <- function(sce_list) { purrr::map(assayNames) |> purrr::reduce(union) - # create logical vector for presence of all assays - assays_present <- altexp_list |> - purrr::map_lgl( - \(alt_sce) setequal(assayNames(alt_sce), all_assays) - ) - - # TODO: we may want to drop assays that aren't present in all altexps, rather than dying. - if (!all(assays_present)) { - stop( - glue::glue("The {altexp_name} alternative experiments do not share the same set of assays.") - ) - } - # Save to altexp_attributes for later use altexp_attributes[[altexp_name]] <- list( "features" = all_features, diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 0ff026de..e546eb89 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -429,47 +429,6 @@ test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { }) -test_that("prepare_sce_for_merge() works from an NA matrix", { - assay_names <- c("counts", "logcounts") - sce <- sce_list_with_altexp[[1]] - na_assays <- assay_names |> - purrr::set_names() |> - purrr::map( - build_na_matrix, - full_altexp_features, - colnames(sce) - ) - test_altexp <- SingleCellExperiment(assays = na_assays) - - prepared_altexp <- prepare_sce_for_merge( - test_altexp, - "test", - batch_column = batch_column, - cell_id_column = cell_id_column, - shared_features = full_altexp_features, - retain_coldata_cols = NULL, - preserve_rowdata_cols = c("target_type"), - is_altexp = TRUE - ) - - expect_equal( - assayNames(prepared_altexp), assay_names - ) - - expect_equal( - colnames(colData(prepared_altexp)), c("batch", "cell_id") - ) - - expect_equal( - colnames(prepared_altexp), colnames(sce) - ) - - expect_equal( - rownames(prepared_altexp), full_altexp_features - ) -}) - - test_that("merging SCEs with altExps works as expected when include_altexps = FALSE", { merged_sce <- merge_sce_list( sce_list_with_altexp, @@ -574,27 +533,6 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp ## Other tests ------------------ -test_that("build_na_matrix works as expected", { - rows <- letters[1:5] - cols <- letters[6:10] - sparse_mat <- build_na_matrix( - "name", - rows, - cols - ) - - expect_s4_class(sparse_mat, "sparseMatrix") - - expect_equal( - rownames(sparse_mat), rows - ) - - expect_equal( - colnames(sparse_mat), cols - ) -}) - - test_that("get_altexp_attributes passes when it should pass", { attribute_list <- get_altexp_attributes(sce_list_with_altexp) expect_equal( @@ -606,11 +544,6 @@ test_that("get_altexp_attributes passes when it should pass", { }) -test_that("get_altexp_attributes throws an error as expected when assays do not match", { - logcounts(altExp(sce_list_with_altexp[[3]])) <- NULL - expect_error(get_altexp_attributes(sce_list_with_altexp)) -}) - test_that("get_altexp_attributes throws an error as expected when features do not match", { altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3, ] expect_error(get_altexp_attributes(sce_list_with_altexp)) From 9992ef63da700036f8c6d60b1077fccb9451de19 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Thu, 21 Dec 2023 10:00:20 -0500 Subject: [PATCH 25/30] update docs --- man/build_na_matrix.Rd | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 man/build_na_matrix.Rd diff --git a/man/build_na_matrix.Rd b/man/build_na_matrix.Rd deleted file mode 100644 index 45fc6b9b..00000000 --- a/man/build_na_matrix.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/merge_sce_list.R -\name{build_na_matrix} -\alias{build_na_matrix} -\title{Create a sparse matrix with all NA values} -\usage{ -build_na_matrix(assay_name, matrix_rownames, matrix_colnames) -} -\arguments{ -\item{assay_name}{Intended assay name for this matrix (e.g., "counts")} - -\item{matrix_rownames}{Vector of matrix rown ames} - -\item{matrix_colnames}{Vector of matrix column names} -} -\value{ -Sparse matrix -} -\description{ -Create a sparse matrix with all NA values -} From 1b235c28bc28ca53a80400e74bb03e06ffc89225 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 21 Dec 2023 11:00:26 -0500 Subject: [PATCH 26/30] add some dplyr:: --- R/merge_sce_list.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/merge_sce_list.R b/R/merge_sce_list.R index 92dbc660..f6bb60c4 100644 --- a/R/merge_sce_list.R +++ b/R/merge_sce_list.R @@ -181,8 +181,8 @@ merge_sce_list <- function( purrr::map(\(df) { # make sure all column types are compatible first df |> - dplyr::mutate(across(everything(), as.character)) - }) |> + dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) + }) |> dplyr::bind_rows() |> unique() From 4e6d24b97961a060bc4759b93c9b82df2a3f40bc Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Thu, 21 Dec 2023 12:15:35 -0500 Subject: [PATCH 27/30] Apply suggestions from code review Co-authored-by: Joshua Shapiro --- tests/testthat/test-merge_sce_list.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index e546eb89..52004bc0 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -480,11 +480,9 @@ test_that("merging SCEs with 1 altexp and same features works as expected, with "cell_id" ) - expect_true( - setequal( - colnames(colData(merged_sce)), - expected_coldata - ) + expect_setequal( + colnames(colData(merged_sce)), + expected_coldata ) }) @@ -512,7 +510,7 @@ test_that("merging SCEs with 1 altexp but different features fails as expected, test_that("merging SCEs where 1 altExp is missing works as expected, with altexps", { - sce_list_with_altexp[["sce4"]] <- removeAltExps(sce_list_with_altexp[[1]]) + sce_list_with_altexp[["sce4"]] <- sce_list[[1]] # from cbind docs: # The colnames in colData(SummarizedExperiment) must match or an error is thrown. From a9f33ca4f06169a0ce26ea02bf5a32cd959f5b6e Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 21 Dec 2023 13:21:17 -0500 Subject: [PATCH 28/30] Clean up some comments and leave breadcrumbs for my next steps --- tests/testthat/test-merge_sce_list.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 52004bc0..4ae455f9 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -374,6 +374,7 @@ add_sce_altexp <- function( metadata(sce_alt)$library_id <- library_id metadata(sce_alt)$sample_id <- sample_id metadata(sce_alt)$mapped_reads <- 100 + metadata(sce_alt)$ambient_profile <- runif(num_altexp_features) # Add some more columns to retain to the SCE based on this altExp colData(sce)[[glue::glue("altexps_{altexp_name}_sum")]] <- runif(ncol(sce)) @@ -512,10 +513,6 @@ test_that("merging SCEs with 1 altexp but different features fails as expected, test_that("merging SCEs where 1 altExp is missing works as expected, with altexps", { sce_list_with_altexp[["sce4"]] <- sce_list[[1]] - # from cbind docs: - # The colnames in colData(SummarizedExperiment) must match or an error is thrown. - # Duplicate columns of rowData(SummarizedExperiment) must contain the same data. - merged_sce <- merge_sce_list( sce_list_with_altexp, batch_column = batch_column, @@ -526,6 +523,14 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp ) expect_equal(altExpNames(merged_sce), "adt") + + merged_altexp <- altExp(merged_sce, "adt") + + # check number of columns + + # check colData names are as expected + + # check that the NAs are as expected }) From 2f12c6cc7f652410d44178bb0ad99d7119821498 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Thu, 21 Dec 2023 15:31:28 -0500 Subject: [PATCH 29/30] Add tests requested in review; passing locally --- tests/testthat/test-merge_sce_list.R | 29 ++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 4ae455f9..16da9a08 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -526,11 +526,36 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp merged_altexp <- altExp(merged_sce, "adt") - # check number of columns - + # check dimensions + expect_equal( + dim(merged_sce), + c(12, 32) + ) # check colData names are as expected + expected_coldata <- c( + "sum", + "detected", + "altexps_adt_sum", + "altexps_adt_detected", + "altexps_adt_percent", + batch_column, + cell_id_column + ) + expect_setequal( + colnames(colData(merged_sce)), + expected_coldata + ) # check that the NAs are as expected + counts_mat <- as.matrix(counts(merged_altexp)) + sce4_counts <- counts_mat[, stringr::str_starts(colnames(counts_mat), "sce4-")] + expect_true( + all(is.na(sce4_counts)) + ) + numeric_counts <- counts_mat[, !stringr::str_starts(colnames(counts_mat), "sce4-")] + expect_true( + all(is.numeric(numeric_counts)) + ) }) From e4a7399ea4c2d686e13b0fc02e3b8371c008b8ff Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Thu, 21 Dec 2023 16:15:40 -0500 Subject: [PATCH 30/30] Apply suggestions from code review Co-authored-by: Joshua Shapiro --- tests/testthat/test-merge_sce_list.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 16da9a08..991909e6 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -529,7 +529,11 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp # check dimensions expect_equal( dim(merged_sce), - c(12, 32) + c(total_genes, total_cells * 4 / 3) + ) + expect_equal( + dim(merged_altexp), + c(num_altexp_features, total_cells * 4 / 3 ) ) # check colData names are as expected expected_coldata <- c( @@ -547,14 +551,14 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp ) # check that the NAs are as expected - counts_mat <- as.matrix(counts(merged_altexp)) - sce4_counts <- counts_mat[, stringr::str_starts(colnames(counts_mat), "sce4-")] + counts_mat <- counts(merged_altexp) + sce4_counts <- counts_mat[, merged_sce$library_id == "sce4"] expect_true( all(is.na(sce4_counts)) ) - numeric_counts <- counts_mat[, !stringr::str_starts(colnames(counts_mat), "sce4-")] + numeric_counts <- counts_mat[, merged_sce$library_id != "sce4"] expect_true( - all(is.numeric(numeric_counts)) + all(is.finite(numeric_counts)) ) })