-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add functions for Seurat conversion #15
Changes from 12 commits
b7c8385
07a1e12
8a74309
39d7b56
6a316f7
868aa6d
248c1c1
70fba8f
9ef3821
36296d9
80f0703
b2ebf8b
1d80018
2188402
a90d281
40c4c96
17b86b3
927f050
01d0849
55442c5
9fe0918
0e13cfd
f79f070
914e500
7c546fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
#' Convert an SCE object to Seurat | ||
#' | ||
#' Converts A SingleCellExperiment (SCE) object to Seurat format. This is | ||
jashapiro marked this conversation as resolved.
Show resolved
Hide resolved
jashapiro marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#' primarily a wrapper around Seurat::as.Seurat() with some additional steps to | ||
#' include ScPCA metadata and options for converting the feature index from | ||
#' Ensembl gene ids to gene symbols. | ||
#' | ||
#' If present, reduced dimensions from SCE objects will be included, renamed to | ||
#' match Seurat default naming. | ||
#' | ||
#' | ||
#' @param sce The input SCE object | ||
#' @param use_symbols Logical indicating whether the Seurat object uses gene | ||
#' symbols for indexing. Default is TRUE. | ||
#' @param reference If `use_symbols` is TRUE, the reference to use for gene | ||
#' symbols. One of `sce`, `scpca`, `10x2020`, `10x2024`, where `sce` uses the | ||
#' symbols stored in the `gene_symbol` column of the SCE object's row data, | ||
#' and the others use standardized translation tables. See `ensembl_to_symbol()` | ||
#' for more information. Default is `sce`. | ||
#' @param dedup_method Method to handle duplicated gene symbols. If `unique`, | ||
#' the gene symbols will be made unique following standard Seurat procedures. | ||
Comment on lines
+20
to
+21
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a Seurat version to specify here, or do they all (to our knowledge) follow the same approach? |
||
#' If `sum`, the expression values for each gene symbols will be summed. | ||
#' @param seurat_assay_version Whether to create Seurat `v3` or `v5` assay | ||
#' objects. Default is `v3`. | ||
jashapiro marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#' | ||
#' @return a Seurat object | ||
#' @export | ||
#' | ||
sce_to_seurat <- function( | ||
sce, | ||
use_symbols = TRUE, | ||
reference = c("sce", "scpca", "10x2020", "10x2024"), | ||
dedup_method = c("unique", "sum"), | ||
seurat_assay_version = c("v3", "v5")) { | ||
reference <- match.arg(reference) | ||
dedup_method <- match.arg(dedup_method) | ||
seurat_assay_version <- match.arg(seurat_assay_version) | ||
|
||
stopifnot( | ||
"Package `Seurat` must be installed for SCE to Seurat conversion." = | ||
requireNamespace("Seurat", quietly = TRUE), | ||
"Package `SeuratObject` must be installed for SCE to Seurat conversion." = | ||
requireNamespace("SeuratObject", quietly = TRUE), | ||
jashapiro marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"`sce` must be a SingleCellExperiment object." = is(sce, "SingleCellExperiment"), | ||
"`sce` must contain both `gene_ids` and `gene_symbol` columns in the row data if it is being used as a reference." = | ||
reference != "sce" || all(c("gene_ids", "gene_symbol") %in% names(rowData(sce))) | ||
) | ||
|
||
if (use_symbols) { | ||
sce <- suppressMessages(sce_to_symbols( | ||
sce, | ||
reference = reference, | ||
unique = (dedup_method == "unique"), | ||
seurat_compatible = TRUE | ||
)) | ||
} | ||
|
||
if (dedup_method == "sum") { | ||
sce <- sum_duplicate_genes(sce) | ||
} | ||
|
||
mainExpName(sce) <- "RNA" | ||
|
||
# convert reducedDims to Seurat standard naming | ||
reducedDimNames(sce) <- tolower(reducedDimNames(sce)) | ||
reducedDims(sce) <- reducedDims(sce) |> | ||
purrr::map(\(mat) { | ||
# insert underscore between letters and numbers in axis labels | ||
rd_names <- gsub("([A-Za-z]+)([0-9]+)", "\\1_\\2", colnames(mat)) |> | ||
tolower() | ||
colnames(mat) <- rd_names | ||
if (!is.null(attr(mat, "rotation"))) { | ||
colnames(attr(mat, "rotation")) <- rd_names | ||
} | ||
return(mat) | ||
}) | ||
|
||
# pull out any altExps to handle manually | ||
alt_exps <- altExps(sce) | ||
altExps(sce) <- NULL | ||
|
||
# Let Seurat do initial conversion to capture most things | ||
sobj <- Seurat::as.Seurat(sce) | ||
|
||
# update identity values, using cluster if present | ||
sce_meta <- metadata(sce) | ||
sobj$orig.ident <- sce_meta$library_id | ||
if (is.null(sce$cluster)) { | ||
Seurat::Idents(sobj) <- sce_meta$library_id | ||
} else { | ||
Seurat::Idents(sobj) <- sce$cluster | ||
} | ||
|
||
# add variable features if present | ||
if (!is.null(sce_meta$highly_variable_genes)) { | ||
sobj[["RNA"]]@var.features <- sce_meta$highly_variable_genes | ||
sce_meta$highly_variable_genes <- NULL # remove from metadata to avoid redundancy | ||
} | ||
|
||
# convert and set functions depending on assay version requested | ||
if (seurat_assay_version == "v5") { | ||
create_seurat_assay <- SeuratObject::CreateAssay5Object | ||
suppressWarnings(sobj[["RNA"]] <- as(sobj[["RNA"]], "Assay5")) | ||
} else { | ||
create_seurat_assay <- SeuratObject::CreateAssayObject | ||
suppressWarnings(sobj[["RNA"]] <- as(sobj[["RNA"]], "Assay")) | ||
} | ||
|
||
# add spliced data if present | ||
if ("spliced" %in% assayNames(sce)) { | ||
sobj[["spliced"]] <- create_seurat_assay(counts = assay(sce, "spliced")) | ||
} | ||
|
||
# add altExps as needed. | ||
for (alt_exp_name in names(alt_exps)) { | ||
alt_exp <- alt_exps[[alt_exp_name]] | ||
stopifnot( | ||
"All altExps must contain a `counts` assay." = "counts" %in% assayNames(alt_exp) | ||
) | ||
sobj[[alt_exp_name]] <- create_seurat_assay(counts = counts(alt_exp)) | ||
if ("logcounts" %in% assayNames(alt_exp)) { | ||
sobj[[alt_exp_name]]$data <- logcounts(alt_exp) | ||
Comment on lines
+154
to
+156
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth noting that these lines may both give the Seurat warning
I got this when running a multiplexed library through, so this was coming from the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added conversion for altExps with a warning. |
||
} | ||
} | ||
|
||
# add sample-level metadata after removing non-vector objects | ||
sobj@misc <- sce_meta |> | ||
purrr::discard(is.object) |> | ||
purrr::discard(is.list) | ||
|
||
# scale data because many Seurat functions expect this | ||
sobj <- Seurat::ScaleData(sobj, assay = "RNA", verbose = FALSE) | ||
|
||
return(sobj) | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,102 @@ | ||||||||||||||||||||||||||||||
#' Sum counts for genes with duplicate names in a SingleCellExperiment object. | ||||||||||||||||||||||||||||||
#' | ||||||||||||||||||||||||||||||
#' Genes with the same name are merged by summing their raw expression counts. | ||||||||||||||||||||||||||||||
#' If requested, the log-normalized expression values are recalculated, otherwise | ||||||||||||||||||||||||||||||
#' this is left blank. | ||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd add a smidge more here about why this function is helpful, since it's exported (which I agree with) |
||||||||||||||||||||||||||||||
#' | ||||||||||||||||||||||||||||||
#' | ||||||||||||||||||||||||||||||
#' @param sce a SingleCellExperiment object with duplicated row names | ||||||||||||||||||||||||||||||
#' @param normalize a logical indicating whether to normalize the expression | ||||||||||||||||||||||||||||||
#' values. Default is TRUE | ||||||||||||||||||||||||||||||
#' @param recalculate_reduced_dims a logical indicating whether to recalculate | ||||||||||||||||||||||||||||||
#' PCA and UMAP. If FALSE, the input reduced dimensions are copied over. If | ||||||||||||||||||||||||||||||
#' TRUE, the highly variable genes are also recalculated with the new values | ||||||||||||||||||||||||||||||
#' stored in metadata. Default is FALSE | ||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For consistency with other docs...
Suggested change
|
||||||||||||||||||||||||||||||
#' | ||||||||||||||||||||||||||||||
#' @return a SingleCellExperiment object | ||||||||||||||||||||||||||||||
#' @export | ||||||||||||||||||||||||||||||
#' | ||||||||||||||||||||||||||||||
#' @import SingleCellExperiment | ||||||||||||||||||||||||||||||
#' @import SummarizedExperiment | ||||||||||||||||||||||||||||||
#' | ||||||||||||||||||||||||||||||
sum_duplicate_genes <- function(sce, normalize = TRUE, recalculate_reduced_dims = FALSE) { | ||||||||||||||||||||||||||||||
stopifnot( | ||||||||||||||||||||||||||||||
"sce must be a SingleCellExperiment object" = is(sce, "SingleCellExperiment"), | ||||||||||||||||||||||||||||||
"normalize must be a logical" = is.logical(normalize), | ||||||||||||||||||||||||||||||
"recalculate_reduced_dims must be a logical" = is.logical(recalculate_reduced_dims), | ||||||||||||||||||||||||||||||
"normalize can not be FALSE if recalculate_reduced_dims is TRUE" = normalize || !recalculate_reduced_dims | ||||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||||
if (normalize) { | ||||||||||||||||||||||||||||||
stopifnot( | ||||||||||||||||||||||||||||||
"Package `scran` must be installed if `normalize = TRUE` is set." = | ||||||||||||||||||||||||||||||
requireNamespace("scran", quietly = TRUE), | ||||||||||||||||||||||||||||||
"Package `scuttle` must be installed if `normalize = TRUE` is set." = | ||||||||||||||||||||||||||||||
requireNamespace("scuttle", quietly = TRUE) | ||||||||||||||||||||||||||||||
jashapiro marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||
stopifnot( | ||||||||||||||||||||||||||||||
"Package `scater` must be installed if `recalculate_reduced_dims = TRUE` is set." = | ||||||||||||||||||||||||||||||
requireNamespace("scater", quietly = TRUE) | ||||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
if (!any(duplicated(rownames(sce)))) { | ||||||||||||||||||||||||||||||
message("No duplicated gene names found in the SingleCellExperiment object, returning original object") | ||||||||||||||||||||||||||||||
return(sce) | ||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
# calculate the reduced matrices | ||||||||||||||||||||||||||||||
counts <- rowsum(counts(sce), rownames(sce)) |> as("sparseMatrix") | ||||||||||||||||||||||||||||||
if ("spliced" %in% assayNames(sce)) { | ||||||||||||||||||||||||||||||
spliced <- rowsum(assay(sce, "spliced"), rownames(sce)) |> as("sparseMatrix") | ||||||||||||||||||||||||||||||
assays <- list(counts = counts, spliced = spliced) | ||||||||||||||||||||||||||||||
} else { | ||||||||||||||||||||||||||||||
assays <- list(counts = counts) | ||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
if (recalculate_reduced_dims) { | ||||||||||||||||||||||||||||||
reduced_dims <- list() | ||||||||||||||||||||||||||||||
} else { | ||||||||||||||||||||||||||||||
reduced_dims <- reducedDims(sce) | ||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
# Build the new SingleCellExperiment object | ||||||||||||||||||||||||||||||
summed_sce <- SingleCellExperiment( | ||||||||||||||||||||||||||||||
assays = assays, | ||||||||||||||||||||||||||||||
colData = colData(sce), | ||||||||||||||||||||||||||||||
metadata = metadata(sce), | ||||||||||||||||||||||||||||||
# if we are not recalculating reduced dimensions, copy over previous (likely similar) | ||||||||||||||||||||||||||||||
reducedDims = reduced_dims, | ||||||||||||||||||||||||||||||
altExps = altExps(sce) | ||||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
# Add normalized values if requested | ||||||||||||||||||||||||||||||
if (normalize) { | ||||||||||||||||||||||||||||||
try({ | ||||||||||||||||||||||||||||||
# try to cluster similar cells | ||||||||||||||||||||||||||||||
# clustering may fail if < 100 cells in dataset | ||||||||||||||||||||||||||||||
qclust <- suppressWarnings(scran::quickCluster(summed_sce)) | ||||||||||||||||||||||||||||||
summed_sce <- scran::computeSumFactors(summed_sce, clusters = qclust) | ||||||||||||||||||||||||||||||
jashapiro marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||||
}) | ||||||||||||||||||||||||||||||
summed_sce <- scuttle::logNormCounts(summed_sce) | ||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
# recalculate PCA if requested, using the same dimensions as before | ||||||||||||||||||||||||||||||
jashapiro marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||||
if (recalculate_reduced_dims) { | ||||||||||||||||||||||||||||||
if ("PCA" %in% reducedDimNames(sce)) { | ||||||||||||||||||||||||||||||
pca_dim <- ncol(reducedDim(sce, "PCA")) | ||||||||||||||||||||||||||||||
} else { | ||||||||||||||||||||||||||||||
pca_dim <- min(50, ncol(sce) - 1) # always reduce dimensions at least 1! | ||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
hv_genes <- scran::modelGeneVar(summed_sce) |> | ||||||||||||||||||||||||||||||
scran::getTopHVGs(n = length(metadata(sce)$highly_variable_genes)) | ||||||||||||||||||||||||||||||
jashapiro marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||||
metadata(summed_sce)$highly_variable_genes <- hv_genes # store the new HVGs | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
summed_sce <- scater::runPCA(summed_sce, subset_row = hv_genes, ncomponents = pca_dim) |> | ||||||||||||||||||||||||||||||
scater::runUMAP() | ||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
return(summed_sce) | ||||||||||||||||||||||||||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍