diff --git a/.Rbuildignore b/.Rbuildignore
index b238b50..ae19577 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -6,3 +6,4 @@
 ^\.lintr$
 ^\.pre-commit-config.yaml$
 ^data-raw$
+^LICENSE\.md$
diff --git a/.Renviron b/.Renviron
new file mode 100644
index 0000000..90d1f09
--- /dev/null
+++ b/.Renviron
@@ -0,0 +1 @@
+RENV_EXT_ENABLED = FALSE
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e3079b8..5d0ac27 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
         exclude: '\.Rd'
 
   - repo: https://github.com/crate-ci/typos
-    rev: v1.28.1
+    rev: v1.28.2
     hooks:
       - id: typos
         exclude: '\.nb\.html'
diff --git a/DESCRIPTION b/DESCRIPTION
index b2908ff..0c9cc6e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -21,8 +21,12 @@ LazyData: true
 Suggests:
     testthat (>= 3.0.0),
     scater,
+    scran,
     Seurat,
-    splatter
+    splatter,
+    scuttle,
+    Matrix,
+    SeuratObject
 Config/testthat/edition: 3
 RoxygenNote: 7.3.2
 Imports:
@@ -34,6 +38,7 @@ Imports:
     purrr,
     S4Vectors,
     SingleCellExperiment,
+    SummarizedExperiment,
     tibble,
     tidyr
 biocViews:
diff --git a/LICENSE b/LICENSE
index ad17738..ab97fda 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,28 +1,3 @@
-BSD 3-Clause License
-
-Copyright (c) 2024, Alex's Lemonade Stand Foundation
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
-3. Neither the name of the copyright holder nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+YEAR: 2024
+COPYRIGHT HOLDER: Alex's Lemonade Stand Foundation
+ORGANIZATION: Alex's Lemonade Stand Foundation
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..252ca04
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,28 @@
+# BSD 3-Clause License
+
+Copyright (c) 2024, Alex's Lemonade Stand Foundation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/NAMESPACE b/NAMESPACE
index 601be7e..6d9e440 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,9 +6,12 @@ export(calculate_silhouette)
 export(calculate_stability)
 export(ensembl_to_symbol)
 export(extract_pc_matrix)
+export(sce_to_seurat)
 export(sce_to_symbols)
+export(sum_duplicate_genes)
 export(sweep_clusters)
 import(SingleCellExperiment)
+import(SummarizedExperiment)
 import(methods)
 importFrom(S4Vectors,`metadata<-`)
 importFrom(S4Vectors,metadata)
diff --git a/R/convert-gene-ids.R b/R/convert-gene-ids.R
index 5645a9c..dd10d32 100644
--- a/R/convert-gene-ids.R
+++ b/R/convert-gene-ids.R
@@ -13,18 +13,21 @@
 #'
 #'
 #' @param ensembl_ids A character vector of Ensembl gene ids to translate to
-#'  gene symbols.
-#' @param reference The reference gene list to use for translation. One of `scpca`,
-#'   `10x2020`, `10x2024`. The `scpca` reference is the default.
-#' @param sce A SingleCellExperiment object to use as a reference for gene symbols.
-#'  If provided, the `reference` argument will be ignored. The `sce` object must
-#'  include columns with the names `gene_ids` (containing Ensembl ids) and
-#'  `gene_symbol` (containing the symbols) to use for conversion.
-#' @param unique Whether to use unique gene symbols, as would be done if
-#'  data had been read in with gene symbols by Seurat. Default is FALSE.
-#' @param leave_na Whether to leave NA values in the output vector.
-#' If FALSE, any missing values will be replaced with the input ensembl_id value.
-#' Default is FALSE.
+#'   gene symbols.
+#' @param reference The reference gene list to use for translation. One of
+#'   `scpca`, `10x2020`, `10x2024`. The `scpca` reference is the default.
+#' @param sce A SingleCellExperiment object to use as a reference for gene
+#'   symbols. If provided, the `reference` argument will be ignored. The `sce`
+#'   object must include columns with the names `gene_ids` (containing Ensembl
+#'   ids) and `gene_symbol` (containing the symbols) to use for conversion.
+#' @param unique Whether to use unique gene symbols, as would be done if data
+#'   had been read in with gene symbols by Seurat. Default is FALSE.
+#' @param leave_na Whether to leave NA values in the output vector. If FALSE,
+#'   any missing values will be replaced with the input ensembl_id value.
+#'   Default is FALSE.
+#' @param seurat_compatible Whether to return a vector that is compatible with
+#'   Seurat, translating and underscores to dashes.
+#'   Default is FALSE.
 #'
 #' @return A vector of gene symbols corresponding to the input Ensembl ids.
 #' @export
@@ -51,7 +54,8 @@ ensembl_to_symbol <- function(
     reference = c("scpca", "10x2020", "10x2024"),
     sce = NULL,
     unique = FALSE,
-    leave_na = FALSE) {
+    leave_na = FALSE,
+    seurat_compatible = FALSE) {
   reference <- match.arg(reference)
   stopifnot(
     "`ensembl_ids` must be a character vector." = is.character(ensembl_ids),
@@ -85,10 +89,17 @@ ensembl_to_symbol <- function(
     )
   }
   if (!leave_na && any(missing_symbols)) {
-    warning("Not all input ids have corresponding gene symbols, using input ids for missing values.")
+    message("Not all input ids have corresponding gene symbols, using input ids for missing values.")
     gene_symbols[missing_symbols] <- ensembl_ids[missing_symbols]
   }
 
+  if (seurat_compatible) {
+    if (any(grepl("_", gene_symbols))) {
+      warning("Replacing underscores ('_') with dashes ('-') in gene symbols for Seurat compatibility.")
+    }
+    gene_symbols <- gsub("_", "-", gene_symbols)
+  }
+
   return(gene_symbols)
 }
 
@@ -112,15 +123,19 @@ ensembl_to_symbol <- function(
 #' (and not disabled by the `convert_hvg` and `convert_pca` arguments).
 #'
 #'
-#' @param sce A SingleCellExperiment object containing gene ids and gene symbols.
-#' @param reference The reference gene list for conversion. One of `sce`, `scpca`,
-#' `10x2020`, or `10x2024`. If `sce` (the default) the internal row data is used.
-#' @param unique Whether to use unique gene symbols, as would be done if
-#' data had been read in with gene symbols by Seurat. Default is FALSE.
-#' @param convert_hvg Logical indicating whether to convert highly variable genes to gene symbols.
-#'  Default is TRUE.
-#' @param convert_pca Logical indicating whether to convert PCA rotation matrix to gene symbols.
-#'  Default is TRUE.
+#' @param sce A SingleCellExperiment object containing gene ids and gene
+#'   symbols.
+#' @param reference The reference gene list for conversion. One of `sce`,
+#'   `scpca`, `10x2020`, or `10x2024`. If `sce` (the default) the internal row
+#'   data is used.
+#' @param unique Whether to use unique gene symbols, as would be done if data
+#'   had been read in with gene symbols by Seurat. Default is FALSE.
+#' @param convert_hvg Logical indicating whether to convert highly variable
+#'   genes to gene symbols. Default is TRUE.
+#' @param convert_pca Logical indicating whether to convert PCA rotation matrix
+#'   to gene symbols. Default is TRUE.
+#' @param seurat_compatible Logical indicating whether to make gene symbols
+#'   Seurat-compatible by replacing underscores with dashes. Default is FALSE.
 #'
 #' @return A SingleCellExperiment object with row names set as gene symbols.
 #' @export
@@ -145,7 +160,8 @@ sce_to_symbols <- function(
     reference = c("sce", "scpca", "10x2020", "10x2024"),
     unique = FALSE,
     convert_hvg = TRUE,
-    convert_pca = TRUE) {
+    convert_pca = TRUE,
+    seurat_compatible = FALSE) {
   reference <- match.arg(reference)
   stopifnot(
     "`sce` must be a SingleCellExperiment object." = is(sce, "SingleCellExperiment"),
@@ -164,9 +180,19 @@ sce_to_symbols <- function(
   }
 
   if (reference == "sce") {
-    gene_symbols <- ensembl_to_symbol(ensembl_ids, sce = sce, unique = unique)
+    gene_symbols <- ensembl_to_symbol(
+      ensembl_ids,
+      sce = sce,
+      unique = unique,
+      seurat_compatible = seurat_compatible
+    )
   } else {
-    gene_symbols <- ensembl_to_symbol(ensembl_ids, reference = reference, unique = unique)
+    gene_symbols <- ensembl_to_symbol(
+      ensembl_ids,
+      reference = reference,
+      unique = unique,
+      seurat_compatible = seurat_compatible
+    )
   }
   row_ids <- gene_symbols
   # set Ensembl ids as original ids for later translations
diff --git a/R/make-seurat.R b/R/make-seurat.R
new file mode 100644
index 0000000..424ee6f
--- /dev/null
+++ b/R/make-seurat.R
@@ -0,0 +1,169 @@
+#' Convert an SCE object to Seurat
+#'
+#' Converts an ScPCA SingleCellExperiment (SCE) object to Seurat format. This is
+#' primarily a wrapper around Seurat::as.Seurat() with some additional steps to
+#' include ScPCA metadata and options for converting the feature index from
+#' Ensembl gene ids to gene symbols.
+#'
+#' If present, reduced dimensions from SCE objects will be included, renamed to
+#' match Seurat default naming.
+#'
+#'
+#' @param sce The input SCE object
+#' @param use_symbols Logical indicating whether the Seurat object uses gene
+#'   symbols for indexing. Default is TRUE.
+#' @param reference If `use_symbols` is TRUE, the reference to use for gene
+#'   symbols. One of `sce`, `scpca`, `10x2020`, `10x2024`, where `sce` uses the
+#'   symbols stored in the `gene_symbol` column of the SCE object's row data,
+#'   and the others use standardized translation tables. See `ensembl_to_symbol()`
+#'   for more information. Default is `sce`.
+#' @param dedup_method Method to handle duplicated gene symbols. If `unique`,
+#'   the gene symbols will be made unique following standard Seurat procedures.
+#'   If `sum`, the expression values for each gene symbols will be summed.
+#' @param seurat_assay_version Whether to create Seurat `v3` or `v5` assay
+#'   objects. Default is `v3`.
+#'
+#' @return a Seurat object
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # convert an ScPCA SCE to Seurat renaming with gene symbols
+#' seurat_obj <- sce_to_seurat(sce)
+#'
+#' # convert SCE to Seurat keeping the current names (usually Ensembl)
+#' seurat_obj <- sce_to_seurat(sce, use_symbols = FALSE)
+#'
+#' # convert SCE to Seurat, merging duplicated gene names
+#' seurat_obj <- sce_to_seurat(sce, dedup_method = "sum")
+#'
+#' # convert SCE to Seurat with v5 assay objects
+#' seurat_obj <- sce_to_seurat(sce, seurat_assay_version = "v5")
+#' }
+#'
+sce_to_seurat <- function(
+    sce,
+    use_symbols = TRUE,
+    reference = c("sce", "scpca", "10x2020", "10x2024"),
+    dedup_method = c("unique", "sum"),
+    seurat_assay_version = c("v3", "v5")) {
+  reference <- match.arg(reference)
+  dedup_method <- match.arg(dedup_method)
+  seurat_assay_version <- match.arg(seurat_assay_version)
+
+  stopifnot(
+    "Package `Seurat` must be installed for SCE to Seurat conversion." =
+      requireNamespace("Seurat", quietly = TRUE),
+    "Package `SeuratObject` must be installed for SCE to Seurat conversion." =
+      requireNamespace("SeuratObject", quietly = TRUE),
+    "`sce` must be a SingleCellExperiment object." = is(sce, "SingleCellExperiment"),
+    "`sce` must contain both `gene_ids` and `gene_symbol` columns in the row data if it is being used as a reference." =
+      reference != "sce" || all(c("gene_ids", "gene_symbol") %in% names(rowData(sce))),
+    "`sce` should only include a single library. Merged SingleCellExperiment objects are not (yet) supported." =
+      length(metadata(sce)$library_id) <= 1
+  )
+
+  if (use_symbols) {
+    sce <- suppressMessages(sce_to_symbols(
+      sce,
+      reference = reference,
+      unique = (dedup_method == "unique"),
+      seurat_compatible = TRUE
+    ))
+  }
+
+  if (dedup_method == "sum") {
+    sce <- sum_duplicate_genes(sce)
+  }
+
+  mainExpName(sce) <- "RNA"
+
+  # convert reducedDims to Seurat standard naming
+  reducedDimNames(sce) <- tolower(reducedDimNames(sce))
+  reducedDims(sce) <- reducedDims(sce) |>
+    purrr::map(\(mat) {
+      # insert underscore between letters and numbers in axis labels
+      rd_names <- gsub("([A-Za-z]+)([0-9]+)", "\\1_\\2", colnames(mat)) |>
+        tolower()
+      colnames(mat) <- rd_names
+      if (!is.null(attr(mat, "rotation"))) {
+        colnames(attr(mat, "rotation")) <- rd_names
+      }
+      return(mat)
+    })
+
+  # pull out any altExps to handle manually
+  alt_exps <- altExps(sce)
+  altExps(sce) <- NULL
+
+  # Let Seurat do initial conversion to capture most things
+  if ("logcounts" %in% assayNames(sce)) {
+    data_name <- "logcounts"
+  } else {
+    data_name <- NULL
+  }
+  sobj <- Seurat::as.Seurat(sce, data = data_name)
+
+  # update identity values, using cluster if present
+  sce_meta <- metadata(sce)
+  sobj$orig.ident <- sce_meta$library_id
+  if (is.null(sce$cluster)) {
+    Seurat::Idents(sobj) <- sce_meta$library_id
+  } else {
+    Seurat::Idents(sobj) <- sce$cluster
+  }
+
+  # add variable features if present
+  if (!is.null(sce_meta$highly_variable_genes)) {
+    sobj[["RNA"]]@var.features <- sce_meta$highly_variable_genes
+    sce_meta$highly_variable_genes <- NULL # remove from metadata to avoid redundancy
+  }
+
+  # convert and set functions depending on assay version requested
+  if (seurat_assay_version == "v5") {
+    create_seurat_assay <- SeuratObject::CreateAssay5Object
+    suppressWarnings(sobj[["RNA"]] <- as(sobj[["RNA"]], "Assay5"))
+  } else {
+    create_seurat_assay <- SeuratObject::CreateAssayObject
+    suppressWarnings(sobj[["RNA"]] <- as(sobj[["RNA"]], "Assay"))
+  }
+
+  # add spliced data if present
+  if ("spliced" %in% assayNames(sce)) {
+    sobj[["spliced"]] <- create_seurat_assay(counts = assay(sce, "spliced"))
+  }
+
+  # add altExps as needed.
+  for (alt_exp_name in names(alt_exps)) {
+    alt_exp <- alt_exps[[alt_exp_name]]
+    stopifnot(
+      "All altExps must contain a `counts` assay." = "counts" %in% assayNames(alt_exp)
+    )
+
+    # check name compatibility for Seurat
+    alt_exp_rownames <- rownames(alt_exp)
+    if (any(grepl("_", alt_exp_rownames))) {
+      warning(
+        "Replacing underscores ('_') with dashes ('-') in feature names from ",
+        alt_exp_name,
+        " for Seurat compatibility."
+      )
+      rownames(alt_exp) <- gsub("_", "-", alt_exp_rownames)
+    }
+
+    sobj[[alt_exp_name]] <- create_seurat_assay(counts = counts(alt_exp))
+    if ("logcounts" %in% assayNames(alt_exp)) {
+      sobj[[alt_exp_name]]$data <- logcounts(alt_exp)
+    }
+  }
+
+  # add sample-level metadata after removing non-vector objects
+  sobj@misc <- sce_meta |>
+    purrr::discard(is.object) |>
+    purrr::discard(is.list)
+
+  # scale data because many Seurat functions expect this
+  sobj <- Seurat::ScaleData(sobj, assay = "RNA", verbose = FALSE)
+
+  return(sobj)
+}
diff --git a/R/sum-duplicate-genes.R b/R/sum-duplicate-genes.R
new file mode 100644
index 0000000..a4f512f
--- /dev/null
+++ b/R/sum-duplicate-genes.R
@@ -0,0 +1,129 @@
+#' Sum counts for genes with duplicate names in a SingleCellExperiment object.
+#'
+#' Genes with the same name are merged by summing their raw expression counts.
+#' When multiple Ensembl gene IDs are associated with the same gene symbol,
+#' identifier conversion can result in duplicate gene names. This function
+#' resolves such duplicates by summing the expression values for each duplicate
+#' gene name, which may be justified if the different Ensembl gene IDs share
+#' substantial sequence identity, which could make separate quantification of
+#' the two genes less reliable.
+#'
+#' If requested, the log-normalized expression values are recalculated,
+#' otherwise that matrix is left blank.
+#'
+#' PCA and UMAP are also recalculated if requested (which requires recalculating
+#' the normalized expression). If not recalculating, the original reduced
+#' dimensions are retained as they are likely to remain similar in most cases.
+#'
+#'
+#' @param sce a SingleCellExperiment object with duplicated row names
+#' @param normalize a logical indicating whether to normalize the expression
+#'   values. Default is TRUE.
+#' @param recalculate_reduced_dims a logical indicating whether to recalculate
+#'   PCA and UMAP. If FALSE, the input reduced dimensions are copied over. If
+#'   TRUE, the highly variable genes are also recalculated with the new values
+#'   stored in metadata. Default is FALSE.
+#'
+#' @return a SingleCellExperiment object
+#' @export
+#'
+#' @import SingleCellExperiment
+#' @import SummarizedExperiment
+#'
+#' @examples
+#' \dontrun{
+#' # sum across duplicated gene names
+#' summed_sce <- sum_duplicate_genes(sce)
+#'
+#' # sum across duplicated gene names and recalculate PCA and UMAP
+#' summed_sce <- sum_duplicate_genes(sce, recalculate_reduced_dims = TRUE)
+#' }
+#'
+sum_duplicate_genes <- function(sce, normalize = TRUE, recalculate_reduced_dims = FALSE) {
+  stopifnot(
+    "sce must be a SingleCellExperiment object" = is(sce, "SingleCellExperiment"),
+    "normalize must be a logical" = is.logical(normalize),
+    "recalculate_reduced_dims must be a logical" = is.logical(recalculate_reduced_dims),
+    "normalize can not be FALSE if recalculate_reduced_dims is TRUE" = normalize || !recalculate_reduced_dims
+  )
+  if (normalize) {
+    stopifnot(
+      "Package `scran` must be installed if `normalize = TRUE` is set." =
+        requireNamespace("scran", quietly = TRUE),
+      "Package `scuttle` must be installed if `normalize = TRUE` is set." =
+        requireNamespace("scuttle", quietly = TRUE)
+    )
+  }
+  stopifnot(
+    "Package `scater` must be installed if `recalculate_reduced_dims = TRUE` is set." =
+      requireNamespace("scater", quietly = TRUE)
+  )
+
+  if (!any(duplicated(rownames(sce)))) {
+    message("No duplicated gene names found in the SingleCellExperiment object, returning original object")
+    return(sce)
+  }
+
+  # calculate the reduced matrices
+  counts <- rowsum(counts(sce), rownames(sce)) |> as("sparseMatrix")
+  if ("spliced" %in% assayNames(sce)) {
+    spliced <- rowsum(assay(sce, "spliced"), rownames(sce)) |> as("sparseMatrix")
+    assays <- list(counts = counts, spliced = spliced)
+  } else {
+    assays <- list(counts = counts)
+  }
+
+
+  if (recalculate_reduced_dims) {
+    reduced_dims <- list()
+  } else {
+    reduced_dims <- reducedDims(sce)
+  }
+
+
+  # Build the new SingleCellExperiment object
+  summed_sce <- SingleCellExperiment(
+    assays = assays,
+    colData = colData(sce),
+    metadata = metadata(sce),
+    # if we are not recalculating reduced dimensions, copy over previous (likely similar)
+    reducedDims = reduced_dims,
+    altExps = altExps(sce)
+  )
+
+  # Add normalized values if requested
+  if (normalize) {
+    try({
+      # try to cluster similar cells
+      # clustering may fail if < 100 cells in dataset
+      suppressWarnings({
+        qclust <- scran::quickCluster(summed_sce)
+        summed_sce <- scran::computeSumFactors(summed_sce, clusters = qclust)
+      })
+    })
+    summed_sce <- scuttle::logNormCounts(summed_sce)
+  }
+
+  # recalculate PCA and UMAP if requested, using the same dimensions as before (if available)
+  if (recalculate_reduced_dims) {
+    if ("PCA" %in% reducedDimNames(sce)) {
+      pca_dim <- ncol(reducedDim(sce, "PCA"))
+    } else {
+      pca_dim <- min(50, ncol(sce) - 1) # always reduce dimensions at least 1!
+    }
+
+    if (is.null(metadata(sce)$highly_variable_genes)) {
+      n_hvg <- 2000 # same default as ScPCA
+    } else {
+      n_hvg <- length(metadata(sce)$highly_variable_genes)
+    }
+    hv_genes <- scran::modelGeneVar(summed_sce) |>
+      scran::getTopHVGs(n = n_hvg)
+    metadata(summed_sce)$highly_variable_genes <- hv_genes # store the new HVGs
+
+    summed_sce <- scater::runPCA(summed_sce, subset_row = hv_genes, ncomponents = pca_dim) |>
+      scater::runUMAP()
+  }
+
+  return(summed_sce)
+}
diff --git a/data-raw/README.md b/data-raw/README.md
index 608c806..0432140 100644
--- a/data-raw/README.md
+++ b/data-raw/README.md
@@ -1,7 +1,12 @@
 This directory contains scripts to download and preprocess data used within the rOpenScPCA package.
 There are also notebooks that explore some of the prepared datasets for exploration.
 
-## Building Gene References
+## Test data
+
+- The `download_test_data.R` script downloads the test data used in this package from the OpenScPCA test data bucket.
+
+
+## Building gene references
 
 - The `build_gene_references.R` script creates a table of Ensembl id to gene symbol references.
 The initial table of gene ids and gene symbols is extracted from an example ScPCA-formatted SCE object (`rOpenScPCA/tests/testthat/data/scpca_sce.rds`).
diff --git a/data-raw/build_gene_reference.R b/data-raw/build_gene_reference.R
index 9860b7f..90af14f 100644
--- a/data-raw/build_gene_reference.R
+++ b/data-raw/build_gene_reference.R
@@ -11,9 +11,10 @@ suppressPackageStartupMessages({
 
 ## Download the reference data -------------------------------------------------
 
+setwd(here::here())
 
 # Read in the test data ScPCA SCE object and extract the row data:
-genes_scpca <- readRDS(here::here("tests", "testthat", "data", "scpca_sce.rds")) |>
+genes_scpca <- readRDS(testthat::test_path("data", "scpca_sce.rds")) |>
   rowData() |>
   as.data.frame() |>
   # Use Ensembl ID if gene symbol is missing, then make unique
diff --git a/data-raw/download_test_data.R b/data-raw/download_test_data.R
new file mode 100644
index 0000000..76f9288
--- /dev/null
+++ b/data-raw/download_test_data.R
@@ -0,0 +1,14 @@
+#!/usr/env Rscript
+
+# Downloads a test data file from OpenScPCA and places it in the test directory
+
+setwd(here::here())
+url_base <- "https://openscpca-test-data-release-public-access.s3.us-east-2.amazonaws.com/test/SCPCP000001/SCPCS000001/" # nolint
+
+test_processed_path <- testthat::test_path("data", "scpca_sce.rds")
+processed_url <- paste0(url_base, "SCPCL000001_processed.rds")
+download.file(processed_url, test_processed_path, mode = "wb")
+
+test_filtered_path <- testthat::test_path("data", "filtered_sce.rds")
+filtered_url <- paste0(url_base, "SCPCL000001_filtered.rds")
+download.file(filtered_url, test_filtered_path, mode = "wb")
diff --git a/man/ensembl_to_symbol.Rd b/man/ensembl_to_symbol.Rd
index bb380cc..c7f1f9e 100644
--- a/man/ensembl_to_symbol.Rd
+++ b/man/ensembl_to_symbol.Rd
@@ -9,26 +9,31 @@ ensembl_to_symbol(
   reference = c("scpca", "10x2020", "10x2024"),
   sce = NULL,
   unique = FALSE,
-  leave_na = FALSE
+  leave_na = FALSE,
+  seurat_compatible = FALSE
 )
 }
 \arguments{
 \item{ensembl_ids}{A character vector of Ensembl gene ids to translate to
 gene symbols.}
 
-\item{reference}{The reference gene list to use for translation. One of `scpca`,
-`10x2020`, `10x2024`. The `scpca` reference is the default.}
+\item{reference}{The reference gene list to use for translation. One of
+`scpca`, `10x2020`, `10x2024`. The `scpca` reference is the default.}
 
-\item{sce}{A SingleCellExperiment object to use as a reference for gene symbols.
-If provided, the `reference` argument will be ignored. The `sce` object must
-include columns with the names `gene_ids` (containing Ensembl ids) and
-`gene_symbol` (containing the symbols) to use for conversion.}
+\item{sce}{A SingleCellExperiment object to use as a reference for gene
+symbols. If provided, the `reference` argument will be ignored. The `sce`
+object must include columns with the names `gene_ids` (containing Ensembl
+ids) and `gene_symbol` (containing the symbols) to use for conversion.}
 
-\item{unique}{Whether to use unique gene symbols, as would be done if
-data had been read in with gene symbols by Seurat. Default is FALSE.}
+\item{unique}{Whether to use unique gene symbols, as would be done if data
+had been read in with gene symbols by Seurat. Default is FALSE.}
 
-\item{leave_na}{Whether to leave NA values in the output vector.
-If FALSE, any missing values will be replaced with the input ensembl_id value.
+\item{leave_na}{Whether to leave NA values in the output vector. If FALSE,
+any missing values will be replaced with the input ensembl_id value.
+Default is FALSE.}
+
+\item{seurat_compatible}{Whether to return a vector that is compatible with
+Seurat, translating and underscores to dashes.
 Default is FALSE.}
 }
 \value{
diff --git a/man/sce_to_seurat.Rd b/man/sce_to_seurat.Rd
new file mode 100644
index 0000000..49543ff
--- /dev/null
+++ b/man/sce_to_seurat.Rd
@@ -0,0 +1,62 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/make-seurat.R
+\name{sce_to_seurat}
+\alias{sce_to_seurat}
+\title{Convert an SCE object to Seurat}
+\usage{
+sce_to_seurat(
+  sce,
+  use_symbols = TRUE,
+  reference = c("sce", "scpca", "10x2020", "10x2024"),
+  dedup_method = c("unique", "sum"),
+  seurat_assay_version = c("v3", "v5")
+)
+}
+\arguments{
+\item{sce}{The input SCE object}
+
+\item{use_symbols}{Logical indicating whether the Seurat object uses gene
+symbols for indexing. Default is TRUE.}
+
+\item{reference}{If `use_symbols` is TRUE, the reference to use for gene
+symbols. One of `sce`, `scpca`, `10x2020`, `10x2024`, where `sce` uses the
+symbols stored in the `gene_symbol` column of the SCE object's row data,
+and the others use standardized translation tables. See `ensembl_to_symbol()`
+for more information. Default is `sce`.}
+
+\item{dedup_method}{Method to handle duplicated gene symbols. If `unique`,
+the gene symbols will be made unique following standard Seurat procedures.
+If `sum`, the expression values for each gene symbols will be summed.}
+
+\item{seurat_assay_version}{Whether to create Seurat `v3` or `v5` assay
+objects. Default is `v3`.}
+}
+\value{
+a Seurat object
+}
+\description{
+Converts an ScPCA SingleCellExperiment (SCE) object to Seurat format. This is
+primarily a wrapper around Seurat::as.Seurat() with some additional steps to
+include ScPCA metadata and options for converting the feature index from
+Ensembl gene ids to gene symbols.
+}
+\details{
+If present, reduced dimensions from SCE objects will be included, renamed to
+match Seurat default naming.
+}
+\examples{
+\dontrun{
+# convert an ScPCA SCE to Seurat renaming with gene symbols
+seurat_obj <- sce_to_seurat(sce)
+
+# convert SCE to Seurat keeping the current names (usually Ensembl)
+seurat_obj <- sce_to_seurat(sce, use_symbols = FALSE)
+
+# convert SCE to Seurat, merging duplicated gene names
+seurat_obj <- sce_to_seurat(sce, dedup_method = "sum")
+
+# convert SCE to Seurat with v5 assay objects
+seurat_obj <- sce_to_seurat(sce, seurat_assay_version = "v5")
+}
+
+}
diff --git a/man/sce_to_symbols.Rd b/man/sce_to_symbols.Rd
index 5af59e8..504c747 100644
--- a/man/sce_to_symbols.Rd
+++ b/man/sce_to_symbols.Rd
@@ -9,23 +9,29 @@ sce_to_symbols(
   reference = c("sce", "scpca", "10x2020", "10x2024"),
   unique = FALSE,
   convert_hvg = TRUE,
-  convert_pca = TRUE
+  convert_pca = TRUE,
+  seurat_compatible = FALSE
 )
 }
 \arguments{
-\item{sce}{A SingleCellExperiment object containing gene ids and gene symbols.}
+\item{sce}{A SingleCellExperiment object containing gene ids and gene
+symbols.}
 
-\item{reference}{The reference gene list for conversion. One of `sce`, `scpca`,
-`10x2020`, or `10x2024`. If `sce` (the default) the internal row data is used.}
+\item{reference}{The reference gene list for conversion. One of `sce`,
+`scpca`, `10x2020`, or `10x2024`. If `sce` (the default) the internal row
+data is used.}
 
-\item{unique}{Whether to use unique gene symbols, as would be done if
-data had been read in with gene symbols by Seurat. Default is FALSE.}
+\item{unique}{Whether to use unique gene symbols, as would be done if data
+had been read in with gene symbols by Seurat. Default is FALSE.}
 
-\item{convert_hvg}{Logical indicating whether to convert highly variable genes to gene symbols.
-Default is TRUE.}
+\item{convert_hvg}{Logical indicating whether to convert highly variable
+genes to gene symbols. Default is TRUE.}
 
-\item{convert_pca}{Logical indicating whether to convert PCA rotation matrix to gene symbols.
-Default is TRUE.}
+\item{convert_pca}{Logical indicating whether to convert PCA rotation matrix
+to gene symbols. Default is TRUE.}
+
+\item{seurat_compatible}{Logical indicating whether to make gene symbols
+Seurat-compatible by replacing underscores with dashes. Default is FALSE.}
 }
 \value{
 A SingleCellExperiment object with row names set as gene symbols.
diff --git a/man/sum_duplicate_genes.Rd b/man/sum_duplicate_genes.Rd
new file mode 100644
index 0000000..8421d11
--- /dev/null
+++ b/man/sum_duplicate_genes.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sum-duplicate-genes.R
+\name{sum_duplicate_genes}
+\alias{sum_duplicate_genes}
+\title{Sum counts for genes with duplicate names in a SingleCellExperiment object.}
+\usage{
+sum_duplicate_genes(sce, normalize = TRUE, recalculate_reduced_dims = FALSE)
+}
+\arguments{
+\item{sce}{a SingleCellExperiment object with duplicated row names}
+
+\item{normalize}{a logical indicating whether to normalize the expression
+values. Default is TRUE.}
+
+\item{recalculate_reduced_dims}{a logical indicating whether to recalculate
+PCA and UMAP. If FALSE, the input reduced dimensions are copied over. If
+TRUE, the highly variable genes are also recalculated with the new values
+stored in metadata. Default is FALSE.}
+}
+\value{
+a SingleCellExperiment object
+}
+\description{
+Genes with the same name are merged by summing their raw expression counts.
+When multiple Ensembl gene IDs are associated with the same gene symbol,
+identifier conversion can result in duplicate gene names. This function
+resolves such duplicates by summing the expression values for each duplicate
+gene name, which may be justified if the different Ensembl gene IDs share
+substantial sequence identity, which could make separate quantification of
+the two genes less reliable.
+}
+\details{
+If requested, the log-normalized expression values are recalculated,
+otherwise that matrix is left blank.
+
+PCA and UMAP are also recalculated if requested (which requires recalculating
+the normalized expression). If not recalculating, the original reduced
+dimensions are retained as they are likely to remain similar in most cases.
+}
+\examples{
+\dontrun{
+# sum across duplicated gene names
+summed_sce <- sum_duplicate_genes(sce)
+
+# sum across duplicated gene names and recalculate PCA and UMAP
+summed_sce <- sum_duplicate_genes(sce, recalculate_reduced_dims = TRUE)
+}
+
+}
diff --git a/renv.lock b/renv.lock
index 787a0fa..6b52b1c 100644
--- a/renv.lock
+++ b/renv.lock
@@ -66,20 +66,6 @@
       ],
       "Hash": "ef32d07aafdd12f24c5827374ae3590d"
     },
-    "BiocIO": {
-      "Package": "BiocIO",
-      "Version": "1.14.0",
-      "Source": "Bioconductor",
-      "Repository": "Bioconductor 3.19",
-      "Requirements": [
-        "BiocGenerics",
-        "R",
-        "S4Vectors",
-        "methods",
-        "tools"
-      ],
-      "Hash": "f97a7ef01d364cf20d1946d43a3d526f"
-    },
     "BiocManager": {
       "Package": "BiocManager",
       "Version": "1.30.25",
@@ -156,26 +142,6 @@
       ],
       "Hash": "b892e27fc9659a4c8f8787d34c37b8b2"
     },
-    "Biostrings": {
-      "Package": "Biostrings",
-      "Version": "2.72.1",
-      "Source": "Bioconductor",
-      "Repository": "Bioconductor 3.19",
-      "Requirements": [
-        "BiocGenerics",
-        "GenomeInfoDb",
-        "IRanges",
-        "R",
-        "S4Vectors",
-        "XVector",
-        "crayon",
-        "grDevices",
-        "methods",
-        "stats",
-        "utils"
-      ],
-      "Hash": "886ff0ed958d6f839ed2e0d01f6853b3"
-    },
     "Cairo": {
       "Package": "Cairo",
       "Version": "1.6-2",
@@ -294,28 +260,6 @@
       ],
       "Hash": "c3c792a7b7f2677be56e8632c5b7543d"
     },
-    "GenomicAlignments": {
-      "Package": "GenomicAlignments",
-      "Version": "1.40.0",
-      "Source": "Bioconductor",
-      "Repository": "Bioconductor 3.19",
-      "Requirements": [
-        "BiocGenerics",
-        "BiocParallel",
-        "Biostrings",
-        "GenomeInfoDb",
-        "GenomicRanges",
-        "IRanges",
-        "R",
-        "Rsamtools",
-        "S4Vectors",
-        "SummarizedExperiment",
-        "methods",
-        "stats",
-        "utils"
-      ],
-      "Hash": "e539709764587c581b31e446dc84d7b8"
-    },
     "GenomicRanges": {
       "Package": "GenomicRanges",
       "Version": "1.56.1",
@@ -494,18 +438,6 @@
       ],
       "Hash": "45f0398006e83a5b10b72a90663d8d8c"
     },
-    "RCurl": {
-      "Package": "RCurl",
-      "Version": "1.98-1.16",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "R",
-        "bitops",
-        "methods"
-      ],
-      "Hash": "ddbdf53d15b47be4407ede6914f56fbb"
-    },
     "ROCR": {
       "Package": "ROCR",
       "Version": "1.0-11",
@@ -637,41 +569,6 @@
       ],
       "Hash": "c92ba8b9a2c5c9ff600a1062a3b7b727"
     },
-    "Rhtslib": {
-      "Package": "Rhtslib",
-      "Version": "3.0.0",
-      "Source": "Bioconductor",
-      "Repository": "Bioconductor 3.19",
-      "Requirements": [
-        "tools",
-        "zlibbioc"
-      ],
-      "Hash": "5d6514cd44a0106581e3310f3972a82e"
-    },
-    "Rsamtools": {
-      "Package": "Rsamtools",
-      "Version": "2.20.0",
-      "Source": "Bioconductor",
-      "Repository": "Bioconductor 3.19",
-      "Requirements": [
-        "BiocGenerics",
-        "BiocParallel",
-        "Biostrings",
-        "GenomeInfoDb",
-        "GenomicRanges",
-        "IRanges",
-        "R",
-        "Rhtslib",
-        "S4Vectors",
-        "XVector",
-        "bitops",
-        "methods",
-        "stats",
-        "utils",
-        "zlibbioc"
-      ],
-      "Hash": "9762f24dcbdbd1626173c516bb64792c"
-    },
     "Rtsne": {
       "Package": "Rtsne",
       "Version": "0.17",
@@ -899,18 +796,6 @@
       ],
       "Hash": "83d45b690bffd09d1980c224ef329f5b"
     },
-    "XML": {
-      "Package": "XML",
-      "Version": "3.99-0.17",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "R",
-        "methods",
-        "utils"
-      ],
-      "Hash": "bc2a8a1139d8d4bd9c46086708945124"
-    },
     "XVector": {
       "Package": "XVector",
       "Version": "0.44.0",
@@ -998,30 +883,6 @@
       ],
       "Hash": "0f4e9d8caa6feaa7e409ae6c30f2ca66"
     },
-    "bit": {
-      "Package": "bit",
-      "Version": "4.5.0",
-      "Source": "Repository",
-      "Repository": "CRAN",
-      "Requirements": [
-        "R"
-      ],
-      "Hash": "5dc7b2677d65d0e874fc4aaf0e879987"
-    },
-    "bit64": {
-      "Package": "bit64",
-      "Version": "4.5.2",
-      "Source": "Repository",
-      "Repository": "CRAN",
-      "Requirements": [
-        "R",
-        "bit",
-        "methods",
-        "stats",
-        "utils"
-      ],
-      "Hash": "e84984bf5f12a18628d9a02322128dfd"
-    },
     "bitops": {
       "Package": "bitops",
       "Version": "1.0-8",
@@ -1396,14 +1257,13 @@
     },
     "evaluate": {
       "Package": "evaluate",
-      "Version": "0.24.0",
+      "Version": "1.0.1",
       "Source": "Repository",
-      "Repository": "RSPM",
+      "Repository": "CRAN",
       "Requirements": [
-        "R",
-        "methods"
+        "R"
       ],
-      "Hash": "a1066cbc05caee9a4bf6d90f194ff4da"
+      "Hash": "3fd29944b231036ad67c3edb32e02201"
     },
     "fansi": {
       "Package": "fansi",
@@ -1810,20 +1670,6 @@
       ],
       "Hash": "d65ba49117ca223614f71b60d85b8ab7"
     },
-    "hms": {
-      "Package": "hms",
-      "Version": "1.1.3",
-      "Source": "Repository",
-      "Repository": "CRAN",
-      "Requirements": [
-        "lifecycle",
-        "methods",
-        "pkgconfig",
-        "rlang",
-        "vctrs"
-      ],
-      "Hash": "b59377caa7ed00fa41808342002138f9"
-    },
     "htmltools": {
       "Package": "htmltools",
       "Version": "0.5.8.1",
@@ -2197,6 +2043,16 @@
       ],
       "Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c"
     },
+    "metapod": {
+      "Package": "metapod",
+      "Version": "1.12.0",
+      "Source": "Bioconductor",
+      "Repository": "Bioconductor 3.19",
+      "Requirements": [
+        "Rcpp"
+      ],
+      "Hash": "026552a86c3aa0d92d4d8b12d80010cc"
+    },
     "mgcv": {
       "Package": "mgcv",
       "Version": "1.9-1",
@@ -2474,16 +2330,6 @@
       "Repository": "CRAN",
       "Hash": "a555924add98c99d2f411e37e7d25e9f"
     },
-    "prettyunits": {
-      "Package": "prettyunits",
-      "Version": "1.2.0",
-      "Source": "Repository",
-      "Repository": "CRAN",
-      "Requirements": [
-        "R"
-      ],
-      "Hash": "6b01fc98b1e86c4f705ce9dcfd2f57c7"
-    },
     "processx": {
       "Package": "processx",
       "Version": "3.8.4",
@@ -2497,20 +2343,6 @@
       ],
       "Hash": "0c90a7d71988856bad2a2a45dd871bb9"
     },
-    "progress": {
-      "Package": "progress",
-      "Version": "1.2.3",
-      "Source": "Repository",
-      "Repository": "CRAN",
-      "Requirements": [
-        "R",
-        "R6",
-        "crayon",
-        "hms",
-        "prettyunits"
-      ],
-      "Hash": "f4625e061cb2865f111b47ff163a5ca6"
-    },
     "progressr": {
       "Package": "progressr",
       "Version": "0.14.0",
@@ -2586,48 +2418,12 @@
       ],
       "Hash": "5e3c5dc0b071b21fa128676560dbe94d"
     },
-    "readr": {
-      "Package": "readr",
-      "Version": "2.1.5",
-      "Source": "Repository",
-      "Repository": "CRAN",
-      "Requirements": [
-        "R",
-        "R6",
-        "cli",
-        "clipr",
-        "cpp11",
-        "crayon",
-        "hms",
-        "lifecycle",
-        "methods",
-        "rlang",
-        "tibble",
-        "tzdb",
-        "utils",
-        "vroom"
-      ],
-      "Hash": "9de96463d2117f6ac49980577939dfb3"
-    },
-    "rematch2": {
-      "Package": "rematch2",
-      "Version": "2.1.2",
-      "Source": "Repository",
-      "Repository": "CRAN",
-      "Requirements": [
-        "tibble"
-      ],
-      "Hash": "76c9e04c712a05848ae7a23d2f170a40"
-    },
     "renv": {
       "Package": "renv",
       "Version": "1.0.11",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "utils"
-      ],
-      "Hash": "47623f66b4e80b3b0587bc5d7b309888"
+      "OS_type": null,
+      "Repository": "CRAN",
+      "Source": "Repository"
     },
     "reshape2": {
       "Package": "reshape2",
@@ -2642,22 +2438,6 @@
       ],
       "Hash": "bb5996d0bd962d214a11140d77589917"
     },
-    "restfulr": {
-      "Package": "restfulr",
-      "Version": "0.0.15",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "R",
-        "RCurl",
-        "S4Vectors",
-        "XML",
-        "methods",
-        "rjson",
-        "yaml"
-      ],
-      "Hash": "44651c1e68eda9d462610aca9f15a815"
-    },
     "reticulate": {
       "Package": "reticulate",
       "Version": "1.39.0",
@@ -2703,16 +2483,6 @@
       ],
       "Hash": "99e15369f8fb17dc188377234de13fc6"
     },
-    "rjson": {
-      "Package": "rjson",
-      "Version": "0.2.23",
-      "Source": "Repository",
-      "Repository": "RSPM",
-      "Requirements": [
-        "R"
-      ],
-      "Hash": "7a04e9eff95857dbf557b4e5f0b3d1a8"
-    },
     "rlang": {
       "Package": "rlang",
       "Version": "1.1.4",
@@ -2775,33 +2545,6 @@
       ],
       "Hash": "b462187d887abc519894874486dbd6fd"
     },
-    "rtracklayer": {
-      "Package": "rtracklayer",
-      "Version": "1.64.0",
-      "Source": "Bioconductor",
-      "Repository": "Bioconductor 3.19",
-      "Requirements": [
-        "BiocGenerics",
-        "BiocIO",
-        "Biostrings",
-        "GenomeInfoDb",
-        "GenomicAlignments",
-        "GenomicRanges",
-        "IRanges",
-        "R",
-        "Rsamtools",
-        "S4Vectors",
-        "XML",
-        "XVector",
-        "curl",
-        "httr",
-        "methods",
-        "restfulr",
-        "tools",
-        "zlibbioc"
-      ],
-      "Hash": "3d6f004fce582bd7d68e2e18d44abbc1"
-    },
     "sass": {
       "Package": "sass",
       "Version": "0.4.9",
@@ -2885,6 +2628,38 @@
       ],
       "Hash": "d316e4abb854dd1677f7bd3ad08bc4e8"
     },
+    "scran": {
+      "Package": "scran",
+      "Version": "1.32.0",
+      "Source": "Bioconductor",
+      "Repository": "Bioconductor 3.19",
+      "Requirements": [
+        "BH",
+        "BiocGenerics",
+        "BiocParallel",
+        "BiocSingular",
+        "DelayedArray",
+        "DelayedMatrixStats",
+        "Matrix",
+        "Rcpp",
+        "S4Vectors",
+        "SingleCellExperiment",
+        "SummarizedExperiment",
+        "beachmat",
+        "bluster",
+        "dqrng",
+        "edgeR",
+        "igraph",
+        "limma",
+        "metapod",
+        "methods",
+        "scuttle",
+        "statmod",
+        "stats",
+        "utils"
+      ],
+      "Hash": "5b11173a6b49f06dda0db31e80caa561"
+    },
     "sctransform": {
       "Package": "sctransform",
       "Version": "0.4.1",
@@ -3275,7 +3050,7 @@
     },
     "testthat": {
       "Package": "testthat",
-      "Version": "3.2.1.1",
+      "Version": "3.2.2",
       "Source": "Repository",
       "Repository": "CRAN",
       "Requirements": [
@@ -3300,7 +3075,7 @@
         "waldo",
         "withr"
       ],
-      "Hash": "3f6e7e5e2220856ff865e4834766bf2b"
+      "Hash": "6773967afbe2f74c87021e72c1bb05c0"
     },
     "textshaping": {
       "Package": "textshaping",
@@ -3383,17 +3158,6 @@
       ],
       "Hash": "cfbad971a71f0e27cec22e544a08bc3b"
     },
-    "tzdb": {
-      "Package": "tzdb",
-      "Version": "0.4.0",
-      "Source": "Repository",
-      "Repository": "CRAN",
-      "Requirements": [
-        "R",
-        "cpp11"
-      ],
-      "Hash": "f561504ec2897f4d46f0c7657e488ae1"
-    },
     "usethis": {
       "Package": "usethis",
       "Version": "3.0.0",
@@ -3502,35 +3266,9 @@
       ],
       "Hash": "c826c7c4241b6fc89ff55aaea3fa7491"
     },
-    "vroom": {
-      "Package": "vroom",
-      "Version": "1.6.5",
-      "Source": "Repository",
-      "Repository": "CRAN",
-      "Requirements": [
-        "R",
-        "bit64",
-        "cli",
-        "cpp11",
-        "crayon",
-        "glue",
-        "hms",
-        "lifecycle",
-        "methods",
-        "progress",
-        "rlang",
-        "stats",
-        "tibble",
-        "tidyselect",
-        "tzdb",
-        "vctrs",
-        "withr"
-      ],
-      "Hash": "390f9315bc0025be03012054103d227c"
-    },
     "waldo": {
       "Package": "waldo",
-      "Version": "0.5.3",
+      "Version": "0.6.1",
       "Source": "Repository",
       "Repository": "CRAN",
       "Requirements": [
@@ -3539,11 +3277,9 @@
         "diffobj",
         "glue",
         "methods",
-        "rematch2",
-        "rlang",
-        "tibble"
+        "rlang"
       ],
-      "Hash": "16aa934a49658677d8041df9017329b9"
+      "Hash": "52f574062a7b66e56926988c3fbdb3b7"
     },
     "whisker": {
       "Package": "whisker",
@@ -3554,7 +3290,7 @@
     },
     "withr": {
       "Package": "withr",
-      "Version": "3.0.1",
+      "Version": "3.0.2",
       "Source": "Repository",
       "Repository": "RSPM",
       "Requirements": [
@@ -3562,7 +3298,7 @@
         "grDevices",
         "graphics"
       ],
-      "Hash": "07909200e8bbe90426fbfeb73e1e27aa"
+      "Hash": "cc2d62c76458d425210d1eb1478b30b4"
     },
     "xfun": {
       "Package": "xfun",
diff --git a/tests/testthat/data/filtered_sce.rds b/tests/testthat/data/filtered_sce.rds
new file mode 100644
index 0000000..6d2413a
Binary files /dev/null and b/tests/testthat/data/filtered_sce.rds differ
diff --git a/tests/testthat/data/scpca_sce.rds b/tests/testthat/data/scpca_sce.rds
index de4562d..8fcb17e 100644
Binary files a/tests/testthat/data/scpca_sce.rds and b/tests/testthat/data/scpca_sce.rds differ
diff --git a/tests/testthat/test-convert-gene-ids.R b/tests/testthat/test-convert-gene-ids.R
index 23ec3c2..6be0083 100644
--- a/tests/testthat/test-convert-gene-ids.R
+++ b/tests/testthat/test-convert-gene-ids.R
@@ -8,7 +8,7 @@ test_that("basic gene symbol conversion works", {
 test_that("gene symbol conversion works with unexpected ids", {
   ensembl_ids <- c("ENSG00000141510", "ENSG00000134323", "foobar")
 
-  expect_warning(gene_symbols <- ensembl_to_symbol(ensembl_ids))
+  expect_message(gene_symbols <- ensembl_to_symbol(ensembl_ids))
   expect_equal(gene_symbols, c("TP53", "MYCN", "foobar"))
 
   expect_no_warning(gene_symbols_na <- ensembl_to_symbol(ensembl_ids, leave_na = TRUE))
@@ -45,13 +45,25 @@ test_that("gene symbol conversion works using an SCE reference", {
   sce <- readRDS(test_path("data", "scpca_sce.rds"))
   ensembl_ids <- c("ENSG00000015479", "ENSG00000269226")
 
-  gene_symbols <- ensembl_to_symbol(ensembl_ids, sce = sce, unique = FALSE)
+  expect_message(gene_symbols <- ensembl_to_symbol(ensembl_ids, sce = sce, unique = FALSE))
   expect_equal(gene_symbols, c("MATR3", "TMSB15B"))
 
-  gene_symbols <- ensembl_to_symbol(ensembl_ids, sce = sce, unique = TRUE)
+  expect_message(gene_symbols <- ensembl_to_symbol(ensembl_ids, sce = sce, unique = TRUE))
   expect_equal(gene_symbols, c("MATR3.1", "TMSB15B.1"))
 })
 
+test_that("gene symbol conversion in seurat compatibility mode works", {
+  ensembl_ids <- c("ENSG00000141510", "ENSG00000134323")
+  gene_symbols <- ensembl_to_symbol(ensembl_ids, unique = FALSE, seurat_compatible = TRUE)
+  expect_equal(gene_symbols, c("TP53", "MYCN"))
+
+  ensembl_ids <- c("ENSG00000285609", "ENSG00000252254", "ENSG00000283274")
+  expect_warning( # this includes name changes for compatibility, so a warning is expected
+    gene_symbols <- ensembl_to_symbol(ensembl_ids, unique = FALSE, seurat_compatible = TRUE)
+  )
+  expect_equal(gene_symbols, c("5S-rRNA", "Y-RNA", "5-8S-rRNA"))
+})
+
 
 
 test_that("conversion of a full sce object works as expected", {
@@ -60,7 +72,7 @@ test_that("conversion of a full sce object works as expected", {
   names(gene_symbols) <- rowData(sce)$gene_ids
   gene_symbols[is.na(gene_symbols)] <- names(gene_symbols)[is.na(gene_symbols)]
 
-  expect_warning(converted_sce <- sce_to_symbols(sce))
+  expect_message(converted_sce <- sce_to_symbols(sce)) |> expect_message() # two messages expected here
   expect_equal(rownames(converted_sce), unname(gene_symbols))
 
   # check that hvg and PCA were converted too.
diff --git a/tests/testthat/test-make-seurat.R b/tests/testthat/test-make-seurat.R
new file mode 100644
index 0000000..df03a77
--- /dev/null
+++ b/tests/testthat/test-make-seurat.R
@@ -0,0 +1,239 @@
+test_that("SCE to Seurat with no id conversion works", {
+  sce <- readRDS(test_path("data", "scpca_sce.rds"))
+  seurat_obj <- sce_to_seurat(sce, use_symbols = FALSE)
+  expect_s4_class(seurat_obj, "Seurat")
+
+  expect_equal(dim(seurat_obj), dim(sce))
+  expect_setequal(rownames(seurat_obj), rownames(sce))
+  expect_setequal(colnames(seurat_obj), colnames(sce))
+
+  expect_equal(names(seurat_obj@assays), c("RNA", "spliced"))
+  expect_equal(Seurat::DefaultAssay(seurat_obj), "RNA")
+
+  # assay types
+  expect_s4_class(seurat_obj[["RNA"]], "Assay")
+  expect_s4_class(seurat_obj[["spliced"]], "Assay")
+
+  # assay contents
+  expect_contains(
+    slotNames(seurat_obj[["RNA"]]),
+    c("counts", "data", "scale.data", "var.features", "meta.features")
+  )
+
+  expect_equal(seurat_obj[["RNA"]]$counts, counts(sce))
+  expect_equal(seurat_obj[["RNA"]]$data, logcounts(sce))
+
+  expect_equal(
+    Seurat::VariableFeatures(seurat_obj[["RNA"]]),
+    metadata(sce)$highly_variable_genes,
+    ignore_attr = TRUE
+  )
+
+  expect_equal(names(seurat_obj@reductions), c("pca", "umap"))
+  expect_equal(
+    dim(seurat_obj[["pca"]]),
+    dim(reducedDim(sce, "PCA"))
+  )
+  expect_equal(
+    dim(seurat_obj[["umap"]]),
+    dim(reducedDim(sce, "UMAP"))
+  )
+})
+
+
+test_that("SCE to Seurat with id conversion works as expected", {
+  sce <- readRDS(test_path("data", "scpca_sce.rds"))
+  expect_warning(
+    seurat_obj <- sce_to_seurat(sce, use_symbols = TRUE, reference = "sce")
+  )
+  expect_s4_class(seurat_obj, "Seurat")
+
+  new_genes <- suppressMessages(
+    ensembl_to_symbol(rownames(sce), unique = TRUE, sce = sce)
+  )
+  new_genes <- gsub("_", "-", new_genes)
+
+  hv_genes <- suppressMessages(
+    ensembl_to_symbol(metadata(sce)$highly_variable_genes, unique = TRUE, sce = sce)
+  )
+  hv_genes <- gsub("_", "-", hv_genes)
+
+  expect_equal(dim(seurat_obj), dim(sce))
+  expect_setequal(colnames(seurat_obj), colnames(sce))
+  expect_setequal(rownames(seurat_obj), new_genes)
+
+  expect_equal(names(seurat_obj@assays), c("RNA", "spliced"))
+  expect_equal(Seurat::DefaultAssay(seurat_obj), "RNA")
+
+  # assay types
+  expect_s4_class(seurat_obj[["RNA"]], "Assay")
+  expect_s4_class(seurat_obj[["spliced"]], "Assay")
+
+  # assay contents
+  expect_contains(
+    slotNames(seurat_obj[["RNA"]]),
+    c("counts", "data", "scale.data", "var.features", "meta.features")
+  )
+
+  expect_equal(
+    Seurat::VariableFeatures(seurat_obj[["RNA"]]),
+    hv_genes,
+    ignore_attr = TRUE
+  )
+  expect_equal(names(seurat_obj@reductions), c("pca", "umap"))
+  expect_equal(names(seurat_obj@reductions), c("pca", "umap"))
+  expect_equal(
+    dim(seurat_obj[["pca"]]),
+    dim(reducedDim(sce, "PCA"))
+  )
+  expect_equal(
+    dim(seurat_obj[["umap"]]),
+    dim(reducedDim(sce, "UMAP"))
+  )
+})
+
+test_that("SCE to Seurat with id conversion and 10x reference works as expected", {
+  sce <- readRDS(test_path("data", "scpca_sce.rds"))
+  seurat_obj <- sce_to_seurat(sce, use_symbols = TRUE, reference = "10x2020")
+  expect_s4_class(seurat_obj, "Seurat")
+
+  new_genes <- suppressMessages(
+    ensembl_to_symbol(rownames(sce), unique = TRUE, reference = "10x2020")
+  )
+  new_genes <- gsub("_", "-", new_genes)
+
+  hv_genes <- suppressMessages(
+    ensembl_to_symbol(metadata(sce)$highly_variable_genes, unique = TRUE, reference = "10x2020")
+  )
+  hv_genes <- gsub("_", "-", hv_genes)
+
+  expect_equal(dim(seurat_obj), dim(sce))
+  expect_setequal(colnames(seurat_obj), colnames(sce))
+  expect_setequal(rownames(seurat_obj), new_genes)
+
+  expect_equal(names(seurat_obj@assays), c("RNA", "spliced"))
+  expect_equal(Seurat::DefaultAssay(seurat_obj), "RNA")
+
+
+  # assay types
+  expect_s4_class(seurat_obj[["RNA"]], "Assay")
+  expect_s4_class(seurat_obj[["spliced"]], "Assay")
+
+  # assay contents
+  expect_contains(
+    slotNames(seurat_obj[["RNA"]]),
+    c("counts", "data", "scale.data", "var.features", "meta.features")
+  )
+
+  expect_equal(
+    Seurat::VariableFeatures(seurat_obj[["RNA"]]),
+    hv_genes,
+    ignore_attr = TRUE
+  )
+  expect_equal(names(seurat_obj@reductions), c("pca", "umap"))
+  expect_equal(names(seurat_obj@reductions), c("pca", "umap"))
+  expect_equal(
+    dim(seurat_obj[["pca"]]),
+    dim(reducedDim(sce, "PCA"))
+  )
+  expect_equal(
+    dim(seurat_obj[["umap"]]),
+    dim(reducedDim(sce, "UMAP"))
+  )
+})
+
+test_that("conversion works with altExps", {
+  sce <- readRDS(test_path("data", "scpca_sce.rds"))
+  altsce1 <- SingleCellExperiment(assays = list(counts = counts(sce)[1:10, ], logcounts = logcounts(sce)[1:10, ]))
+  altsce2 <- SingleCellExperiment(assays = list(counts = counts(sce)[1:3, ]))
+  rownames(altsce2) <- c("F_1", "F_2", "F_3") # check feature names with underscores
+  altExps(sce) <- list(
+    alt1 = altsce1,
+    alt2 = altsce2
+  )
+
+  expect_warning(seurat_obj <- sce_to_seurat(sce, use_symbols = FALSE))
+  expect_s4_class(seurat_obj, "Seurat")
+
+  expect_equal(dim(seurat_obj), dim(sce))
+  expect_setequal(rownames(seurat_obj), rownames(sce))
+  expect_setequal(colnames(seurat_obj), colnames(sce))
+
+  expect_setequal(names(seurat_obj@assays), c("RNA", "spliced", "alt1", "alt2"))
+  expect_equal(Seurat::DefaultAssay(seurat_obj), "RNA")
+
+  # assay types
+  expect_s4_class(seurat_obj[["RNA"]], "Assay")
+  expect_s4_class(seurat_obj[["spliced"]], "Assay")
+  expect_s4_class(seurat_obj[["alt1"]], "Assay")
+  expect_s4_class(seurat_obj[["alt2"]], "Assay")
+})
+
+test_that("Seurat v5 conversion works", {
+  sce <- readRDS(test_path("data", "scpca_sce.rds"))
+  seurat_obj <- sce_to_seurat(sce, use_symbols = FALSE, seurat_assay_version = "v5")
+  expect_s4_class(seurat_obj, "Seurat")
+
+  expect_equal(dim(seurat_obj), dim(sce))
+  expect_setequal(rownames(seurat_obj), rownames(sce))
+  expect_setequal(colnames(seurat_obj), colnames(sce))
+
+  expect_equal(names(seurat_obj@assays), c("RNA", "spliced"))
+  expect_equal(Seurat::DefaultAssay(seurat_obj), "RNA")
+
+  # assay types
+  expect_s4_class(seurat_obj[["RNA"]], "Assay5")
+  expect_s4_class(seurat_obj[["spliced"]], "Assay5")
+
+  # expected layers present
+  expect_contains(
+    names(seurat_obj[["RNA"]]@layers),
+    c("counts", "data", "scale.data")
+  )
+
+  expect_equal(seurat_obj[["RNA"]]$counts, counts(sce))
+  expect_equal(seurat_obj[["RNA"]]$data, logcounts(sce))
+
+  expect_equal(
+    Seurat::VariableFeatures(seurat_obj[["RNA"]]),
+    metadata(sce)$highly_variable_genes,
+    ignore_attr = TRUE
+  )
+
+  expect_equal(names(seurat_obj@reductions), c("pca", "umap"))
+  expect_equal(
+    dim(seurat_obj[["pca"]]),
+    dim(reducedDim(sce, "PCA"))
+  )
+  expect_equal(
+    dim(seurat_obj[["umap"]]),
+    dim(reducedDim(sce, "UMAP"))
+  )
+})
+
+test_that("Conversion works for non-processed samples", {
+  sce <- readRDS(test_path("data", "filtered_sce.rds"))
+  seurat_obj <- sce_to_seurat(sce, use_symbols = FALSE)
+  expect_s4_class(seurat_obj, "Seurat")
+
+  expect_equal(dim(seurat_obj), dim(sce))
+  expect_setequal(rownames(seurat_obj), rownames(sce))
+  expect_setequal(colnames(seurat_obj), colnames(sce))
+
+  expect_equal(names(seurat_obj@assays), c("RNA", "spliced"))
+  expect_equal(Seurat::DefaultAssay(seurat_obj), "RNA")
+
+  # assay types
+  expect_s4_class(seurat_obj[["RNA"]], "Assay")
+  expect_s4_class(seurat_obj[["spliced"]], "Assay")
+
+  # assay contents
+  expect_contains(
+    slotNames(seurat_obj[["RNA"]]),
+    c("counts", "data", "scale.data", "var.features", "meta.features")
+  )
+
+  expect_equal(seurat_obj[["RNA"]]$counts, counts(sce))
+
+  expect_null(names(seurat_obj@reductions))
+})
diff --git a/tests/testthat/test-sum-duplicate-genes.R b/tests/testthat/test-sum-duplicate-genes.R
new file mode 100644
index 0000000..127c098
--- /dev/null
+++ b/tests/testthat/test-sum-duplicate-genes.R
@@ -0,0 +1,39 @@
+test_that("merging works as expected", {
+  sce <- readRDS(test_path("data", "scpca_sce.rds"))
+  double_sce <- rbind(sce, sce)
+  deduped_sce <- sum_duplicate_genes(double_sce)
+
+  expect_equal(dim(deduped_sce), dim(sce))
+
+  expect_setequal(rownames(deduped_sce), rownames(sce))
+
+
+  expect_equal(
+    counts(deduped_sce)[rownames(sce), ], # need to make the row order identical
+    2 * counts(sce) |> Matrix::drop0() # original matrices may have explicit 0s
+  )
+  expect_equal(
+    assay(deduped_sce, "spliced")[rownames(sce), ],
+    2 * assay(sce, "spliced") |> Matrix::drop0()
+  )
+})
+
+test_that("merging works as expected with unprocessed SCE", {
+  sce <- readRDS(test_path("data", "filtered_sce.rds"))
+  double_sce <- rbind(sce, sce)
+  deduped_sce <- sum_duplicate_genes(double_sce)
+
+  expect_equal(dim(deduped_sce), dim(sce))
+
+  expect_setequal(rownames(deduped_sce), rownames(sce))
+
+
+  expect_equal(
+    counts(deduped_sce)[rownames(sce), ], # need to make the row order identical
+    2 * counts(sce) |> Matrix::drop0() # original matrices may have explicit 0s
+  )
+  expect_equal(
+    assay(deduped_sce, "spliced")[rownames(sce), ],
+    2 * assay(sce, "spliced") |> Matrix::drop0()
+  )
+})