Skip to content

Commit

Permalink
Merge pull request #4 from AlexsLemonade/sjspielman/3-update-stabilit…
Browse files Browse the repository at this point in the history
…y-function

Update the stability function
  • Loading branch information
sjspielman authored Nov 8, 2024
2 parents fc78444 + ea017de commit f71a819
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 22 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.github$
26 changes: 17 additions & 9 deletions R/evaluate-clusters.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,10 @@ calculate_purity <- function(
#' either a SingleCellExperiment object, a Seurat object, or a matrix where columns
#' are PCs and rows are cells. If a matrix is provided, it must have row names of cell
#' ids (e.g., barcodes).
#' @param clusters A vector of cluster ids, typically a numeric factor variable, obtained
#' by previously clustering the PCs.
#' @param cluster_df A data frame that contains at least the columns `cell_id` and
#' `cluster`. The `cell_id` values should match either the PC matrix row names,
#' or the SingleCellExperiment/Seurat object cell ids. Typically this will be output from
#' the `rOpenScPCA::calculate_clusters()` function.
#' @param replicates Number of bootstrap replicates to perform. Default is 20.
#' @param seed Random seed
#' @param pc_name Optionally, the name of the PC matrix in the object. Not used if a
Expand All @@ -159,7 +161,7 @@ calculate_purity <- function(
#' # and setting a seed for reproducibility
#' cluster_df <- calculate_clusters(sce_object, seed = 11)
#' # Second, calculate cluster stability using default parameters
#' stability_df <- calculate_stability(sce_object, cluster_df$clusters, seed = 11)
#' stability_df <- calculate_stability(sce_object, cluster_df, seed = 11)
#'
#'
#' # First, cluster PCs from a SingleCellExperiment object using default parameters
Expand All @@ -168,7 +170,7 @@ calculate_purity <- function(
#' # Second, calculate cluster stability using default parameters and 50 replicates
#' stability_df <- calculate_stability(
#' sce_object,
#' cluster_df$clusters,
#' cluster_df,
#' replicates = 50,
#' seed = 11
#' )
Expand All @@ -186,15 +188,15 @@ calculate_purity <- function(
#' # for the initial clustering
#' stability_df <- calculate_stability(
#' sce_object,
#' cluster_df$clusters,
#' cluster_df,
#' algorithm = "leiden",
#' resolution = 0.1,
#' seed = 11
#' )
#' }
calculate_stability <- function(
x,
clusters,
cluster_df,
replicates = 20,
seed = NULL,
pc_name = NULL,
Expand All @@ -206,12 +208,18 @@ calculate_stability <- function(
# ensure we have a matrix
pca_matrix <- prepare_pc_matrix(x, pc_name = pc_name)

# check clusters and matrix compatibility
# ensure pca matrix and cluster df compatibility
stopifnot(
"The number of rows in the matrix must equal the length of the clusters vector." =
nrow(pca_matrix) == length(clusters)
"The cluster dataframe must have the same number of rows as the PCA matrix." =
nrow(pca_matrix) == nrow(cluster_df),
"Cell ids in the cluster dataframe must match the PCA matrix rownames." =
length(setdiff(rownames(pca_matrix), cluster_df$cell_id)) == 0
)

# Extract vector of clusters, ensuring same order as pca_matrix
rownames(cluster_df) <- cluster_df$cell_id
clusters <- cluster_df[rownames(pca_matrix),]$cluster

# calculate ARI for each cluster result bootstrap replicate
all_ari_df <- 1:replicates |>
purrr::map(
Expand Down
14 changes: 8 additions & 6 deletions man/calculate_stability.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 10 additions & 7 deletions tests/testthat/test-evaluate-clusters.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ test_that("calculate_stability works as expected with defaults", {
# note that we suppress warnings since this calculation done on fake
# test data gives expected warnings about ties during the ARI calculation.
suppressWarnings({
df <- calculate_stability(test_mat, cluster_df$cluster)
df <- calculate_stability(test_mat, cluster_df)
})

expected_names <- colnames(cluster_df)[!(colnames(cluster_df) %in% c("cell_id", "cluster"))]
Expand All @@ -62,7 +62,7 @@ test_that("calculate_stability works as expected with different replicates", {
# note that we suppress warnings since this calculation done on fake
# test data gives expected warnings about ties during the ARI calculation.
suppressWarnings({
df <- calculate_stability(test_mat, cluster_df$cluster, replicates = 2)
df <- calculate_stability(test_mat, cluster_df, replicates = 2)
})
expect_equal(nrow(df), 2)
})
Expand All @@ -77,7 +77,7 @@ test_that("calculate_stability works as expected with object and pc_name", {
suppressWarnings({
df <- calculate_stability(
sce,
cluster_df$cluster,
cluster_df,
replicates = 2,
pc_name = "my_pca"
)
Expand All @@ -88,13 +88,16 @@ test_that("calculate_stability works as expected with object and pc_name", {


test_that("calculate_stability errors as expected", {

# cluster_df too short
expect_error({
# mismatched cluster vector length
calculate_stability(test_mat, cluster_df$cluster[1:5])
calculate_stability(test_mat, cluster_df[1:5,])
})

# cluster_df too long
cluster_df_extra <- cluster_df |>
tibble::add_row(cell_id = "extra_barcode")
expect_error({
# cluster_df not a vector
calculate_stability(test_mat, cluster_df)
calculate_stability(test_mat, cluster_df_extra)
})
})

0 comments on commit f71a819

Please sign in to comment.