From 12e072e9bb1424e7a36603eb2a0bb9a7f2c8f8db Mon Sep 17 00:00:00 2001 From: Candace Savonen Date: Wed, 18 Dec 2024 09:48:45 -0500 Subject: [PATCH] multi eval function --- R/evaluate-clusters.R | 91 +++++++++++++++++++++++++++++++++++++++++++ R/sweep-clusters.R | 6 +-- 2 files changed, 94 insertions(+), 3 deletions(-) diff --git a/R/evaluate-clusters.R b/R/evaluate-clusters.R index 6592d06..714a4f7 100644 --- a/R/evaluate-clusters.R +++ b/R/evaluate-clusters.R @@ -307,3 +307,94 @@ calculate_stability <- function( return(all_ari_df) } + +#' Evaluate set of clusters +#' +#' This wrapper function can be used to evaluate clusters calculated using `sweep_clusters()` function. +#' Input should be be a list of data frames with the resulting clusters from all parameter combinations provided to +#' the `sweep_clusters()` function. Output +#' +#' @param x An object containing PCs that clusters were calculated from. This can be +#' either a SingleCellExperiment object, a Seurat object, or a matrix where columns +#' are PCs and rows are cells. If a matrix is provided, it must have row names of cell +#' ids (e.g., barcodes). +#' @param sweep_list A list of data frames obtained from `sweep_clusters()`. each data frame +#' in the list that contains at least two columns: one representing +#' unique cell ids, and one containing cluster assignments. By default, these columns +#' should be named `cell_id` and `cluster` respectively, though this can be customized. +#' The cell id column's values should match either the PC matrix row names, or the +#' SingleCellExperiment/Seurat object cell ids. Typically this data frame will be +#' output from the `rOpenScPCA::calculate_clusters()` function. +#' @param ... Additional argument are passed on to the respective `calculate_purity()` and +#' `calculate_silhouette()` functions. +#' +#' @return A list of data frames with the original `sweep_clusters()` information as well as the additional +#' columns with evaluation information from the `calculate_purity()` and +#' `calculate_silhouette()` functions output. +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' +#' set.seed(2024) +#' +#' sce_object <- splatter::simpleSimulate(nGenes = 1000, verbose = FALSE) |> +#' scater::logNormCounts() |> +#' scater::runPCA(ncomponents = 10) +#' +#' # Calculate Principal Components +#' pc_mat <- reducedDim(sce_object, "PCA") +#' +#' sweep_list <- sweep_clusters( +#' sce_object, +#' algorithm = "walktrap", +#' weighting = "jaccard", +#' nn = c(10, 15, 25), +#' resolution = c(.75, 1), +#' seed = 9 +#' ) +#' +#' sweep_list_evaled <- calculate_cell_cluster_metrics( +#' x = pc_mat, +#' sweep_list = sweep_list) +#' } +#' +calculate_cell_cluster_metrics <- function(x, + sweep_list, + evals = c("purity", "silhouette"), + ...) { + + supported_evals <- c("purity", "silhouette") + + # Check input arguments + stopifnot( + "`sweep_list` must be a list containing data.frames" = is.list(sweep_list), + "`sweep_list` must be a list containing data.frames" = is.data.frame(sweep_list[[1]]), + " Cluster `evals` that are supported are only 'purity' and 'silhouette'" = all(evals %in% supported_evals) + ) + + + evaled_list <- sweep_list |> + purrr::map( + \(df) { + if ("purity" %in% evals) { + df <- calculate_purity( + x = x, + cluster_df = df, + ... + ) + } + if ("silhouette" %in% evals) { + df <- calculate_silhouette( + x = x, + cluster_df = df, + ... + ) + } + return(df) + } + ) + + return(evaled_list) +} diff --git a/R/sweep-clusters.R b/R/sweep-clusters.R index 62a44dc..83273db 100644 --- a/R/sweep-clusters.R +++ b/R/sweep-clusters.R @@ -48,7 +48,7 @@ #' \dontrun{ #' # perform Louvain clustering with Jaccard weighting (defaults), #' # varying the nearest neighobor parameter, and set a seed for reproducibility -#' cluster_df <- sweep_clusters( +#' cluster_list <- sweep_clusters( #' sce_object, #' nn = c(10, 15, 20, 25), #' seed = 11 @@ -56,7 +56,7 @@ #' #' # perform Louvain clustering, with Jaccard and rank weighting, and #' # varying the nearest neighbor and resolution parameters. -#' cluster_df <- sweep_clusters( +#' cluster_list <- sweep_clusters( #' sce_object, #' algorithm = "louvain", #' weighting = c("jaccard", "rank"), @@ -67,7 +67,7 @@ #' #' # perform walktrap and Louvain clustering with Jaccard weighting, and #' # varying the nearest neighbors for both algorithms, and resolution for Louvain. -#' cluster_df <- sweep_clusters( +#' cluster_list <- sweep_clusters( #' sce_object, #' algorithm = c("walktrap", "louvain"), #' weighting = "jaccard",