Skip to content

Commit

Permalink
multi eval function
Browse files Browse the repository at this point in the history
  • Loading branch information
cansavvy committed Dec 18, 2024
1 parent 0424106 commit 12e072e
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 3 deletions.
91 changes: 91 additions & 0 deletions R/evaluate-clusters.R
Original file line number Diff line number Diff line change
Expand Up @@ -307,3 +307,94 @@ calculate_stability <- function(

return(all_ari_df)
}

#' Evaluate set of clusters
#'
#' This wrapper function can be used to evaluate clusters calculated using `sweep_clusters()` function.
#' Input should be be a list of data frames with the resulting clusters from all parameter combinations provided to
#' the `sweep_clusters()` function. Output
#'
#' @param x An object containing PCs that clusters were calculated from. This can be
#' either a SingleCellExperiment object, a Seurat object, or a matrix where columns
#' are PCs and rows are cells. If a matrix is provided, it must have row names of cell
#' ids (e.g., barcodes).
#' @param sweep_list A list of data frames obtained from `sweep_clusters()`. each data frame
#' in the list that contains at least two columns: one representing
#' unique cell ids, and one containing cluster assignments. By default, these columns
#' should be named `cell_id` and `cluster` respectively, though this can be customized.
#' The cell id column's values should match either the PC matrix row names, or the
#' SingleCellExperiment/Seurat object cell ids. Typically this data frame will be
#' output from the `rOpenScPCA::calculate_clusters()` function.
#' @param ... Additional argument are passed on to the respective `calculate_purity()` and
#' `calculate_silhouette()` functions.
#'
#' @return A list of data frames with the original `sweep_clusters()` information as well as the additional
#' columns with evaluation information from the `calculate_purity()` and
#' `calculate_silhouette()` functions output.
#'
#' @export
#'
#' @examples
#' \dontrun{
#'
#' set.seed(2024)
#'
#' sce_object <- splatter::simpleSimulate(nGenes = 1000, verbose = FALSE) |>
#' scater::logNormCounts() |>
#' scater::runPCA(ncomponents = 10)
#'
#' # Calculate Principal Components
#' pc_mat <- reducedDim(sce_object, "PCA")
#'
#' sweep_list <- sweep_clusters(
#' sce_object,
#' algorithm = "walktrap",
#' weighting = "jaccard",
#' nn = c(10, 15, 25),
#' resolution = c(.75, 1),
#' seed = 9
#' )
#'
#' sweep_list_evaled <- calculate_cell_cluster_metrics(
#' x = pc_mat,
#' sweep_list = sweep_list)
#' }
#'
calculate_cell_cluster_metrics <- function(x,
sweep_list,
evals = c("purity", "silhouette"),
...) {

supported_evals <- c("purity", "silhouette")

# Check input arguments
stopifnot(
"`sweep_list` must be a list containing data.frames" = is.list(sweep_list),
"`sweep_list` must be a list containing data.frames" = is.data.frame(sweep_list[[1]]),
" Cluster `evals` that are supported are only 'purity' and 'silhouette'" = all(evals %in% supported_evals)
)


evaled_list <- sweep_list |>
purrr::map(
\(df) {
if ("purity" %in% evals) {
df <- calculate_purity(
x = x,
cluster_df = df,
...
)
}
if ("silhouette" %in% evals) {
df <- calculate_silhouette(
x = x,
cluster_df = df,
...
)
}
return(df)
}
)

return(evaled_list)
}
6 changes: 3 additions & 3 deletions R/sweep-clusters.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@
#' \dontrun{
#' # perform Louvain clustering with Jaccard weighting (defaults),
#' # varying the nearest neighobor parameter, and set a seed for reproducibility
#' cluster_df <- sweep_clusters(
#' cluster_list <- sweep_clusters(
#' sce_object,
#' nn = c(10, 15, 20, 25),
#' seed = 11
#' )
#'
#' # perform Louvain clustering, with Jaccard and rank weighting, and
#' # varying the nearest neighbor and resolution parameters.
#' cluster_df <- sweep_clusters(
#' cluster_list <- sweep_clusters(
#' sce_object,
#' algorithm = "louvain",
#' weighting = c("jaccard", "rank"),
Expand All @@ -67,7 +67,7 @@
#'
#' # perform walktrap and Louvain clustering with Jaccard weighting, and
#' # varying the nearest neighbors for both algorithms, and resolution for Louvain.
#' cluster_df <- sweep_clusters(
#' cluster_list <- sweep_clusters(
#' sce_object,
#' algorithm = c("walktrap", "louvain"),
#' weighting = "jaccard",
Expand Down

0 comments on commit 12e072e

Please sign in to comment.