From 12e072e9bb1424e7a36603eb2a0bb9a7f2c8f8db Mon Sep 17 00:00:00 2001
From: Candace Savonen <cansav09@gmail.com>
Date: Wed, 18 Dec 2024 09:48:45 -0500
Subject: [PATCH] multi eval function

---
 R/evaluate-clusters.R | 91 +++++++++++++++++++++++++++++++++++++++++++
 R/sweep-clusters.R    |  6 +--
 2 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/R/evaluate-clusters.R b/R/evaluate-clusters.R
index 6592d06..714a4f7 100644
--- a/R/evaluate-clusters.R
+++ b/R/evaluate-clusters.R
@@ -307,3 +307,94 @@ calculate_stability <- function(
 
   return(all_ari_df)
 }
+
+#' Evaluate set of clusters
+#'
+#' This wrapper function can be used to evaluate clusters calculated using `sweep_clusters()` function.
+#' Input should be be a list of data frames with the resulting clusters from all parameter combinations provided to
+#' the `sweep_clusters()` function. Output
+#'
+#' @param x An object containing PCs that clusters were calculated from. This can be
+#'   either a SingleCellExperiment object, a Seurat object, or a matrix where columns
+#'   are PCs and rows are cells. If a matrix is provided, it must have row names of cell
+#'   ids (e.g., barcodes).
+#' @param sweep_list A list of data frames obtained from `sweep_clusters()`. each data frame
+#'   in the list that contains at least two columns: one representing
+#'   unique cell ids, and one containing cluster assignments. By default, these columns
+#'   should be named `cell_id` and `cluster` respectively, though this can be customized.
+#'   The cell id column's values should match either the PC matrix row names, or the
+#'   SingleCellExperiment/Seurat object cell ids. Typically this data frame will be
+#'   output from the `rOpenScPCA::calculate_clusters()` function.
+#' @param ... Additional argument are passed on to the respective `calculate_purity()` and
+#' `calculate_silhouette()` functions.
+#'
+#' @return A list of data frames with the original `sweep_clusters()` information as well as the additional
+#'   columns with evaluation information from the `calculate_purity()` and
+#'   `calculate_silhouette()` functions output.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'
+#' set.seed(2024)
+#'
+#' sce_object <- splatter::simpleSimulate(nGenes = 1000, verbose = FALSE) |>
+#'   scater::logNormCounts() |>
+#'   scater::runPCA(ncomponents = 10)
+#'
+#' # Calculate Principal Components
+#' pc_mat <- reducedDim(sce_object, "PCA")
+#'
+#' sweep_list <- sweep_clusters(
+#'   sce_object,
+#'   algorithm = "walktrap",
+#'   weighting = "jaccard",
+#'   nn = c(10, 15, 25),
+#'   resolution = c(.75, 1),
+#'   seed = 9
+#' )
+#'
+#' sweep_list_evaled <- calculate_cell_cluster_metrics(
+#'   x = pc_mat,
+#'   sweep_list = sweep_list)
+#' }
+#'
+calculate_cell_cluster_metrics <- function(x,
+                                           sweep_list,
+                                           evals = c("purity", "silhouette"),
+                                           ...) {
+
+  supported_evals <- c("purity", "silhouette")
+
+  # Check input arguments
+  stopifnot(
+    "`sweep_list` must be a list containing data.frames" = is.list(sweep_list),
+    "`sweep_list` must be a list containing data.frames" = is.data.frame(sweep_list[[1]]),
+    " Cluster `evals` that are supported are only 'purity' and 'silhouette'" = all(evals %in% supported_evals)
+  )
+
+
+  evaled_list <- sweep_list |>
+    purrr::map(
+      \(df) {
+        if ("purity" %in% evals) {
+          df <- calculate_purity(
+            x = x,
+            cluster_df = df,
+            ...
+          )
+        }
+        if ("silhouette" %in% evals) {
+          df <- calculate_silhouette(
+            x = x,
+            cluster_df = df,
+            ...
+          )
+        }
+        return(df)
+      }
+    )
+
+  return(evaled_list)
+}
diff --git a/R/sweep-clusters.R b/R/sweep-clusters.R
index 62a44dc..83273db 100644
--- a/R/sweep-clusters.R
+++ b/R/sweep-clusters.R
@@ -48,7 +48,7 @@
 #' \dontrun{
 #' # perform Louvain clustering with Jaccard weighting (defaults),
 #' # varying the nearest neighobor parameter, and set a seed for reproducibility
-#' cluster_df <- sweep_clusters(
+#' cluster_list <- sweep_clusters(
 #'   sce_object,
 #'   nn = c(10, 15, 20, 25),
 #'   seed = 11
@@ -56,7 +56,7 @@
 #'
 #' # perform Louvain clustering, with Jaccard and rank weighting, and
 #' # varying the nearest neighbor and resolution parameters.
-#' cluster_df <- sweep_clusters(
+#' cluster_list <- sweep_clusters(
 #'   sce_object,
 #'   algorithm = "louvain",
 #'   weighting = c("jaccard", "rank"),
@@ -67,7 +67,7 @@
 #'
 #' # perform walktrap and Louvain clustering with Jaccard weighting, and
 #' # varying the nearest neighbors for both algorithms, and resolution for Louvain.
-#' cluster_df <- sweep_clusters(
+#' cluster_list <- sweep_clusters(
 #'   sce_object,
 #'   algorithm = c("walktrap", "louvain"),
 #'   weighting = "jaccard",