Merge pull request #11 from AlexsLemonade/jashapiro/style

Add pre-commit and style
AlexsLemonade · Nov 20, 2024 · 9c5472c · 9c5472c
2 parents fe1cf9f + e825ef2
commit 9c5472c
Show file tree

Hide file tree

Showing 11 changed files with 124 additions and 66 deletions.
diff --git a/.lintr b/.lintr
@@ -0,0 +1,9 @@
+encoding: "UTF-8"
+linters: linters_with_defaults(
+    line_length_linter(120),
+    cyclocomp_linter(20),
+    commented_code_linter = NULL,
+    indentation_linter = NULL,
+    object_usage_linter = NULL,
+    object_name_linter = NULL
+  )
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,42 @@
+# All available hooks: https://pre-commit.com/hooks.html
+# R specific hooks: https://github.com/lorenzwalthert/precommit
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: detect-aws-credentials
+        args: [--allow-missing-credentials]
+      - id: detect-private-key
+      - id: forbid-submodules
+      - id: check-case-conflict
+      - id: check-merge-conflict
+      - id: trailing-whitespace
+        exclude: 'renv/.*'
+      - id: end-of-file-fixer
+        exclude: '\.Rd'
+
+  - repo: https://github.com/crate-ci/typos
+    rev: typos-dict-v0.11.34
+    hooks:
+      - id: typos
+
+  - repo: https://github.com/gitleaks/gitleaks
+    rev: v8.21.2
+    hooks:
+      - id: gitleaks
+
+  - repo: https://github.com/lorenzwalthert/precommit
+    rev: v0.4.3.9003
+    hooks:
+
+      - id: style-files
+        args: [--style_pkg=styler, --style_fun=tidyverse_style]
+      # codemeta must be above use-tidy-description when both are used
+      # -   id: codemeta-description-updated
+      - id: use-tidy-description
+      - id: lintr
+      - id: parsable-R
+      - id: no-browser-statement
+      - id: no-debug-statement
+      - id: deps-in-desc
+        exclude: 'docker/.*|renv/.*'
diff --git a/R/calculate-clusters.R b/R/calculate-clusters.R
@@ -1,34 +1,37 @@
 #' Calculate graph-based clusters from a provided matrix
 #'
 #' This function is provided to simplify application of bluster package clustering functions on OpenScPCA data.
-#' In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function on a
+#' In particular, this function runs `bluster::clusterRows()` with the `bluster::NNGraphParam()` function on a
 #' principal components matrix, provided either directly or via single-cell object.
-#' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults.
+#' Note that defaults for some arguments may differ from the `bluster::NNGraphParam()` defaults.
 #' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard"
 #' to align with common practice in scRNA-seq analysis.
 #'
 #' @import methods
 #'
 #' @param x An object containing PCs that clustering can be performed in. This can be either a SingleCellExperiment
-#'   object, a Seurat object, or a matrix where columns are PCs and rows are cells. If a matrix is provided, it must
-#'   have row names of cell ids (e.g., barcodes).
+#'   object, a Seurat object, or a matrix where columns are PCs and rows are cells.
+#'   If a matrix is provided, it must have row names of cell ids (e.g., barcodes).
 #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".
 #' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"
 #' @param nn Number of nearest neighbors. The default is 10.
-#' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1.
-#' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"
+#' @param resolution Resolution parameter used by Louvain and Leiden clustering only. Default is 1.
+#' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default)
+#'   or "modularity"
 #' @param cluster_args List of additional arguments to pass to the chosen clustering function.
 #'   Only single values for each argument are supported (no vectors or lists).
-#'   See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest
+#'   See `igraph` documentation for details on each clustering function: <https://igraph.org/r/html/latest>
 #' @param threads Number of threads to use. The default is 1.
 #' @param seed Random seed to set for clustering.
-#' @param pc_name Name of principal components slot in provided object. This argument is only used if a SingleCellExperiment
-#'   or Seurat object is provided. If not provided, the SingleCellExperiment object name will default to "PCA" and the
+#' @param pc_name Name of principal components slot in provided object.
+#'   This argument is only used if a SingleCellExperiment or Seurat object is provided.
+#'   If not provided, the SingleCellExperiment object name will default to "PCA" and the
 #'   Seurat object name will default to "pca".
 #'
-#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters
-#'   and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and
-#'   leiden clustering will further include `objective_function`.
+#' @return A data frame of cluster results with columns `cell_id` and `cluster`.
+#'   Additional columns represent algorithm parameters and include at least: `algorithm`, `weighting`, and `nn`.
+#'   Louvain and Leiden clustering will also include `resolution`,
+#'   and Leiden clustering will further include `objective_function`.
 #'
 #' @export
 #'
@@ -47,15 +50,15 @@
 #' # cluster directly from a matrix using default parameters
 #' cluster_df <- calculate_clusters(pca_matrix, seed = 11)
 #'
-#' # cluster directly from a matrix using the leiden algorithm with a resolution of 0.1
+#' # cluster directly from a matrix using the Leiden algorithm with a resolution of 0.1
 #' cluster_df <- calculate_clusters(
 #'   pca_matrix,
 #'   algorithm = "leiden",
 #'   resolution = 0.1,
 #'   seed = 11
 #' )
 #'
-#' # cluster directly from a matrix using the leiden algorithm with 3 iterations
+#' # cluster directly from a matrix using the Leiden algorithm with 3 iterations
 #' cluster_df <- calculate_clusters(
 #'   pca_matrix,
 #'   algorithm = "leiden",
@@ -68,8 +71,8 @@ calculate_clusters <- function(
     algorithm = c("louvain", "walktrap", "leiden"),
     weighting = c("jaccard", "rank", "number"),
     nn = 10,
-    resolution = 1, # louvain or leiden
-    objective_function = c("CPM", "modularity"), # leiden only
+    resolution = 1, # Louvain or Leiden
+    objective_function = c("CPM", "modularity"), # Leiden only
     cluster_args = list(),
     threads = 1,
     seed = NULL,
@@ -154,15 +157,15 @@ calculate_clusters <- function(
 #'
 #' This function first determines if the provided object is a SingleCellExperiment or
 #' Seurat object, and then extracts the PC matrix. If no name for the PC matrix is provided,
-#' this function will assume the name of "PCA" for SingleCellExperiment objects, and
+#' this function will use "PCA" for SingleCellExperiment objects, and
 #' "pca" for Seurat objects.
 #'
 #' @import SingleCellExperiment
 #' @import methods
 #'
 #' @param sc_object Either a SingleCellExperiment or Seurat object
 #' @param pc_name Optionally, the name of the PC matrix in the object. If this is
-#' not provided, the name "PCA" is assumed for SingleCellExperiment objects, and
+#' not provided, the name "PCA" is used for SingleCellExperiment objects, and
 #' "pca" for Seurat objects.
 #'
 #' @return PC matrix with row names
@@ -171,13 +174,13 @@ calculate_clusters <- function(
 #'
 #' @examples
 #' \dontrun{
-#' # extract PC matrix from SCE object, assuming default name "PCA"
+#' # extract PC matrix from SCE object, using default name "PCA"
 #' pca_matrix <- extract_pc_matrix(sce_object)
 #'
 #' # extract PC matrix from SCE object with non-default name "PCA_MAT"
 #' pca_matrix <- extract_pc_matrix(sce_object, pc_name = "PCA_MAT")
 #'
-#' # extract PC matrix from Seurat object, assuming default name "pca"
+#' # extract PC matrix from Seurat object, using default name "pca"
 #' pca_matrix <- extract_pc_matrix(seurat_object)
 #' }
 extract_pc_matrix <- function(sc_object, pc_name = NULL) {
@@ -230,7 +233,7 @@ extract_pc_matrix <- function(sc_object, pc_name = NULL) {
 #'  or Seurat object containing PCs. If a matrix is provided, rows should be cells
 #'  and columns should be PCs, and row names should be cell ids (e.g., barcodes).
 #' @param pc_name Optionally, the name of the PC matrix in the object. Not used for
-#'   matrices. If this is not provided, the name "PCA" is assumed for
+#'   matrices. If this is not provided, the name "PCA" is used for
 #'   SingleCellExperiment objects, and "pca" for Seurat objects.
 #'
 #' @return A matrix of PCs with row names representing cell ids
@@ -242,7 +245,10 @@ prepare_pc_matrix <- function(x, pc_name = NULL) {
   } else if (is(x, "SingleCellExperiment") || is(x, "Seurat")) {
     x <- extract_pc_matrix(x, pc_name = pc_name)
   } else {
-    stop("The first argument should be one of: a SingleCellExperiment object, a Seurat object, or a matrix with row names.")
+    stop(
+      "The first argument should be one of: ",
+      "a SingleCellExperiment object, a Seurat object, or a matrix with row names."
+    )
   }
 
   return(x)

diff --git a/R/evaluate-clusters.R b/R/evaluate-clusters.R
@@ -279,7 +279,9 @@ calculate_stability <- function(
 
         resampled_df <- withCallingHandlers(
           calculate_clusters(resampled_pca, ...),
-          warning = \(w) {if(!warnings) tryInvokeRestart("muffleWarning")}
+          warning = \(w) {
+            if (!warnings) tryInvokeRestart("muffleWarning")
+          }
         )
 
         ari <- pdfCluster::adj.rand.index(resampled_df$cluster, original_clusters)

diff --git a/R/sweep-clusters.R b/R/sweep-clusters.R
@@ -11,7 +11,7 @@
 #' For each algorithm specified, all parameters possible to use with that
 #' algorithm will be systematically varied. This function does not accept additional
 #' parameters besides those listed above.
-#' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults.
+#' Note that defaults for some arguments may differ from the `bluster::NNGraphParam()` defaults.
 #' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard"
 #' to align with common practice in scRNA-seq analysis.
 #'
@@ -25,7 +25,7 @@
 #'   "rank", or "number"
 #' @param nn Number of nearest neighbors to consider when sweeping parameters.
 #'  Provide a vector of unique values to vary this parameter. The default is 10.
-#' @param resolution Resolution parameter used by louvain and leiden clustering only.
+#' @param resolution Resolution parameter used by Louvain and Leiden clustering only.
 #'   Provide a vector of unique values to vary this parameter. The default is 1.
 #' @param objective_function Leiden-specific parameter for whether to use the
 #'   Constant Potts Model ("CPM"; default) or "modularity". Provide a vector of unique values
@@ -39,22 +39,22 @@
 #' @return A list of data frames from performing clustering across all parameter combinations.
 #'   Columns include `cluster_set` (identifier column for results from a single clustering run),
 #'   `cell_id`, and `cluster`. Additional columns represent algorithm parameters and include at least:
-#'   `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`,
-#'   and leiden clustering will further include `objective_function`.
+#'   `algorithm`, `weighting`, and `nn`. Louvain and Leiden clustering will also include `resolution`,
+#'   and Leiden clustering will further include `objective_function`.
 #'
 #' @export
 #'
 #' @examples
 #' \dontrun{
-#' # perform louvain clustering with jaccard weighting (defaults),
+#' # perform Louvain clustering with Jaccard weighting (defaults),
 #' # varying the nearest neighbor parameter, and set a seed for reproducibility
 #' cluster_df <- sweep_clusters(
 #'   sce_object,
 #'   nn = c(10, 15, 20, 25),
 #'   seed = 11
 #' )
 #'
-#' # perform louvain clustering, with jaccard and rank weighting, and
+#' # perform Louvain clustering, with Jaccard and rank weighting, and
 #' # varying the nearest neighbor and resolution parameters.
 #' cluster_df <- sweep_clusters(
 #'   sce_object,
@@ -65,8 +65,8 @@
 #'   seed = 11
 #' )
 #'
-#' # perform walktrap and louvain clustering with jaccard weighting, and
-#' # varying the nearest neighbors for both algorithms, and resolution for louvain.
+#' # perform walktrap and Louvain clustering with Jaccard weighting, and
+#' # varying the nearest neighbors for both algorithms, and resolution for Louvain.
 #' cluster_df <- sweep_clusters(
 #'   sce_object,
 #'   algorithm = c("walktrap", "louvain"),
@@ -81,8 +81,8 @@ sweep_clusters <- function(
     algorithm = "louvain",
     weighting = "jaccard",
     nn = 10,
-    resolution = 1, # louvain or leiden
-    objective_function = "CPM", # leiden only
+    resolution = 1, # Louvain or Leiden
+    objective_function = "CPM", # Leiden only
     threads = 1,
     seed = NULL,
     pc_name = NULL) {

diff --git a/man/calculate_clusters.Rd b/man/calculate_clusters.Rd
diff --git a/man/extract_pc_matrix.Rd b/man/extract_pc_matrix.Rd
diff --git a/man/prepare_pc_matrix.Rd b/man/prepare_pc_matrix.Rd