diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..512b64b --- /dev/null +++ b/.lintr @@ -0,0 +1,9 @@ +encoding: "UTF-8" +linters: linters_with_defaults( + line_length_linter(120), + cyclocomp_linter(20), + commented_code_linter = NULL, + indentation_linter = NULL, + object_usage_linter = NULL, + object_name_linter = NULL + ) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..32537c7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,42 @@ +# All available hooks: https://pre-commit.com/hooks.html +# R specific hooks: https://github.com/lorenzwalthert/precommit +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: detect-aws-credentials + args: [--allow-missing-credentials] + - id: detect-private-key + - id: forbid-submodules + - id: check-case-conflict + - id: check-merge-conflict + - id: trailing-whitespace + exclude: 'renv/.*' + - id: end-of-file-fixer + exclude: '\.Rd' + + - repo: https://github.com/crate-ci/typos + rev: typos-dict-v0.11.34 + hooks: + - id: typos + + - repo: https://github.com/gitleaks/gitleaks + rev: v8.21.2 + hooks: + - id: gitleaks + + - repo: https://github.com/lorenzwalthert/precommit + rev: v0.4.3.9003 + hooks: + + - id: style-files + args: [--style_pkg=styler, --style_fun=tidyverse_style] + # codemeta must be above use-tidy-description when both are used + # - id: codemeta-description-updated + - id: use-tidy-description + - id: lintr + - id: parsable-R + - id: no-browser-statement + - id: no-debug-statement + - id: deps-in-desc + exclude: 'docker/.*|renv/.*' diff --git a/R/calculate-clusters.R b/R/calculate-clusters.R index 9e13fe7..5156eeb 100644 --- a/R/calculate-clusters.R +++ b/R/calculate-clusters.R @@ -1,34 +1,37 @@ #' Calculate graph-based clusters from a provided matrix #' #' This function is provided to simplify application of bluster package clustering functions on OpenScPCA data. 
-#' In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function on a +#' In particular, this function runs `bluster::clusterRows()` with the `bluster::NNGraphParam()` function on a #' principal components matrix, provided either directly or via single-cell object. -#' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. +#' Note that defaults for some arguments may differ from the `bluster::NNGraphParam()` defaults. #' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" #' to align with common practice in scRNA-seq analysis. #' #' @import methods #' #' @param x An object containing PCs that clustering can be performed in. This can be either a SingleCellExperiment -#' object, a Seurat object, or a matrix where columns are PCs and rows are cells. If a matrix is provided, it must -#' have row names of cell ids (e.g., barcodes). +#' object, a Seurat object, or a matrix where columns are PCs and rows are cells. +#' If a matrix is provided, it must have row names of cell ids (e.g., barcodes). #' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". #' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number" #' @param nn Number of nearest neighbors. The default is 10. -#' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. -#' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity" +#' @param resolution Resolution parameter used by Louvain and Leiden clustering only. Default is 1. +#' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) +#' or "modularity" #' @param cluster_args List of additional arguments to pass to the chosen clustering function. 
#' Only single values for each argument are supported (no vectors or lists). -#' See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest +#' See `igraph` documentation for details on each clustering function: <https://igraph.org/r/html/latest> #' @param threads Number of threads to use. The default is 1. #' @param seed Random seed to set for clustering. -#' @param pc_name Name of principal components slot in provided object. This argument is only used if a SingleCellExperiment -#' or Seurat object is provided. If not provided, the SingleCellExperiment object name will default to "PCA" and the +#' @param pc_name Name of principal components slot in provided object. +#' This argument is only used if a SingleCellExperiment or Seurat object is provided. +#' If not provided, the SingleCellExperiment object name will default to "PCA" and the #' Seurat object name will default to "pca". #' -#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters -#' and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and -#' leiden clustering will further include `objective_function`. +#' @return A data frame of cluster results with columns `cell_id` and `cluster`. +#' Additional columns represent algorithm parameters and include at least: `algorithm`, `weighting`, and `nn`. +#' Louvain and Leiden clustering will also include `resolution`, +#' and Leiden clustering will further include `objective_function`.
#' #' @export #' @@ -47,7 +50,7 @@ #' # cluster directly from a matrix using default parameters #' cluster_df <- calculate_clusters(pca_matrix, seed = 11) #' -#' # cluster directly from a matrix using the leiden algorithm with a resolution of 0.1 +#' # cluster directly from a matrix using the Leiden algorithm with a resolution of 0.1 #' cluster_df <- calculate_clusters( #' pca_matrix, #' algorithm = "leiden", @@ -55,7 +58,7 @@ #' seed = 11 #' ) #' -#' # cluster directly from a matrix using the leiden algorithm with 3 iterations +#' # cluster directly from a matrix using the Leiden algorithm with 3 iterations #' cluster_df <- calculate_clusters( #' pca_matrix, #' algorithm = "leiden", @@ -68,8 +71,8 @@ calculate_clusters <- function( algorithm = c("louvain", "walktrap", "leiden"), weighting = c("jaccard", "rank", "number"), nn = 10, - resolution = 1, # louvain or leiden - objective_function = c("CPM", "modularity"), # leiden only + resolution = 1, # Louvain or Leiden + objective_function = c("CPM", "modularity"), # Leiden only cluster_args = list(), threads = 1, seed = NULL, @@ -154,7 +157,7 @@ calculate_clusters <- function( #' #' This function first determines if the provided object is a SingleCellExperiment or #' Seurat object, and then extract the PC matrix. If no name for the PC matrix is provided, -#' this function will assume the name of "PCA" for SingleCellExperiment objects, and +#' this function will use "PCA" for SingleCellExperiment objects, and #' "pca" for Seurat objects. #' #' @import SingleCellExperiment @@ -162,7 +165,7 @@ calculate_clusters <- function( #' #' @param sc_object Either a SingleCellExperiment or Seurat object #' @param pc_name Optionally, the name of the PC matrix in the object. If this is -#' not provided, the name "PCA" is assumed for SingleCellExperiment objects, and +#' not provided, the name "PCA" is used for SingleCellExperiment objects, and #' "pca" for Seurat objects. 
#' #' @return PC matrix with row names @@ -171,13 +174,13 @@ calculate_clusters <- function( #' #' @examples #' \dontrun{ -#' # extract PC matrix from SCE object, assuming default name "PCA" +#' # extract PC matrix from SCE object, using default name "PCA" #' pca_matrix <- extract_pc_matrix(sce_object) #' #' # extract PC matrix from SCE object with non-default name "PCA_MAT" #' pca_matrix <- extract_pc_matrix(sce_object, pc_name = "PCA_MAT") #' -#' # extract PC matrix from Seurat object, assuming default name "pca" +#' # extract PC matrix from Seurat object, using default name "pca" #' pca_matrix <- extract_pc_matrix(seurat_object) #' } extract_pc_matrix <- function(sc_object, pc_name = NULL) { @@ -230,7 +233,7 @@ extract_pc_matrix <- function(sc_object, pc_name = NULL) { #' or Seurat object containing PCs. If a matrix is provided, rows should be cells #' and columns should be PCs, and row names should be cell ids (e.g., barcodes). #' @param pc_name Optionally, the name of the PC matrix in the object. Not used for -#' matrices. If this is not provided, the name "PCA" is assumed for +#' matrices. If this is not provided, the name "PCA" is used for #' SingleCellExperiment objects, and "pca" for Seurat objects. #' #' @return A matrix of PCs with row names representing cell ids @@ -242,7 +245,10 @@ prepare_pc_matrix <- function(x, pc_name = NULL) { } else if (is(x, "SingleCellExperiment") || is(x, "Seurat")) { x <- extract_pc_matrix(x, pc_name = pc_name) } else { - stop("The first argument should be one of: a SingleCellExperiment object, a Seurat object, or a matrix with row names.") + stop( + "The first argument should be one of: ", + "a SingleCellExperiment object, a Seurat object, or a matrix with row names." 
+ ) } return(x) diff --git a/R/evaluate-clusters.R b/R/evaluate-clusters.R index 4caa35b..480467e 100644 --- a/R/evaluate-clusters.R +++ b/R/evaluate-clusters.R @@ -279,7 +279,9 @@ calculate_stability <- function( resampled_df <- withCallingHandlers( calculate_clusters(resampled_pca, ...), - warning = \(w) {if(!warnings) tryInvokeRestart("muffleWarning")} + warning = \(w) { + if (!warnings) tryInvokeRestart("muffleWarning") + } ) ari <- pdfCluster::adj.rand.index(resampled_df$cluster, original_clusters) diff --git a/R/sweep-clusters.R b/R/sweep-clusters.R index 9422418..62a44dc 100644 --- a/R/sweep-clusters.R +++ b/R/sweep-clusters.R @@ -11,7 +11,7 @@ #' For each algorithm specified, all parameters possible to use with that #' algorithm will be systematically varied. This function does not accept additional #' parameters besides those listed above. -#' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. +#' Note that defaults for some arguments may differ from the `bluster::NNGraphParam()` defaults. #' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" #' to align with common practice in scRNA-seq analysis. #' @@ -25,7 +25,7 @@ #' "rank", or "number" #' @param nn Number of nearest neighbors to consider when sweeping parameters. #' Provide a vector of unique values to vary this parameter. The default is 10. -#' @param resolution Resolution parameter used by louvain and leiden clustering only. +#' @param resolution Resolution parameter used by Louvain and Leiden clustering only. #' Provide a vector of unique values to vary this parameter. The default is 1. #' @param objective_function Leiden-specific parameter for whether to use the #' Constant Potts Model ("CPM"; default) or "modularity". Provide a vector of unique values @@ -39,14 +39,14 @@ #' @return A list of data frames from performing clustering across all parameter combinations. 
#' Columns include `cluster_set` (identifier column for results from a single clustering run), #' `cell_id`, and `cluster`. Additional columns represent algorithm parameters and include at least: -#' `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, -#' and leiden clustering will further include `objective_function`. +#' `algorithm`, `weighting`, and `nn`. Louvain and Leiden clustering will also include `resolution`, +#' and Leiden clustering will further include `objective_function`. #' #' @export #' #' @examples #' \dontrun{ -#' # perform louvain clustering with jaccard weighting (defaults), +#' # perform Louvain clustering with Jaccard weighting (defaults), #' # varying the nearest neighobor parameter, and set a seed for reproducibility #' cluster_df <- sweep_clusters( #' sce_object, @@ -54,7 +54,7 @@ #' seed = 11 #' ) #' -#' # perform louvain clustering, with jaccard and rank weighting, and +#' # perform Louvain clustering, with Jaccard and rank weighting, and #' # varying the nearest neighbor and resolution parameters. #' cluster_df <- sweep_clusters( #' sce_object, @@ -65,8 +65,8 @@ #' seed = 11 #' ) #' -#' # perform walktrap and louvain clustering with jaccard weighting, and -#' # varying the nearest neighbors for both algorithms, and resolution for louvain. +#' # perform walktrap and Louvain clustering with Jaccard weighting, and +#' # varying the nearest neighbors for both algorithms, and resolution for Louvain. 
#' cluster_df <- sweep_clusters( #' sce_object, #' algorithm = c("walktrap", "louvain"), @@ -81,8 +81,8 @@ sweep_clusters <- function( algorithm = "louvain", weighting = "jaccard", nn = 10, - resolution = 1, # louvain or leiden - objective_function = "CPM", # leiden only + resolution = 1, # Louvain or Leiden + objective_function = "CPM", # Leiden only threads = 1, seed = NULL, pc_name = NULL) { diff --git a/man/calculate_clusters.Rd b/man/calculate_clusters.Rd index e263dd5..5e93e9d 100644 --- a/man/calculate_clusters.Rd +++ b/man/calculate_clusters.Rd @@ -19,8 +19,8 @@ calculate_clusters( } \arguments{ \item{x}{An object containing PCs that clustering can be performed in. This can be either a SingleCellExperiment -object, a Seurat object, or a matrix where columns are PCs and rows are cells. If a matrix is provided, it must -have row names of cell ids (e.g., barcodes).} +object, a Seurat object, or a matrix where columns are PCs and rows are cells. +If a matrix is provided, it must have row names of cell ids (e.g., barcodes).} \item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".} @@ -28,32 +28,35 @@ have row names of cell ids (e.g., barcodes).} \item{nn}{Number of nearest neighbors. The default is 10.} -\item{resolution}{Resolution parameter used by louvain and leiden clustering only. Default is 1.} +\item{resolution}{Resolution parameter used by Louvain and Leiden clustering only. Default is 1.} -\item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"} +\item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) +or "modularity"} \item{cluster_args}{List of additional arguments to pass to the chosen clustering function. Only single values for each argument are supported (no vectors or lists). 
-See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest} +See `igraph` documentation for details on each clustering function: <https://igraph.org/r/html/latest>} \item{threads}{Number of threads to use. The default is 1.} \item{seed}{Random seed to set for clustering.} -\item{pc_name}{Name of principal components slot in provided object. This argument is only used if a SingleCellExperiment -or Seurat object is provided. If not provided, the SingleCellExperiment object name will default to "PCA" and the +\item{pc_name}{Name of principal components slot in provided object. +This argument is only used if a SingleCellExperiment or Seurat object is provided. +If not provided, the SingleCellExperiment object name will default to "PCA" and the Seurat object name will default to "pca".} } \value{ -A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters - and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and - leiden clustering will further include `objective_function`. +A data frame of cluster results with columns `cell_id` and `cluster`. + Additional columns represent algorithm parameters and include at least: `algorithm`, `weighting`, and `nn`. + Louvain and Leiden clustering will also include `resolution`, + and Leiden clustering will further include `objective_function`. } \description{ This function is provided to simplify application of bluster package clustering functions on OpenScPCA data. -In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function on a +In particular, this function runs `bluster::clusterRows()` with the `bluster::NNGraphParam()` function on a principal components matrix, provided either directly or via single-cell object. -Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults.
+Note that defaults for some arguments may differ from the `bluster::NNGraphParam()` defaults. Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" to align with common practice in scRNA-seq analysis. } @@ -72,7 +75,7 @@ cluster_df <- calculate_clusters(seurat_object, seed = 11) # cluster directly from a matrix using default parameters cluster_df <- calculate_clusters(pca_matrix, seed = 11) -# cluster directly from a matrix using the leiden algorithm with a resolution of 0.1 +# cluster directly from a matrix using the Leiden algorithm with a resolution of 0.1 cluster_df <- calculate_clusters( pca_matrix, algorithm = "leiden", @@ -80,7 +83,7 @@ cluster_df <- calculate_clusters( seed = 11 ) -# cluster directly from a matrix using the leiden algorithm with 3 iterations +# cluster directly from a matrix using the Leiden algorithm with 3 iterations cluster_df <- calculate_clusters( pca_matrix, algorithm = "leiden", diff --git a/man/extract_pc_matrix.Rd b/man/extract_pc_matrix.Rd index 8f21bc9..40953a3 100644 --- a/man/extract_pc_matrix.Rd +++ b/man/extract_pc_matrix.Rd @@ -11,7 +11,7 @@ extract_pc_matrix(sc_object, pc_name = NULL) \item{sc_object}{Either a SingleCellExperiment or Seurat object} \item{pc_name}{Optionally, the name of the PC matrix in the object. If this is -not provided, the name "PCA" is assumed for SingleCellExperiment objects, and +not provided, the name "PCA" is used for SingleCellExperiment objects, and "pca" for Seurat objects.} } \value{ @@ -20,18 +20,18 @@ PC matrix with row names \description{ This function first determines if the provided object is a SingleCellExperiment or Seurat object, and then extract the PC matrix. If no name for the PC matrix is provided, -this function will assume the name of "PCA" for SingleCellExperiment objects, and +this function will use "PCA" for SingleCellExperiment objects, and "pca" for Seurat objects. 
} \examples{ \dontrun{ -# extract PC matrix from SCE object, assuming default name "PCA" +# extract PC matrix from SCE object, using default name "PCA" pca_matrix <- extract_pc_matrix(sce_object) # extract PC matrix from SCE object with non-default name "PCA_MAT" pca_matrix <- extract_pc_matrix(sce_object, pc_name = "PCA_MAT") -# extract PC matrix from Seurat object, assuming default name "pca" +# extract PC matrix from Seurat object, using default name "pca" pca_matrix <- extract_pc_matrix(seurat_object) } } diff --git a/man/prepare_pc_matrix.Rd b/man/prepare_pc_matrix.Rd index 9c4eadc..e071672 100644 --- a/man/prepare_pc_matrix.Rd +++ b/man/prepare_pc_matrix.Rd @@ -12,7 +12,7 @@ or Seurat object containing PCs. If a matrix is provided, rows should be cells and columns should be PCs, and row names should be cell ids (e.g., barcodes).} \item{pc_name}{Optionally, the name of the PC matrix in the object. Not used for -matrices. If this is not provided, the name "PCA" is assumed for +matrices. If this is not provided, the name "PCA" is used for SingleCellExperiment objects, and "pca" for Seurat objects.} } \value{ diff --git a/man/sweep_clusters.Rd b/man/sweep_clusters.Rd index 6f05b5c..93d79f8 100644 --- a/man/sweep_clusters.Rd +++ b/man/sweep_clusters.Rd @@ -31,7 +31,7 @@ Provide a vector of unique values to vary this parameter. Options include "jacca \item{nn}{Number of nearest neighbors to consider when sweeping parameters. Provide a vector of unique values to vary this parameter. The default is 10.} -\item{resolution}{Resolution parameter used by louvain and leiden clustering only. +\item{resolution}{Resolution parameter used by Louvain and Leiden clustering only. Provide a vector of unique values to vary this parameter. 
The default is 1.} \item{objective_function}{Leiden-specific parameter for whether to use the @@ -50,8 +50,8 @@ object name will default to "PCA" and the Seurat object name will default to "pc A list of data frames from performing clustering across all parameter combinations. Columns include `cluster_set` (identifier column for results from a single clustering run), `cell_id`, and `cluster`. Additional columns represent algorithm parameters and include at least: - `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, - and leiden clustering will further include `objective_function`. + `algorithm`, `weighting`, and `nn`. Louvain and Leiden clustering will also include `resolution`, + and Leiden clustering will further include `objective_function`. } \description{ This function can be used to perform reproducible clustering while varying a set of parameters. @@ -66,13 +66,13 @@ Multiple values can be provided for any of: For each algorithm specified, all parameters possible to use with that algorithm will be systematically varied. This function does not accept additional parameters besides those listed above. -Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. +Note that defaults for some arguments may differ from the `bluster::NNGraphParam()` defaults. Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" to align with common practice in scRNA-seq analysis. 
} \examples{ \dontrun{ -# perform louvain clustering with jaccard weighting (defaults), +# perform Louvain clustering with Jaccard weighting (defaults), # varying the nearest neighobor parameter, and set a seed for reproducibility cluster_df <- sweep_clusters( sce_object, @@ -80,7 +80,7 @@ cluster_df <- sweep_clusters( seed = 11 ) -# perform louvain clustering, with jaccard and rank weighting, and +# perform Louvain clustering, with Jaccard and rank weighting, and # varying the nearest neighbor and resolution parameters. cluster_df <- sweep_clusters( sce_object, @@ -91,8 +91,8 @@ cluster_df <- sweep_clusters( seed = 11 ) -# perform walktrap and louvain clustering with jaccard weighting, and -# varying the nearest neighbors for both algorithms, and resolution for louvain. +# perform walktrap and Louvain clustering with Jaccard weighting, and +# varying the nearest neighbors for both algorithms, and resolution for Louvain. cluster_df <- sweep_clusters( sce_object, algorithm = c("walktrap", "louvain"), diff --git a/rOpenScPCA.Rproj b/rOpenScPCA.Rproj index ba381fb..ea83efd 100644 --- a/rOpenScPCA.Rproj +++ b/rOpenScPCA.Rproj @@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/tests/testthat/test-evaluate-clusters.R b/tests/testthat/test-evaluate-clusters.R index 746adc6..10096ae 100644 --- a/tests/testthat/test-evaluate-clusters.R +++ b/tests/testthat/test-evaluate-clusters.R @@ -95,7 +95,6 @@ test_that("calculate_purity works as expected with non-default cell id column na test_that("calculate_stability works as expected with defaults", { - df <- calculate_stability(test_mat, cluster_df) expected_names <- colnames(cluster_df)[!(colnames(cluster_df) %in% c("cell_id", "cluster"))] @@ -143,14 +142,12 @@ test_that("calculate_stability warnings argument works", { expect_warning({ df <- calculate_stability(test_mat, cluster_df, 
replicates = 1, seed = 1, warnings = TRUE) }) - }) test_that("calculate_stability works as expected with non-default cluster column name", { - cluster_df <- cluster_df |> dplyr::rename(clusters = cluster) @@ -165,7 +162,6 @@ test_that("calculate_stability works as expected with non-default cluster column test_that("calculate_stability works as expected with non-default cell id name", { - cluster_df <- cluster_df |> dplyr::rename(barcodes = cell_id) @@ -182,10 +178,9 @@ test_that("calculate_stability works as expected with non-default cell id name", test_that("calculate_stability errors as expected", { - # cluster_df too short expect_error({ - calculate_stability(test_mat, cluster_df[1:5,]) + calculate_stability(test_mat, cluster_df[1:5, ]) }) # cluster_df too long