Merge pull request #240 from AlexsLemonade/jashapiro/precommit

Add precommit and specify Seurat version
AlexsLemonade · Nov 22, 2023 · 811d267 · 811d267
2 parents ea76bdb + f155bee
commit 811d267
Show file tree

Hide file tree

Showing 12 changed files with 124 additions and 47 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -18,3 +18,7 @@
 
 # Docker folder
 ^docker$
+
+# development
+^\.pre-commit-config\.yaml$
+^\.lintr$
diff --git a/.lintr b/.lintr
@@ -0,0 +1,8 @@
+encoding: "UTF-8"
+linters: linters_with_defaults(
+    line_length_linter(120),
+    commented_code_linter = NULL,
+    indentation_linter = NULL,
+    object_usage_linter = NULL,
+    object_name_linter = NULL
+  )
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,54 @@
+# All available hooks: https://pre-commit.com/hooks.html
+# R specific hooks: https://github.com/lorenzwalthert/precommit
+repos:
+-   repo: https://github.com/lorenzwalthert/precommit
+    rev: v0.3.2.9027
+    hooks:
+    -   id: style-files
+        args: [--style_pkg=styler, --style_fun=tidyverse_style]
+    # codemeta must be above use-tidy-description when both are used
+    # -   id: codemeta-description-updated
+    -   id: use-tidy-description
+    -   id: spell-check
+        exclude: >
+          (?x)^(
+          .*\.[rR]|
+          .*\.feather|
+          .*\.jpeg|
+          .*\.pdf|
+          .*\.png|
+          .*\.py|
+          .*\.RData|
+          .*\.rds|
+          .*\.Rds|
+          .*\.Rproj|
+          .*\.sh|
+          (.*/|)\.gitignore|
+          (.*/|)\.gitlab-ci\.yml|
+          (.*/|)\.lintr|
+          (.*/|)\.pre-commit-.*|
+          (.*/|)\.Rbuildignore|
+          (.*/|)\.Renviron|
+          (.*/|)\.Rprofile|
+          (.*/|)\.travis\.yml|
+          (.*/|)appveyor\.yml|
+          (.*/|)DESCRIPTION|
+          (.*/|)NAMESPACE|
+          (.*/|)renv/settings\.dcf|
+          (.*/|)renv\.lock|
+          (.*/|)WORDLIST|
+          \.github/workflows/.*|
+          data/.*|
+          )$
+    -   id: lintr
+    -   id: parsable-R
+    -   id: no-browser-statement
+    -   id: no-debug-statement
+    -   id: deps-in-desc
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+    -   id: check-added-large-files
+        args: ['--maxkb=200']
+    -   id: end-of-file-fixer
+        exclude: '\.Rd'
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -2,7 +2,19 @@
 
 ## Coding style
 
-We generally follow the [tidyverse style](http://style.tidyverse.org/).
-You can use the [`styler` package](https://styler.r-lib.org) to perform styling locally.
+We try to follow the [tidyverse style](http://style.tidyverse.org/).
+You can use the [`styler` package](https://styler.r-lib.org) to perform styling locally, or use the pre-commit hooks described below.
+
 If code needs styling during/after a PR, one of the repository owners can perform styling automatically by making a comment with just the word `/style` in the PR.
 (Note that for this to work, the commenter must have set their membership to "Public" in https://github.com/orgs/AlexsLemonade/people)
+
+## Pre-commit hooks
+
+For convenience, we have included a set of [pre-commit hooks](https://pre-commit.com/) that can be used to automatically format code according to the above specifications, as well as to spell check and check for other common errors.
+
+To use these hooks, install the `pre-commit` package according to your favorite method (`pip install pre-commit` or `conda install pre-commit`), then run `pre-commit install` in the `scpcaTools` directory.
+This will install the hooks in the `.git/hooks` directory, and they will be run automatically when you commit changes.
+If any of the hooks fail, the commit will be aborted, and you will need to fix the errors and re-commit.
+
+Notably, the `spell-check` hook will report spelling errors, but will also add any words it finds to the dictionary file (stored at `inst/WORDLIST`).
+This is convenient for many cases (where the word is real but unknown), but be sure to remove truly misspelled words from the dictionary file before committing, or they will not be caught in the future!
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -62,7 +62,7 @@ Suggests:
     scran,
     scpcaData,
     sessioninfo,
-    Seurat,
+    Seurat (< 5.0.0),
     testthat (>= 3.1.9),
     zellkonverter
 Remotes:

diff --git a/R/integrate_sces.R b/R/integrate_sces.R
@@ -188,12 +188,12 @@ integrate_harmony <- function(merged_sce,
   }
 
   # if no covariate lambda provided, set to 1
-  if(length(covariate_lambda) == 0){
+  if (length(covariate_lambda) == 0) {
     covariate_lambda <- rep(1, length(covariate_cols))
   }
 
   # check that the lambda values match up
-  if(length(covariate_lambda) != length(covariate_cols)){
+  if (length(covariate_lambda) != length(covariate_cols)) {
     stop("The number of covariate columns must be equal to the number of covariate lambda values.")
   }
 

diff --git a/R/read_alevin.R b/R/read_alevin.R
@@ -1,3 +1,4 @@
+# nolint start: cyclocomp_linter.
 #' Read in counts data processed with Alevin or Alevin-fry
 #'
 #' @param quant_dir Path to directory where output files are located.
@@ -48,18 +49,18 @@
 #'   include_unspliced = TRUE
 #' )
 #' }
-read_alevin <- function(quant_dir,
-                        fry_mode = FALSE,
-                        include_unspliced = TRUE,
-                        feature_data = FALSE,
-                        round_counts = TRUE,
-                        library_id = NULL,
-                        sample_id = NULL,
-                        project_id = NULL,
-                        tech_version = NULL,
-                        assay_ontology_term_id = NULL,
-                        seq_unit = NULL
-                        ) {
+read_alevin <- function(
+    quant_dir,
+    fry_mode = FALSE,
+    include_unspliced = TRUE,
+    feature_data = FALSE,
+    round_counts = TRUE,
+    library_id = NULL,
+    sample_id = NULL,
+    project_id = NULL,
+    tech_version = NULL,
+    assay_ontology_term_id = NULL,
+    seq_unit = NULL) {
   # checks for *_mode
   if (!is.logical(fry_mode)) {
     stop("fry_mode must be set as TRUE or FALSE")
@@ -72,7 +73,7 @@ read_alevin <- function(quant_dir,
   }
 
   # make sure that include unspliced and feature data are not both set as TRUE
-  if (include_unspliced & feature_data) {
+  if (include_unspliced && feature_data) {
     stop("Feature data does not have unspliced reads, cannot use `include_unspliced=TRUE`")
   }
 
@@ -95,11 +96,8 @@ read_alevin <- function(quant_dir,
   # if alevin-fry USA and MTX format directly create SCE object with fishpond
   if (fry_mode) {
     # only check for usa mode for non-feature data
-    if (!feature_data) {
-      # actually check that files are in usa mode
-      if (meta$usa_mode != TRUE) {
-        stop("Output files not in USA mode")
-      }
+    if (!feature_data && !meta$usa_mode) {
+      stop("Output files not in USA mode")
     }
 
     # define assays to include in SCE object based on include_unspliced
@@ -120,9 +118,7 @@ read_alevin <- function(quant_dir,
       fryDir = quant_dir,
       outputFormat = assay_formats
     )
-  }
-
-  if (!fry_mode) {
+  } else {
     # read in any non-USA formatted alevin-fry data or Alevin data
     counts <- read_tximport(quant_dir)
 
@@ -149,7 +145,7 @@ read_alevin <- function(quant_dir,
 
   return(sce)
 }
-
+# nolint end
 
 #' Read in counts data processed with Alevin or alevin-fry in tximport-compatible formats
 #'
@@ -175,6 +171,8 @@ read_tximport <- function(quant_dir) {
   counts <- as(txi$counts, "CsparseMatrix")
 }
 
+
+
 #' Read alevin metadata from json files
 #'
 #' @param quant_dir Path alevin output directory.

diff --git a/R/sce_to_anndata.R b/R/sce_to_anndata.R
@@ -29,11 +29,11 @@ sce_to_anndata <- function(sce, anndata_file, x_assay_name = "counts") {
     stop("Input must be a SingleCellExperiment object.")
   }
 
-  if(ncol(sce) < 2){
+  if (ncol(sce) < 2) {
     stop("Input SingleCellExperiment must contain at least 2 cells.")
   }
 
-  if(nrow(sce) < 2){
+  if (nrow(sce) < 2) {
     stop("Input SingleCellExperiment must contain at least 2 genes or features.")
   }
 
@@ -56,8 +56,8 @@ sce_to_anndata <- function(sce, anndata_file, x_assay_name = "counts") {
     purrr::discard(is.list)
 
   # print out warning that removed objects won't be converted
-  removed_metadata <- setdiff(names(metadata(sce_to_convert)),  names(metadata_to_keep))
-  if(length(removed_metadata) > 0){
+  removed_metadata <- setdiff(names(metadata(sce_to_convert)), names(metadata_to_keep))
+  if (length(removed_metadata) > 0) {
     glue::glue("{removed_metadata} cannot be converted between SCE and AnnData.") |>
       purrr::walk(message)
   }

diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -0,0 +1,4 @@
+commenter
+pre
+scpcaTools
+tidyverse
diff --git a/tests/testthat/test-integrate_sces.R b/tests/testthat/test-integrate_sces.R
@@ -99,7 +99,7 @@ test_that("`integrate_harmony` fails when covariates and covariate_lambda don't
       "harmony",
       batch_column,
       covariate_cols = "covariate",
-      covariate_lambda = c(1,2) # 1 extra lambda
+      covariate_lambda = c(1, 2) # 1 extra lambda
     )
   )
 })
@@ -226,7 +226,6 @@ test_that("`integrate_sces` works as expected with harmony extra arguments", {
 })
 
 test_that("`integrate_sces` works as expected with a harmony covariate", {
-
   # first with providing lambda
   expect_no_error(
     integrate_sces(
@@ -247,6 +246,4 @@ test_that("`integrate_sces` works as expected with a harmony covariate", {
       covariate_cols = "covariate"
     )
   )
-
 })
-
diff --git a/tests/testthat/test-metadata_to_coldata.R b/tests/testthat/test-metadata_to_coldata.R
@@ -35,7 +35,7 @@ sce_list <- purrr::imap(
     "sce3" = sce3
   ),
   # add some colData and metadata to each SCE object
-  \(sce, batch){
+  \(sce, batch) {
     # add some colData so merge_sce_list doesn't give a warning
     colData(sce)[["sum"]] <- runif(ncol(sce))
 
@@ -130,7 +130,8 @@ test_that("`metadata_to_coldata` fails as expected", {
   # column is missing from sample metadata
   # add a new column that is only in the colData and not in the sample metadata
   sce$batch_column <- "library1"
-  expect_error(metadata_to_coldata(sce,
+  expect_error(metadata_to_coldata(
+    sce,
     join_columns = "batch_column"
   ))
 
@@ -170,8 +171,8 @@ test_that("`metadata_to_coldata` works as expected with multiplexed libraries",
   # only joining on library ids will mean non-unique matches
   suppressWarnings(
     expect_error(metadata_to_coldata(sce,
-                                     join_columns = "library_id")
-    )
+      join_columns = "library_id"
+    ))
   )
 
   # joining on sample and library ids will mean unique matches and should work

diff --git a/tests/testthat/test-sce_to_anndata.R b/tests/testthat/test-sce_to_anndata.R
@@ -5,11 +5,13 @@ set.seed(1665)
 sce <- sim_sce(n_cells = 100, n_genes = 200, n_empty = 0)
 # need to pull out barcodes to add to colData, otherwise colnames get replaced with NULL
 barcodes <- colnames(sce)
-colData(sce) <- DataFrame("test_column" = sample(0:10, 100, rep = TRUE),
-                          "na_column" = NA,
-                          "na_char_column" = NA_character_,
-                          "some_na" = c("a", NA),
-                          row.names = barcodes)
+colData(sce) <- DataFrame(
+  "test_column" = sample(0:10, 100, rep = TRUE),
+  "na_column" = NA,
+  "na_char_column" = NA_character_,
+  "some_na" = c("a", NA),
+  row.names = barcodes
+)
 rowData(sce) <- DataFrame("test_row" = sample(0:10, 200, rep = TRUE))
 
 # add some metadata
@@ -26,7 +28,6 @@ tempdir <- tempdir()
 anndata_file <- file.path(tempdir, "anndata.h5")
 
 test_that("Conversion of SCE to AnnData works as expected", {
-
   # test that the H5 file is created
   expect_snapshot_file({
     sce_to_anndata(sce, anndata_file)
@@ -67,11 +68,9 @@ test_that("Conversion of SCE to AnnData works as expected", {
     logcounts(sce),
     assay(new_sce, "X")
   )
-
 })
 
 test_that("Conversion of SCE to AnnData fails as expected", {
-
   # check that inputting an improper file name causes a failure
   anndata_bad_file <- tempfile(fileext = ".rds")
   expect_error(sce_to_anndata(sce, anndata_bad_file))