From ff7ff9f047abc1d257f0cc5505127b334ee9da3f Mon Sep 17 00:00:00 2001
From: Joshua Shapiro <josh.shapiro@ccdatalab.org>
Date: Wed, 8 Nov 2023 17:19:46 -0500
Subject: [PATCH 1/7] Add pre-commit config with linter settings

---
 .Rbuildignore           |  4 ++++
 .lintr                  |  7 ++++++
 .pre-commit-config.yaml | 53 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+)
 create mode 100644 .lintr
 create mode 100644 .pre-commit-config.yaml

diff --git a/.Rbuildignore b/.Rbuildignore
index 0322287c..7e55268d 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -18,3 +18,7 @@
 
 # Docker folder
 ^docker$
+
+# development
+^\.pre-commit-config\.yaml$
+^\.lintr$
diff --git a/.lintr b/.lintr
new file mode 100644
index 00000000..bf17bfd3
--- /dev/null
+++ b/.lintr
@@ -0,0 +1,7 @@
+linters: linters_with_defaults(
+    line_length_linter(120),
+    indentation_linter = NULL,
+    object_usage_linter = NULL,
+    object_name_linter = NULL
+  )
+encoding: "UTF-8"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..1c11830b
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,53 @@
+# All available hooks: https://pre-commit.com/hooks.html
+# R specific hooks: https://github.com/lorenzwalthert/precommit
+repos:
+-   repo: https://github.com/lorenzwalthert/precommit
+    rev: v0.3.2.9025
+    hooks:
+    -   id: style-files
+        args: [--style_pkg=styler, --style_fun=tidyverse_style]
+    # codemeta must be above use-tidy-description when both are used
+    # -   id: codemeta-description-updated
+    -   id: use-tidy-description
+    -   id: spell-check
+        exclude: >
+          (?x)^(
+          .*\.[rR]|
+          .*\.feather|
+          .*\.jpeg|
+          .*\.pdf|
+          .*\.png|
+          .*\.py|
+          .*\.RData|
+          .*\.rds|
+          .*\.Rds|
+          .*\.Rproj|
+          .*\.sh|
+          (.*/|)\.gitignore|
+          (.*/|)\.gitlab-ci\.yml|
+          (.*/|)\.lintr|
+          (.*/|)\.pre-commit-.*|
+          (.*/|)\.Rbuildignore|
+          (.*/|)\.Renviron|
+          (.*/|)\.Rprofile|
+          (.*/|)\.travis\.yml|
+          (.*/|)appveyor\.yml|
+          (.*/|)NAMESPACE|
+          (.*/|)renv/settings\.dcf|
+          (.*/|)renv\.lock|
+          (.*/|)WORDLIST|
+          \.github/workflows/.*|
+          data/.*|
+          )$
+    -   id: lintr
+    -   id: parsable-R
+    -   id: no-browser-statement
+    -   id: no-debug-statement
+    -   id: deps-in-desc
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+    -   id: check-added-large-files
+        args: ['--maxkb=200']
+    -   id: end-of-file-fixer
+        exclude: '\.Rd'

From 1d44a90b6983d495361f3fa725338db25b047978 Mon Sep 17 00:00:00 2001
From: Joshua Shapiro <josh.shapiro@ccdatalab.org>
Date: Wed, 22 Nov 2023 10:13:52 -0500
Subject: [PATCH 2/7] update precommit to latest

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1c11830b..04a6cdec 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@
 # R specific hooks: https://github.com/lorenzwalthert/precommit
 repos:
 -   repo: https://github.com/lorenzwalthert/precommit
-    rev: v0.3.2.9025
+    rev: v0.3.2.9027
     hooks:
     -   id: style-files
         args: [--style_pkg=styler, --style_fun=tidyverse_style]

From 75ca604047bf08c239d0ab84477fa16ca295c1b0 Mon Sep 17 00:00:00 2001
From: Joshua Shapiro <josh.shapiro@ccdatalab.org>
Date: Wed, 22 Nov 2023 10:19:17 -0500
Subject: [PATCH 3/7] Add pre-commit to contributing.md

update wordlist
---
 CONTRIBUTING.md | 16 ++++++++++++++--
 inst/WORDLIST   |  4 ++++
 2 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 inst/WORDLIST

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index daa0d60f..b6e1efb9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,7 +2,19 @@
 
 ## Coding style
 
-We generally follow the [tidyverse style](http://style.tidyverse.org/).
-You can use the [`styler` package](https://styler.r-lib.org) to perform styling locally.
+We try to follow the [tidyverse style](http://style.tidyverse.org/).
+You can use the [`styler` package](https://styler.r-lib.org) to perform styling locally, or use the pre-commit hooks described below.
+
 If code needs styling during/after a PR, one of the repository owners can perform styling automatically by making a comment with just the word `/style` in the PR.
 (Note that for this to work, the commenter must have set their membership to "Public" in https://github.com/orgs/AlexsLemonade/people)
+
+## Pre-commit hooks
+
+For convenience, we have included a set of [pre-commit hooks](https://pre-commit.com/) that can be used to automatically format code according to the above specifications, as well as to spell check and check for other common errors.
+
+To use these hooks, install the `pre-commit` package according to your favorite method (`pip install pre-commit` or `conda install pre-commit`), then run `pre-commit install` in the `scpca-nf` directory.
+This will install the hooks in the `.git/hooks` directory, and they will be run automatically when you commit changes.
+If any of the hooks fail, the commit will be aborted, and you will need to fix the errors and re-commit.
+
+Notably, the `spell-check` hook will report spelling errors, but will also add any words it finds to the dictionary file (stored at `inst/WORDLIST`).
+This is convenient for many cases (where the word is real but unknown), but be sure to remove truly misspelled words from the dictionary file before committing, or they will not be caught in the future!
diff --git a/inst/WORDLIST b/inst/WORDLIST
new file mode 100644
index 00000000..d22ed35f
--- /dev/null
+++ b/inst/WORDLIST
@@ -0,0 +1,4 @@
+commenter
+pre
+scpcaTools
+tidyverse

From 48933af0936f2b3b64750e9a67331f64870b76bf Mon Sep 17 00:00:00 2001
From: Joshua Shapiro <josh.shapiro@ccdatalab.org>
Date: Wed, 22 Nov 2023 10:44:56 -0500
Subject: [PATCH 4/7] lintr update

---
 .lintr | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.lintr b/.lintr
index bf17bfd3..74863d0f 100644
--- a/.lintr
+++ b/.lintr
@@ -1,7 +1,8 @@
+encoding: "UTF-8"
 linters: linters_with_defaults(
     line_length_linter(120),
+    commented_code_linter = NULL,
     indentation_linter = NULL,
     object_usage_linter = NULL,
     object_name_linter = NULL
   )
-encoding: "UTF-8"

From 61015f6fa1cffd93e469d3c95f9506cb6626f6ab Mon Sep 17 00:00:00 2001
From: Joshua Shapiro <josh.shapiro@ccdatalab.org>
Date: Wed, 22 Nov 2023 10:45:06 -0500
Subject: [PATCH 5/7] style and linting updates

---
 R/integrate_sces.R                        |  4 +--
 R/read_alevin.R                           | 42 +++++++++++------------
 R/sce_to_anndata.R                        |  8 ++---
 tests/testthat/test-integrate_sces.R      |  5 +--
 tests/testthat/test-metadata_to_coldata.R |  9 ++---
 tests/testthat/test-sce_to_anndata.R      | 15 ++++----
 6 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/R/integrate_sces.R b/R/integrate_sces.R
index d5fe20ca..fd8c6ffb 100644
--- a/R/integrate_sces.R
+++ b/R/integrate_sces.R
@@ -188,12 +188,12 @@ integrate_harmony <- function(merged_sce,
   }
 
   # if no covariate lambda provided, set to 1
-  if(length(covariate_lambda) == 0){
+  if (length(covariate_lambda) == 0) {
     covariate_lambda <- rep(1, length(covariate_cols))
   }
 
   # check that the lambda values match up
-  if(length(covariate_lambda) != length(covariate_cols)){
+  if (length(covariate_lambda) != length(covariate_cols)) {
     stop("The number of covariate columns must be equal to the number of covariate lambda values.")
   }
 
diff --git a/R/read_alevin.R b/R/read_alevin.R
index 1d10203a..b9e64052 100644
--- a/R/read_alevin.R
+++ b/R/read_alevin.R
@@ -1,3 +1,4 @@
+# nolint start: cyclocomp_linter.
 #' Read in counts data processed with Alevin or Alevin-fry
 #'
 #' @param quant_dir Path to directory where output files are located.
@@ -48,18 +49,18 @@
 #'   include_unspliced = TRUE
 #' )
 #' }
-read_alevin <- function(quant_dir,
-                        fry_mode = FALSE,
-                        include_unspliced = TRUE,
-                        feature_data = FALSE,
-                        round_counts = TRUE,
-                        library_id = NULL,
-                        sample_id = NULL,
-                        project_id = NULL,
-                        tech_version = NULL,
-                        assay_ontology_term_id = NULL,
-                        seq_unit = NULL
-                        ) {
+read_alevin <- function(
+    quant_dir,
+    fry_mode = FALSE,
+    include_unspliced = TRUE,
+    feature_data = FALSE,
+    round_counts = TRUE,
+    library_id = NULL,
+    sample_id = NULL,
+    project_id = NULL,
+    tech_version = NULL,
+    assay_ontology_term_id = NULL,
+    seq_unit = NULL) {
   # checks for *_mode
   if (!is.logical(fry_mode)) {
     stop("fry_mode must be set as TRUE or FALSE")
@@ -72,7 +73,7 @@ read_alevin <- function(quant_dir,
   }
 
   # make sure that include unspliced and feature data are not both set as TRUE
-  if (include_unspliced & feature_data) {
+  if (include_unspliced && feature_data) {
     stop("Feature data does not have unspliced reads, cannot use `include_unspliced=TRUE`")
   }
 
@@ -95,11 +96,8 @@ read_alevin <- function(quant_dir,
   # if alevin-fry USA and MTX format directly create SCE object with fishpond
   if (fry_mode) {
     # only check for usa mode for non-feature data
-    if (!feature_data) {
-      # actually check that files are in usa mode
-      if (meta$usa_mode != TRUE) {
-        stop("Output files not in USA mode")
-      }
+    if (!feature_data && !meta$usa_mode) {
+      stop("Output files not in USA mode")
     }
 
     # define assays to include in SCE object based on include_unspliced
@@ -120,9 +118,7 @@ read_alevin <- function(quant_dir,
       fryDir = quant_dir,
       outputFormat = assay_formats
     )
-  }
-
-  if (!fry_mode) {
+  } else {
     # read in any non-USA formatted alevin-fry data or Alevin data
     counts <- read_tximport(quant_dir)
 
@@ -149,7 +145,7 @@ read_alevin <- function(quant_dir,
 
   return(sce)
 }
-
+# nolint end
 
 #' Read in counts data processed with Alevin or alevin-fry in tximport-compatible formats
 #'
@@ -175,6 +171,8 @@ read_tximport <- function(quant_dir) {
   counts <- as(txi$counts, "CsparseMatrix")
 }
 
+
+
 #' Read alevin metadata from json files
 #'
 #' @param quant_dir Path alevin output directory.
diff --git a/R/sce_to_anndata.R b/R/sce_to_anndata.R
index 46965c3d..7a760b45 100644
--- a/R/sce_to_anndata.R
+++ b/R/sce_to_anndata.R
@@ -29,11 +29,11 @@ sce_to_anndata <- function(sce, anndata_file, x_assay_name = "counts") {
     stop("Input must be a SingleCellExperiment object.")
   }
 
-  if(ncol(sce) < 2){
+  if (ncol(sce) < 2) {
     stop("Input SingleCellExperiment must contain at least 2 cells.")
   }
 
-  if(nrow(sce) < 2){
+  if (nrow(sce) < 2) {
     stop("Input SingleCellExperiment must contain at least 2 genes or features.")
   }
 
@@ -56,8 +56,8 @@ sce_to_anndata <- function(sce, anndata_file, x_assay_name = "counts") {
     purrr::discard(is.list)
 
   # print out warning that removed objects won't be converted
-  removed_metadata <- setdiff(names(metadata(sce_to_convert)),  names(metadata_to_keep))
-  if(length(removed_metadata) > 0){
+  removed_metadata <- setdiff(names(metadata(sce_to_convert)), names(metadata_to_keep))
+  if (length(removed_metadata) > 0) {
     glue::glue("{removed_metadata} cannot be converted between SCE and AnnData.") |>
       purrr::walk(message)
   }
diff --git a/tests/testthat/test-integrate_sces.R b/tests/testthat/test-integrate_sces.R
index 3fb051b9..2e628bfc 100644
--- a/tests/testthat/test-integrate_sces.R
+++ b/tests/testthat/test-integrate_sces.R
@@ -99,7 +99,7 @@ test_that("`integrate_harmony` fails when covariates and covariate_lambda don't
       "harmony",
       batch_column,
       covariate_cols = "covariate",
-      covariate_lambda = c(1,2) # 1 extra lambda
+      covariate_lambda = c(1, 2) # 1 extra lambda
     )
   )
 })
@@ -226,7 +226,6 @@ test_that("`integrate_sces` works as expected with harmony extra arguments", {
 })
 
 test_that("`integrate_sces` works as expected with a harmony covariate", {
-
   # first with providing lambda
   expect_no_error(
     integrate_sces(
@@ -247,6 +246,4 @@ test_that("`integrate_sces` works as expected with a harmony covariate", {
       covariate_cols = "covariate"
     )
   )
-
 })
-
diff --git a/tests/testthat/test-metadata_to_coldata.R b/tests/testthat/test-metadata_to_coldata.R
index be2ea692..14d17197 100644
--- a/tests/testthat/test-metadata_to_coldata.R
+++ b/tests/testthat/test-metadata_to_coldata.R
@@ -35,7 +35,7 @@ sce_list <- purrr::imap(
     "sce3" = sce3
   ),
   # add some colData and metadata to each SCE object
-  \(sce, batch){
+  \(sce, batch) {
     # add some colData so merge_sce_list doesn't give a warning
     colData(sce)[["sum"]] <- runif(ncol(sce))
 
@@ -130,7 +130,8 @@ test_that("`metadata_to_coldata` fails as expected", {
   # column is missing from sample metadata
   # add a new column that is only in the colData and not in the sample metadata
   sce$batch_column <- "library1"
-  expect_error(metadata_to_coldata(sce,
+  expect_error(metadata_to_coldata(
+    sce,
     join_columns = "batch_column"
   ))
 
@@ -170,8 +171,8 @@ test_that("`metadata_to_coldata` works as expected with multiplexed libraries",
   # only joining on library ids will mean non-unique matches
   suppressWarnings(
     expect_error(metadata_to_coldata(sce,
-                                     join_columns = "library_id")
-    )
+      join_columns = "library_id"
+    ))
   )
 
   # joining on sample and library ids will mean unique matches and should work
diff --git a/tests/testthat/test-sce_to_anndata.R b/tests/testthat/test-sce_to_anndata.R
index 86bc7f70..ea15e79e 100644
--- a/tests/testthat/test-sce_to_anndata.R
+++ b/tests/testthat/test-sce_to_anndata.R
@@ -5,11 +5,13 @@ set.seed(1665)
 sce <- sim_sce(n_cells = 100, n_genes = 200, n_empty = 0)
 # need to pull out barcodes to add to colData, otherwise colnames get replaced with NULL
 barcodes <- colnames(sce)
-colData(sce) <- DataFrame("test_column" = sample(0:10, 100, rep = TRUE),
-                          "na_column" = NA,
-                          "na_char_column" = NA_character_,
-                          "some_na" = c("a", NA),
-                          row.names = barcodes)
+colData(sce) <- DataFrame(
+  "test_column" = sample(0:10, 100, rep = TRUE),
+  "na_column" = NA,
+  "na_char_column" = NA_character_,
+  "some_na" = c("a", NA),
+  row.names = barcodes
+)
 rowData(sce) <- DataFrame("test_row" = sample(0:10, 200, rep = TRUE))
 
 # add some metadata
@@ -26,7 +28,6 @@ tempdir <- tempdir()
 anndata_file <- file.path(tempdir, "anndata.h5")
 
 test_that("Conversion of SCE to AnnData works as expected", {
-
   # test that the H5 file is created
   expect_snapshot_file({
     sce_to_anndata(sce, anndata_file)
@@ -67,11 +68,9 @@ test_that("Conversion of SCE to AnnData works as expected", {
     logcounts(sce),
     assay(new_sce, "X")
   )
-
 })
 
 test_that("Conversion of SCE to AnnData fails as expected", {
-
   # check that inputting an improper file name causes a failure
   anndata_bad_file <- tempfile(fileext = ".rds")
   expect_error(sce_to_anndata(sce, anndata_bad_file))

From 3db6c7abca2173f7c407abbfe4ce305cb2cb05d0 Mon Sep 17 00:00:00 2001
From: Joshua Shapiro <josh.shapiro@ccdatalab.org>
Date: Wed, 22 Nov 2023 11:48:09 -0500
Subject: [PATCH 6/7] Require Seurat before 5

---
 .pre-commit-config.yaml | 1 +
 DESCRIPTION             | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 04a6cdec..34cae75d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -32,6 +32,7 @@ repos:
           (.*/|)\.Rprofile|
           (.*/|)\.travis\.yml|
           (.*/|)appveyor\.yml|
+          (.*/|)DESCRIPTION|
           (.*/|)NAMESPACE|
           (.*/|)renv/settings\.dcf|
           (.*/|)renv\.lock|
diff --git a/DESCRIPTION b/DESCRIPTION
index 4de46c3b..d72d6b87 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -62,7 +62,7 @@ Suggests:
     scran,
     scpcaData,
     sessioninfo,
-    Seurat,
+    Seurat (< 5.0.0),
     testthat (>= 3.1.9),
     zellkonverter
 Remotes:

From f155bee5a62458418846365879662608f4e111be Mon Sep 17 00:00:00 2001
From: Joshua Shapiro <josh.shapiro@ccdatalab.org>
Date: Wed, 22 Nov 2023 12:02:01 -0500
Subject: [PATCH 7/7] fix copy paste error

Co-authored-by: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com>
---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b6e1efb9..d40f8fe8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -12,7 +12,7 @@ If code needs styling during/after a PR, one of the repository owners can perfor
 
 For convenience, we have included a set of [pre-commit hooks](https://pre-commit.com/) that can be used to automatically format code according to the above specifications, as well as to spell check and check for other common errors.
 
-To use these hooks, install the `pre-commit` package according to your favorite method (`pip install pre-commit` or `conda install pre-commit`), then run `pre-commit install` in the `scpca-nf` directory.
+To use these hooks, install the `pre-commit` package according to your favorite method (`pip install pre-commit` or `conda install pre-commit`), then run `pre-commit install` in the `scpcaTools` directory.
 This will install the hooks in the `.git/hooks` directory, and they will be run automatically when you commit changes.
 If any of the hooks fail, the commit will be aborted, and you will need to fix the errors and re-commit.