diff --git a/.Rbuildignore b/.Rbuildignore index 0322287c..7e55268d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,3 +18,7 @@ # Docker folder ^docker$ + +# development +^\.pre-commit-config\.yaml$ +^\.lintr$ diff --git a/.lintr b/.lintr new file mode 100644 index 00000000..74863d0f --- /dev/null +++ b/.lintr @@ -0,0 +1,8 @@ +encoding: "UTF-8" +linters: linters_with_defaults( + line_length_linter(120), + commented_code_linter = NULL, + indentation_linter = NULL, + object_usage_linter = NULL, + object_name_linter = NULL + ) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..34cae75d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,54 @@ +# All available hooks: https://pre-commit.com/hooks.html +# R specific hooks: https://github.com/lorenzwalthert/precommit +repos: +- repo: https://github.com/lorenzwalthert/precommit + rev: v0.3.2.9027 + hooks: + - id: style-files + args: [--style_pkg=styler, --style_fun=tidyverse_style] + # codemeta must be above use-tidy-description when both are used + # - id: codemeta-description-updated + - id: use-tidy-description + - id: spell-check + exclude: > + (?x)^( + .*\.[rR]| + .*\.feather| + .*\.jpeg| + .*\.pdf| + .*\.png| + .*\.py| + .*\.RData| + .*\.rds| + .*\.Rds| + .*\.Rproj| + .*\.sh| + (.*/|)\.gitignore| + (.*/|)\.gitlab-ci\.yml| + (.*/|)\.lintr| + (.*/|)\.pre-commit-.*| + (.*/|)\.Rbuildignore| + (.*/|)\.Renviron| + (.*/|)\.Rprofile| + (.*/|)\.travis\.yml| + (.*/|)appveyor\.yml| + (.*/|)DESCRIPTION| + (.*/|)NAMESPACE| + (.*/|)renv/settings\.dcf| + (.*/|)renv\.lock| + (.*/|)WORDLIST| + \.github/workflows/.*| + data/.*| + )$ + - id: lintr + - id: parsable-R + - id: no-browser-statement + - id: no-debug-statement + - id: deps-in-desc +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-added-large-files + args: ['--maxkb=200'] + - id: end-of-file-fixer + exclude: '\.Rd' diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index daa0d60f..d40f8fe8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,19 @@ ## Coding style -We generally follow the [tidyverse style](http://style.tidyverse.org/). -You can use the [`styler` package](https://styler.r-lib.org) to perform styling locally. +We try to follow the [tidyverse style](http://style.tidyverse.org/). +You can use the [`styler` package](https://styler.r-lib.org) to perform styling locally, or use the pre-commit hooks described below. + If code needs styling during/after a PR, one of the repository owners can perform styling automatically by making a comment with just the word `/style` in the PR. (Note that for this to work, the commenter must have set their membership to "Public" in https://github.com/orgs/AlexsLemonade/people) + +## Pre-commit hooks + +For convenience, we have included a set of [pre-commit hooks](https://pre-commit.com/) that can be used to automatically format code according to the above specifications, as well as to spell check and check for other common errors. + +To use these hooks, install the `pre-commit` package according to your favorite method (`pip install pre-commit` or `conda install pre-commit`), then run `pre-commit install` in the `scpcaTools` directory. +This will install the hooks in the `.git/hooks` directory, and they will be run automatically when you commit changes. +If any of the hooks fail, the commit will be aborted, and you will need to fix the errors and re-commit. + +Notably, the `spell-check` hook will report spelling errors, but will also add any words it finds to the dictionary file (stored at `inst/WORDLIST`). +This is convenient for many cases (where the word is real but unknown), but be sure to remove truly misspelled words from the dictionary file before committing, or they will not be caught in the future! diff --git a/DESCRIPTION b/DESCRIPTION index 4de46c3b..d72d6b87 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -62,7 +62,7 @@ Suggests: scran, scpcaData, sessioninfo, - Seurat, + Seurat (< 5.0.0), testthat (>= 3.1.9), zellkonverter Remotes: diff --git a/R/integrate_sces.R b/R/integrate_sces.R index d5fe20ca..fd8c6ffb 100644 --- a/R/integrate_sces.R +++ b/R/integrate_sces.R @@ -188,12 +188,12 @@ integrate_harmony <- function(merged_sce, } # if no covariate lambda provided, set to 1 - if(length(covariate_lambda) == 0){ + if (length(covariate_lambda) == 0) { covariate_lambda <- rep(1, length(covariate_cols)) } # check that the lambda values match up - if(length(covariate_lambda) != length(covariate_cols)){ + if (length(covariate_lambda) != length(covariate_cols)) { stop("The number of covariate columns must be equal to the number of covariate lambda values.") } diff --git a/R/read_alevin.R b/R/read_alevin.R index 1d10203a..b9e64052 100644 --- a/R/read_alevin.R +++ b/R/read_alevin.R @@ -1,3 +1,4 @@ +# nolint start: cyclocomp_linter. #' Read in counts data processed with Alevin or Alevin-fry #' #' @param quant_dir Path to directory where output files are located. @@ -48,18 +49,18 @@ #' include_unspliced = TRUE #' ) #' } -read_alevin <- function(quant_dir, - fry_mode = FALSE, - include_unspliced = TRUE, - feature_data = FALSE, - round_counts = TRUE, - library_id = NULL, - sample_id = NULL, - project_id = NULL, - tech_version = NULL, - assay_ontology_term_id = NULL, - seq_unit = NULL - ) { +read_alevin <- function( + quant_dir, + fry_mode = FALSE, + include_unspliced = TRUE, + feature_data = FALSE, + round_counts = TRUE, + library_id = NULL, + sample_id = NULL, + project_id = NULL, + tech_version = NULL, + assay_ontology_term_id = NULL, + seq_unit = NULL) { # checks for *_mode if (!is.logical(fry_mode)) { stop("fry_mode must be set as TRUE or FALSE") @@ -72,7 +73,7 @@ read_alevin <- function(quant_dir, } # make sure that include unspliced and feature data are not both set as TRUE - if (include_unspliced & feature_data) { + if (include_unspliced && feature_data) { stop("Feature data does not have unspliced reads, cannot use `include_unspliced=TRUE`") } @@ -95,11 +96,8 @@ read_alevin <- function(quant_dir, # if alevin-fry USA and MTX format directly create SCE object with fishpond if (fry_mode) { # only check for usa mode for non-feature data - if (!feature_data) { - # actually check that files are in usa mode - if (meta$usa_mode != TRUE) { - stop("Output files not in USA mode") - } + if (!feature_data && !meta$usa_mode) { + stop("Output files not in USA mode") } # define assays to include in SCE object based on include_unspliced @@ -120,9 +118,7 @@ read_alevin <- function(quant_dir, fryDir = quant_dir, outputFormat = assay_formats ) - } - - if (!fry_mode) { + } else { # read in any non-USA formatted alevin-fry data or Alevin data counts <- read_tximport(quant_dir) @@ -149,7 +145,7 @@ read_alevin <- function(quant_dir, return(sce) } - +# nolint end #' Read in counts data processed with Alevin or alevin-fry in tximport-compatible formats #' @@ -175,6 +171,8 @@ read_tximport <- function(quant_dir) { counts <- as(txi$counts, "CsparseMatrix") } + + #' Read alevin metadata from json files #' #' @param quant_dir Path alevin output directory. diff --git a/R/sce_to_anndata.R b/R/sce_to_anndata.R index 46965c3d..7a760b45 100644 --- a/R/sce_to_anndata.R +++ b/R/sce_to_anndata.R @@ -29,11 +29,11 @@ sce_to_anndata <- function(sce, anndata_file, x_assay_name = "counts") { stop("Input must be a SingleCellExperiment object.") } - if(ncol(sce) < 2){ + if (ncol(sce) < 2) { stop("Input SingleCellExperiment must contain at least 2 cells.") } - if(nrow(sce) < 2){ + if (nrow(sce) < 2) { stop("Input SingleCellExperiment must contain at least 2 genes or features.") } @@ -56,8 +56,8 @@ sce_to_anndata <- function(sce, anndata_file, x_assay_name = "counts") { purrr::discard(is.list) # print out warning that removed objects won't be converted - removed_metadata <- setdiff(names(metadata(sce_to_convert)), names(metadata_to_keep)) - if(length(removed_metadata) > 0){ + removed_metadata <- setdiff(names(metadata(sce_to_convert)), names(metadata_to_keep)) + if (length(removed_metadata) > 0) { glue::glue("{removed_metadata} cannot be converted between SCE and AnnData.") |> purrr::walk(message) } diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 00000000..d22ed35f --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,4 @@ +commenter +pre +scpcaTools +tidyverse diff --git a/tests/testthat/test-integrate_sces.R b/tests/testthat/test-integrate_sces.R index 3fb051b9..2e628bfc 100644 --- a/tests/testthat/test-integrate_sces.R +++ b/tests/testthat/test-integrate_sces.R @@ -99,7 +99,7 @@ test_that("`integrate_harmony` fails when covariates and covariate_lambda don't "harmony", batch_column, covariate_cols = "covariate", - covariate_lambda = c(1,2) # 1 extra lambda + covariate_lambda = c(1, 2) # 1 extra lambda ) ) }) @@ -226,7 +226,6 @@ test_that("`integrate_sces` works as expected with harmony extra arguments", { }) test_that("`integrate_sces` works as expected with a harmony covariate", { - # first with providing lambda expect_no_error( integrate_sces( @@ -247,6 +246,4 @@ test_that("`integrate_sces` works as expected with a harmony covariate", { covariate_cols = "covariate" ) ) - }) - diff --git a/tests/testthat/test-metadata_to_coldata.R b/tests/testthat/test-metadata_to_coldata.R index be2ea692..14d17197 100644 --- a/tests/testthat/test-metadata_to_coldata.R +++ b/tests/testthat/test-metadata_to_coldata.R @@ -35,7 +35,7 @@ sce_list <- purrr::imap( "sce3" = sce3 ), # add some colData and metadata to each SCE object - \(sce, batch){ + \(sce, batch) { # add some colData so merge_sce_list doesn't give a warning colData(sce)[["sum"]] <- runif(ncol(sce)) @@ -130,7 +130,8 @@ test_that("`metadata_to_coldata` fails as expected", { # column is missing from sample metadata # add a new column that is only in the colData and not in the sample metadata sce$batch_column <- "library1" - expect_error(metadata_to_coldata(sce, + expect_error(metadata_to_coldata( + sce, join_columns = "batch_column" )) @@ -170,8 +171,8 @@ test_that("`metadata_to_coldata` works as expected with multiplexed libraries", # only joining on library ids will mean non-unique matches suppressWarnings( expect_error(metadata_to_coldata(sce, - join_columns = "library_id") - ) + join_columns = "library_id" + )) ) # joining on sample and library ids will mean unique matches and should work diff --git a/tests/testthat/test-sce_to_anndata.R b/tests/testthat/test-sce_to_anndata.R index 86bc7f70..ea15e79e 100644 --- a/tests/testthat/test-sce_to_anndata.R +++ b/tests/testthat/test-sce_to_anndata.R @@ -5,11 +5,13 @@ set.seed(1665) sce <- sim_sce(n_cells = 100, n_genes = 200, n_empty = 0) # need to pull out barcodes to add to colData, otherwise colnames get replaced with NULL barcodes <- colnames(sce) -colData(sce) <- DataFrame("test_column" = sample(0:10, 100, rep = TRUE), - "na_column" = NA, - "na_char_column" = NA_character_, - "some_na" = c("a", NA), - row.names = barcodes) +colData(sce) <- DataFrame( + "test_column" = sample(0:10, 100, rep = TRUE), + "na_column" = NA, + "na_char_column" = NA_character_, + "some_na" = c("a", NA), + row.names = barcodes +) rowData(sce) <- DataFrame("test_row" = sample(0:10, 200, rep = TRUE)) # add some metadata @@ -26,7 +28,6 @@ tempdir <- tempdir() anndata_file <- file.path(tempdir, "anndata.h5") test_that("Conversion of SCE to AnnData works as expected", { - # test that the H5 file is created expect_snapshot_file({ sce_to_anndata(sce, anndata_file) @@ -67,11 +68,9 @@ test_that("Conversion of SCE to AnnData works as expected", { logcounts(sce), assay(new_sce, "X") ) - }) test_that("Conversion of SCE to AnnData fails as expected", { - # check that inputting an improper file name causes a failure anndata_bad_file <- tempfile(fileext = ".rds") expect_error(sce_to_anndata(sce, anndata_bad_file))