diff --git a/NEWS.md b/NEWS.md index 709288926..ac597b1c4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,7 @@ * Setting `exclusions` supports globs like `knitr*` to exclude files/directories with a pattern (#1554, @MichaelChirico). * `object_name_linter()` and `object_length_linter()` apply to objects assigned with `assign()` or generics created with `setGeneric()` (#1665, @MichaelChirico). * `object_usage_linter()` gains argument `interpret_extensions` to govern which false positive-prone common syntaxes should be checked for used objects (#1472, @MichaelChirico). Currently `"glue"` (renamed from earlier argument `interpret_glue`) and `"rlang"` are supported. The latter newly covers usage of the `.env` pronoun like `.env$key`, where `key` was previously missed as being a used variable. +* `line_length_linter()` has a new argument `ignore_string_bodies` (defaulting to `FALSE`) which governs whether the contents of multi-line string bodies should be linted (#856, @MichaelChirico). We think the biggest use case for this is writing SQL in R strings, especially in cases where the recommended string with for SQL & R differ. ### New linters diff --git a/R/line_length_linter.R b/R/line_length_linter.R index c8780c6d8..0655131ce 100644 --- a/R/line_length_linter.R +++ b/R/line_length_linter.R @@ -2,7 +2,10 @@ #' #' Check that the line length of both comments and code is less than `length`. #' -#' @param length maximum line length allowed. Default is 80L (Hollerith limit). +#' @param length Maximum line length allowed. Default is `80L` (Hollerith limit). +#' @param ignore_string_bodies Logical, default `FALSE`. If `TRUE`, the contents +#' of string literals are ignored. The quotes themselves are included, so this +#' mainly affects wide multiline strings, e.g. SQL queries. #' #' @examples #' # will produce lints @@ -22,7 +25,7 @@ #' - [linters] for a complete list of linters available in lintr. #' - #' @export -line_length_linter <- function(length = 80L) { +line_length_linter <- function(length = 80L, ignore_string_bodies = FALSE) { general_msg <- paste("Lines should not be more than", length, "characters.") Linter(linter_level = "file", function(source_expression) { @@ -30,6 +33,12 @@ line_length_linter <- function(length = 80L) { line_lengths <- nchar(source_expression$file_lines) long_lines <- which(line_lengths > length) + if (ignore_string_bodies) { + in_string_body_idx <- + is_in_string_body(source_expression$full_parsed_content, length, long_lines) + long_lines <- long_lines[!in_string_body_idx] + } + Map( function(long_line, line_length) { Lint( @@ -47,3 +56,39 @@ line_length_linter <- function(length = 80L) { ) }) } + +is_in_string_body <- function(parse_data, max_length, long_idx) { + str_idx <- parse_data$token == "STR_CONST" + if (!any(str_idx)) { + return(rep(FALSE, length(long_idx))) + } + str_data <- parse_data[str_idx, ] + if (all(str_data$line1 == str_data$line2)) { + return(rep(FALSE, length(long_idx))) + } + # right delimiter just ends at 'col2', but 'col1' takes some sleuthing + str_data$line1_width <- nchar(vapply( + strsplit(str_data$text, "\n", fixed = TRUE), + function(x) x[1L], + FUN.VALUE = character(1L), + USE.NAMES = FALSE + )) + str_data$col1_end <- str_data$col1 + str_data$line1_width + vapply( + long_idx, + function(line) { + # strictly inside a multi-line string body + if (any(str_data$line1 < line & str_data$line2 > line)) { + return(TRUE) + } + on_line1_idx <- str_data$line1 == line + if (any(on_line1_idx)) { + return(max(str_data$col1_end[on_line1_idx]) <= max_length) + } + # use parse data to capture possible trailing expressions on this line + on_line2_idx <- parse_data$line2 == line + any(on_line2_idx) && max(parse_data$col2[on_line2_idx]) <= max_length + }, + logical(1L) + ) +} diff --git a/R/utils.R b/R/utils.R index 748a73658..00a6c1ca8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -234,8 +234,12 @@ get_r_string <- function(s, xpath = NULL) { s <- xml_find_chr(s, sprintf("string(%s)", xpath)) } } - # parse() skips "" elements --> offsets the length of the output, - # but NA in --> NA out + r_string_from_parse_text(s) +} + +# parse() skips "" elements --> offsets the length of the output, +# but NA in --> NA out +r_string_from_parse_text <- function(s) { is.na(s) <- !nzchar(s) out <- as.character(parse(text = s, keep.source = FALSE)) is.na(out) <- is.na(s) diff --git a/man/line_length_linter.Rd b/man/line_length_linter.Rd index 5585139d2..4305e5df5 100644 --- a/man/line_length_linter.Rd +++ b/man/line_length_linter.Rd @@ -4,10 +4,14 @@ \alias{line_length_linter} \title{Line length linter} \usage{ -line_length_linter(length = 80L) +line_length_linter(length = 80L, ignore_string_bodies = FALSE) } \arguments{ -\item{length}{maximum line length allowed. Default is 80L (Hollerith limit).} +\item{length}{Maximum line length allowed. Default is \code{80L} (Hollerith limit).} + +\item{ignore_string_bodies}{Logical, default \code{FALSE}. If \code{TRUE}, the contents +of string literals are ignored. The quotes themselves are included, so this +mainly affects wide multiline strings, e.g. SQL queries.} } \description{ Check that the line length of both comments and code is less than \code{length}. diff --git a/tests/testthat/test-line_length_linter.R b/tests/testthat/test-line_length_linter.R index 5e22fc523..8ad23acdc 100644 --- a/tests/testthat/test-line_length_linter.R +++ b/tests/testthat/test-line_length_linter.R @@ -1,8 +1,8 @@ test_that("line_length_linter skips allowed usages", { linter <- line_length_linter(80L) - expect_lint("blah", NULL, linter) - expect_lint(strrep("x", 80L), NULL, linter) + expect_no_lint("blah", linter) + expect_no_lint(strrep("x", 80L), linter) }) test_that("line_length_linter blocks disallowed usages", { @@ -37,7 +37,7 @@ test_that("line_length_linter blocks disallowed usages", { linter <- line_length_linter(20L) lint_msg <- rex::rex("Lines should not be more than 20 characters. This line is 22 characters.") - expect_lint(strrep("a", 20L), NULL, linter) + expect_no_lint(strrep("a", 20L), linter) expect_lint( strrep("a", 22L), list( @@ -71,3 +71,86 @@ test_that("Multiple lints give custom messages", { line_length_linter(5L) ) }) + +test_that("string bodies can be ignored", { + linter <- line_length_linter(10L, ignore_string_bodies = TRUE) + lint_msg <- rex::rex("Lines should not be more than 10 characters. This line is 15 characters.") + + expect_no_lint( + trim_some(" + 1234567890 + str <- ' + 123456789012345 + ' + "), + linter + ) + + expect_no_lint( + trim_some(" + 1234567890 + str45 <- ' + 123456789012345 + ' + "), + linter + ) + + expect_no_lint( + trim_some(" + 1234567890 + str <- '90 + 123456789012345 + 123456789' + "), + linter + ) + + expect_lint( + trim_some(" + 1234567890 + str456 <- ' + 123456789012345 + ' + "), + list( + list("11 characters", line_number = 2L), + list("11 characters", line_number = 4L) + ), + linter + ) + + expect_lint( + trim_some(" + 1234567890 + str <- '9012345 + 1234567890 + 123456789' + "), + lint_msg, + linter + ) + + expect_lint( + trim_some(" + 1234567890 + str <- '90 + 1234567890 + 12345678'; 2345 + "), + lint_msg, + linter + ) + + expect_lint( + "'1'; '2'; '345'", + lint_msg, + linter + ) + + expect_lint( + "123456789012345", + lint_msg, + linter + ) +})