From 304aed5697aaf30fc0bd20ca5e35fe8b285eff82 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Fri, 19 Apr 2024 09:17:36 -0700
Subject: [PATCH 1/8] sep= works with by= approach to splitting (#6028)

---
 NEWS.md               | 2 ++
 R/data.table.R        | 6 ++++--
 inst/tests/tests.Rraw | 5 +++++
 man/split.Rd          | 2 +-
 4 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index bc5147107..e30849114 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -36,6 +36,8 @@
 
 10. `measure` now supports user-specified `cols` argument, which can be useful to specify a subset of columns to `melt`, without having to use a regex, [#5063](https://github.com/Rdatatable/data.table/issues/5063). Thanks to @UweBlock and @Henrik-P for reporting, and @tdhock for the PR.
 
+11. `split.data.table` recognizes `sep=` when splitting with `by=`, just like the default and data.frame methods [#5417](https://github.com/Rdatatable/data.table/issues/5417).
+
 ## BUG FIXES
 
 1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix.
diff --git a/R/data.table.R b/R/data.table.R
index 89309e58b..e0cddb38f 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -2452,9 +2452,11 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR
     dtq[["i"]] = quote(levs)
     join = TRUE
   }
+  dots = list(...)
+  if (!"sep" %chin% names(dots)) dots$sep = "."
   dtq[["j"]] = substitute(
-    list(.ll.tech.split=list(.expr), .ll.tech.split.names=paste(lapply(.BY, as.character), collapse=".")),
-    list(.expr = if (join) quote(if(.N == 0L) .SD[0L] else .SD) else as.name(".SD"))
+    list(.ll.tech.split=list(.expr), .ll.tech.split.names=paste(lapply(.BY, as.character), collapse=.sep)),
+    list(.expr = if (join) quote(if(.N == 0L) .SD[0L] else .SD) else as.name(".SD"), .sep = dots$sep)
   )
   dtq[["by"]] = substitute( # retain order, for `join` and `sorted` it will use order of `i` data.table instead of `keyby`.
     .expr,
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 28532eb59..e00c4ac6e 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -18481,3 +18481,8 @@ DT = data.table(x = c(NA, "e", "b", "j", "w", NA))
 test(2258.8, capture.output(print(DT, na.print=".", topn=2, col.names="none")), c(" 1: .", " 2: e", "---  ", " 5: w", " 6: ."))
 # table requires splitting, col.names!="none"
 test(2258.9, capture.output(print(DT, na.print=".", topn=2)), c("    x", " 1: .", " 2: e", "---  ", " 5: w", " 6: ."))
+
+# split(by = ., sep = ..) works like split(f= ., sep = ..), #5417
+x = data.table(rep(1:2, each=5L), 1:5, 1:10)
+test(2259.1, names(split(x, by = c("V1", "V2"), sep = "|")), sort(names(split(x, list(x$V1, x$V2), sep = "|"))))
+test(2259.2, names(split(x, by = c("V1", "V2"), sep = "||")), sort(names(split(x, list(x$V1, x$V2), sep = "||"))))
diff --git a/man/split.Rd b/man/split.Rd
index 687771f0c..eedbe7d67 100644
--- a/man/split.Rd
+++ b/man/split.Rd
@@ -18,7 +18,7 @@
   \item{sorted}{When default \code{FALSE} it will retain the order of groups we are splitting on. When \code{TRUE} then sorted list(s) are returned. Does not have effect for \code{f} argument.}
   \item{keep.by}{logical default \code{TRUE}. Keep column provided to \code{by} argument.}
   \item{flatten}{logical default \code{TRUE} will unlist nested lists of data.tables. When using \code{f} results are always flattened to list of data.tables.}
-  \item{\dots}{passed to data.frame way of processing when using \code{f} argument.}
+  \item{\dots}{When using \code{f}, passed to \code{\link[base:split]{split.data.frame}}. When using \code{by}, \code{sep} is recognized as with the default method.}
   \item{verbose}{logical default \code{FALSE}. When \code{TRUE} it will print to console data.table split query used to split data.}
 }
 \details{

From b9d51f1034cc0fdd06d324e62bdd05b9dcb84cef Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Fri, 19 Apr 2024 09:32:26 -0700
Subject: [PATCH 2/8] Tests robust to locale sorting (#6074)

* Tests robust to locale sorting

* NEWS

* Comment describing helper

* add a TODO for our future selves
---
 NEWS.md               |  4 +++-
 inst/tests/tests.Rraw | 53 +++++++++++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e30849114..b14b9491c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -74,7 +74,9 @@
 
 9. `print.data.table` now handles combination multibyte characters correctly when truncating wide string entries, [#5096](https://github.com/Rdatatable/data.table/issues/5096). Thanks to @MichaelChirico for the report and @joshhwuu for the fix.
 
-10. `test.data.table()` runs correctly in more sessions, in particular those where the `digits` or `warn` settings are not their defaults (`7` and `0`, respectively), [#5285](https://github.com/Rdatatable/data.table/issues/5285). Thanks @OfekShilon for the report and suggested fix and @MichaelChirico for the PR.
+10. `test.data.table()` runs robustly:
+  + In sessions where the `digits` or `warn` options are not their defaults (`7` and `0`, respectively), [#5285](https://github.com/Rdatatable/data.table/issues/5285). Thanks @OfekShilon for the report and suggested fix and @MichaelChirico for the PR.
+  + In locales where `letters != sort(letters)`, e.g. Latvian, [#3502](https://github.com/Rdatatable/data.table/issues/3502). Thanks @minemR for the report and @MichaelChirico for the fix.
 
 11. Using `print.data.table` when truncation is needed with `row.names = FALSE` prints the indicator `---` in every value column instead of adding a blank column where the `rownames` would have been just to include `---`, [#4083](https://github.com/Rdatatable/data.table/issues/4083). Thanks @MichaelChirico for the report and @joshhwuu for the fix.
 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e00c4ac6e..e2791ed5d 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -192,6 +192,16 @@ base_messages = list(
   NULL
 )
 
+# Ensure an operation uses C-locale sorting (#3502). For test set-ups/comparisons that use base operations, which are
+#   susceptible to locale-specific sorting issues, but shouldn't be needed for data.table code, which always uses C sorting.
+# TODO(R>=3.3.0): use order(method="radix") as a way to avoid needing this helper
+with_c_collate = function(expr) {
+  old = Sys.getlocale("LC_COLLATE")
+  on.exit(Sys.setlocale("LC_COLLATE", old))
+  Sys.setlocale("LC_COLLATE", "C")
+  expr
+}
+
 ##########################
 .do_not_rm = ls()  # objects that exist at this point should not be removed by rm_all(); e.g. test_*, base_messages, Ctest_dt_win_snprintf, prevtest, etc
 ##########################
@@ -1834,10 +1844,10 @@ test(609, chorder(character()), base::order(character()))
 test(610, chorder(""), base::order(""))
 # Extra tests of chorder and chgroup
 x = sample(LETTERS)
-test(610.1, chorder(x), base::order(x))
+test(610.1, chorder(x), with_c_collate(base::order(x)))
 test(610.2, chgroup(x), seq_along(x))
 x = sample(LETTERS,1000,replace=TRUE)
-test(610.3, chorder(x), base::order(x))
+test(610.3, chorder(x), with_c_collate(base::order(x)))
 test(610.4, unique(x[chgroup(x)]), unique(x))
 
 # := by group
@@ -3612,34 +3622,37 @@ test(1100, dt1[dt2,roll=-Inf,rollends=c(FALSE,TRUE)]$ind, INT(NA,NA,1,2,2,2,2,2,
   test(1102.12, dcast(DT, "a ~  c ", value.var="b"), error="not found or of unknown type")
   test(1102.13, dcast(DT, a ~  a, value.var="c"), error="are not found in 'data'")
 
+  # NB: for 1102.{14,15,16}, always supply levels for letters in setup data for locale robustness (#3502)
+
   # fix for #47 - issue when factor columns on formula LHS along with `drop=FALSE`
   set.seed(1L)
-  DT = data.table(a=factor(sample(letters[1:3], 10, replace=TRUE), letters[1:5]),
-                  b=factor(sample(tail(letters, 5), 10, replace=TRUE)))
+  DT = data.table(a=factor(sample(letters[1:3], 10L, replace=TRUE), levels=letters[1:5]),
+                  b=factor(sample(letters[22:26], 10L, replace=TRUE), levels=letters[22:26]))
   test(1102.14, dcast(DT, a~b, drop=FALSE, fun.aggregate=length, value.var="b"),
-                data.table(a=factor(letters[1:5]), v=INT(0,1,0,0,0), w=INT(1,1,1,0,0), x=INT(0,0,1,0,0), y=INT(2,1,1,0,0), z=INT(0,1,0,0,0), key="a"))
+                data.table(a=factor(letters[1:5], levels=letters[1:5]), v=INT(0,1,0,0,0), w=INT(1,1,1,0,0), x=INT(0,0,1,0,0), y=INT(2,1,1,0,0), z=INT(0,1,0,0,0), key="a"))
 
   # reverse the levels
   set.seed(1L)
-  DT = data.table(a=factor(sample(letters[1:3], 10, replace=TRUE), letters[5:1]),
-                  b=factor(sample(tail(letters, 5), 10, replace=TRUE)))
+  DT = data.table(a=factor(sample(letters[1:3], 10L, replace=TRUE), levels=letters[5:1]),
+                  b=factor(sample(letters[22:26], 10L, replace=TRUE), levels=letters[22:26]))
   test(1102.15, dcast(DT, a~b, drop=FALSE, value.var="b", fun.aggregate=length),
-                data.table(a=factor(c("e","d","c","b","a"),levels=levels(DT$a)), v=INT(0,0,0,1,0), w=INT(0,0,1,1,1), x=INT(0,0,1,0,0), y=INT(0,0,1,1,2), z=INT(0,0,0,1,0), key="a"))
+                data.table(a=factor(c("e","d","c","b","a"), levels=levels(DT$a)), v=INT(0,0,0,1,0), w=INT(0,0,1,1,1), x=INT(0,0,1,0,0), y=INT(0,0,1,1,2), z=INT(0,0,0,1,0), key="a"))
 
   # more factor cols
   set.seed(1L)
-  DT = data.table(a1=factor(sample(letters[1:3], 10, replace=TRUE), letters[1:5]), # factor col 1
-                 a2=factor(sample(letters[6:10], 10, replace=TRUE), letters[6:10]), # factor col 2
-                 a3=sample(letters[1:3], 10, TRUE), # no factor
-                 b=factor(sample(tail(letters, 5), 10, replace=TRUE)))
+  DT = data.table(a1=factor(sample(letters[1:3], 10L, replace=TRUE), levels=letters[1:5]), # factor col 1
+                 a2=factor(sample(letters[6:10], 10L, replace=TRUE), levels=letters[6:10]), # factor col 2
+                 a3=sample(letters[1:3], 10L, TRUE), # no factor
+                 b=factor(sample(letters[22:26], 10L, replace=TRUE), levels=letters[22:26]))
   test(1102.16, dcast(DT, a1+a2+a3~b, drop=FALSE, value.var="b")[c(1,21,.N)],
-                data.table(a1=factor(c("a","b","e"),levels=letters[1:5]),
+                data.table(a1=factor(c("a","b","e"), levels=letters[1:5]),
                            a2=factor(c("f","g","j"), levels=letters[6:10]),
                            a3=c("a","c","c"),
-                           v=factor(NA, levels=tail(letters,5)),
-                           x=factor(NA, levels=tail(letters,5)),
-                           y=factor(c(NA,"y",NA), levels=tail(letters,5)),
-                           z=factor(NA, levels=tail(letters,5)), key=c("a1", "a2", "a3")))
+                           v=factor(NA, levels=letters[22:26]),
+                           w=factor(NA, levels=letters[22:26]),
+                           x=factor(NA, levels=letters[22:26]),
+                           y=factor(c(NA,"y",NA), levels=letters[22:26]),
+                           z=factor(NA, levels=letters[22:26]), key=c("a1", "a2", "a3")))
 
   # dcast bug fix for 'subset' argument (it doesn't get key set before to run C-fcast):
   DT = data.table(x=c(1,1,1,2,2,2,1,1), y=c(1,2,3,1,2,1,1,2), z=c(1,2,3,NA,4,5,NA,NA))
@@ -4490,7 +4503,7 @@ for (nvars in seq_along(names(DT))) {
           }
         })
       ))
-      test(1223.0 + test_no*0.001, forderv(DT, by=x, order=signs[i,]), with(DT, eval(ll)))
+      test(1223.0 + test_no*0.001, forderv(DT, by=x, order=signs[i,]), with_c_collate(with(DT, eval(ll))))
     }
     integer()
   })
@@ -4759,11 +4772,11 @@ for (i in seq_along(names(DT))) {
         })
       ))
       ans1 = forderv(DT, by=x, order=y, na.last=TRUE)         # adding tests for both nalast=TRUE and nalast=NA
-      test(1252.0 + test_no*0.001, ans1, with(DT, eval(ll)))
+      test(1252.0 + test_no*0.001, ans1, with_c_collate(with(DT, eval(ll))))
       test_no <<- test_no + 1L
       ll <- as.call(c(as.list(ll), na.last=NA))
       ans1 = forderv(DT, by=x, order=y, na.last=NA)           # nalast=NA here.
-      test(1252.0 + test_no*0.001, ans1[ans1 != 0], with(DT, eval(ll)))
+      test(1252.0 + test_no*0.001, ans1[ans1 != 0], with_c_collate(with(DT, eval(ll))))
     })
     dim(tmp)=NULL
     list(tmp)

From 6f3fc8dcd37ac6976050b290e37f85762d0bccb5 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Fri, 19 Apr 2024 11:04:15 -0700
Subject: [PATCH 3/8] New regression test re: column plonk in magrittr pipeline
 (#6090)

* new regression test re: column plonk in magrittr pipeline

* simplify
---
 inst/tests/other.Rraw | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/inst/tests/other.Rraw b/inst/tests/other.Rraw
index 88593bcdf..99169809f 100644
--- a/inst/tests/other.Rraw
+++ b/inst/tests/other.Rraw
@@ -722,3 +722,8 @@ if (FALSE) {  # moved from tests.Rraw in #5517 and not yet back on; wasn't sure
   }
 }
 
+if (loaded[["dplyr"]]) {
+  # regression test for converting character->list column in a magrittr (dplyr) pipe, #2651
+  DT = data.table(a = 1, b = 2, c = '1,2,3,4]', d = 4)
+  test(30, DT[, c := strsplit(c, ',', fixed = TRUE) %>% lapply(as.integer) %>% as.list]$c, list(1:4))
+}

From 54d6b966ada6950d8977fa80628e2c0dc727c31d Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Fri, 19 Apr 2024 11:14:04 -0700
Subject: [PATCH 4/8] Expand exclusion array (which isn't valid) (#6096)

---
 .github/workflows/R-CMD-check-occasional.yaml | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/R-CMD-check-occasional.yaml b/.github/workflows/R-CMD-check-occasional.yaml
index 1358f0538..cd0fec0bd 100644
--- a/.github/workflows/R-CMD-check-occasional.yaml
+++ b/.github/workflows/R-CMD-check-occasional.yaml
@@ -16,9 +16,15 @@ jobs:
         os: [macOS-latest, windows-latest, ubuntu-latest]
         r: ['devel', 'release', '3.2', '3.3', '3.4', '3.5', '3.6', '4.0', '4.1', '4.2', '4.3']
         locale: ['en_US.utf8', 'zh_CN.utf8', 'lv_LV.utf8'] # Chinese for translations, Latvian for collate order (#3502)
-        exclude:
-          - os: ['macOS-latest', 'windows-latest'] # only run non-English locale CI on Ubuntu
-            locale: ['zh_CN.utf8', 'lv_LV.utf8']
+        exclude: # only run non-English locale CI on Ubuntu
+          - os: macOS-latest
+            locale: 'zh_CN.utf8'
+          - os: macOS-latest
+            locale: 'lv_LV.utf8'
+          - os: windows-latest
+            locale: 'zh_CN.utf8'
+          - os: windows-latest
+            locale: 'lv_LV.utf8'
 
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true

From 8ae1b2d343258e22bd81dfb5bb411eb28b387d5c Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Fri, 19 Apr 2024 13:12:25 -0700
Subject: [PATCH 5/8] Be sure to test 'other' in occasional suite (#6095)

---
 .github/workflows/R-CMD-check-occasional.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/R-CMD-check-occasional.yaml b/.github/workflows/R-CMD-check-occasional.yaml
index cd0fec0bd..9a5f48277 100644
--- a/.github/workflows/R-CMD-check-occasional.yaml
+++ b/.github/workflows/R-CMD-check-occasional.yaml
@@ -28,6 +28,7 @@ jobs:
 
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
+      TEST_DATA_TABLE_WITH_OTHER_PACKAGES: true
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 
     steps:

From d420afe916aa78c41f8d9857630f8c4de8abf537 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Fri, 19 Apr 2024 18:44:47 -0700
Subject: [PATCH 6/8] class= argument for condition calls (#5914)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 1.15.0 on CRAN. Bump to 1.15.99

* Fix transform slowness (#5493)

* Fix 5492 by limiting the costly deparse to `nlines=1`

* Implementing PR feedbacks

* Added  inside

* Fix typo in name

* Idiomatic use of  inside

* Separating the deparse line limit to a different PR

---------

Co-authored-by: Michael Chirico <chiricom@google.com>

* Improvements to the introductory vignette (#5836)

* Added my improvements to the intro vignette

* Removed two lines I added extra as a mistake earlier

* Requested changes

* Vignette typo patch (#5402)

* fix typos and grammatical mistakes

* fix typos and punctuation

* remove double spaces where it wasn't necessary

* fix typos and adhere to British English spelling

* fix typos

* fix typos

* add missing closing bracket

* fix typos

* review fixes

* Update vignettes/datatable-benchmarking.Rmd

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Update vignettes/datatable-benchmarking.Rmd

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Apply suggestions from code review benchmarking

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* remove unnecessary [ ] from datatable-keys-fast-subset.Rmd

* Update vignettes/datatable-programming.Rmd

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Update vignettes/datatable-reshape.Rmd

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* One last batch of fine-tuning

---------

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Co-authored-by: Michael Chirico <chiricom@google.com>

* Improved handling of list columns with NULL entries (#4250)

* Updated documentation for rbindlist(fill=TRUE)

* Print NULL entries of list as NULL

* Added news item

* edit NEWS, use '[NULL]' not 'NULL'

* fix test

* split NEWS item

* add example

---------

Co-authored-by: Michael Chirico <chiricom@google.com>
Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Co-authored-by: Benjamin Schwendinger <benjamin.schwendinger@tuwien.ac.at>

* clarify that list input->unnamed list output (#5383)

* clarify that list input->unnamed list output

* Add example where make.names is used

* mention role of make.names

* fix subsetting issue in split.data.table (#5368)

* fix subsetting issue in split.data.table

* add a test

* drop=FALSE on inner [

* switch to 3.2.0 R dep (#5905)

* Allow early exit from check for eval/evalq in cedta (#5660)

* Allow early exit from check for eval/evalq in cedta

Done in the browser+untested, please take a second look :)

* Use %chin%

* nocov new code

* frollmax1: frollmax, frollmax adaptive, left adaptive support (#5889)

* frollmax exact, buggy fast, no fast adaptive

* frollmax fast fixing bugs

* frollmax man to fix CRAN check

* frollmax fast adaptive non NA, dev

* froll docs, adaptive left

* no frollmax fast adaptive

* frollmax adaptive exact NAs handling

* PR summary in news

* copy-edit changes from reviews

Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>

* comment requested by Michael

* update NEWS file

* Apply suggestions from code review

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Apply suggestions from code review

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* add comment requested by Michael

* add comment about int iterator for loop over k-1 obs

* extra comments

* Revert "extra comments"

This reverts commit 03af0e30f1a6a9e75f82b5827c1078f42db48e45.

* add comments to frollmax and frollsum

* typo fix

---------

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>

* Friendlier error in assignment with trailing comma (#5467)

* friendlier error in assignment with trailing comma

e.g. `DT[, `:=`(a = 1, b = 2,)`.

WIP. Need to add tests and such, but editing from browser before I forget.

* Another pass

* include unnamed indices on RHS too

* tests

* NEWS

* test numbering

* explicit example in NEWS

* Link to ?read.delim in ?fread to give a closer analogue of expected behavior (#5635)

* fread is similar to read.delim (#5634)

* Use ?read.csv / ?read.delim

---------

Co-authored-by: Michael Chirico <chiricom@google.com>
Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Run GHA jobs on 1-15-99 dev branch (#5909)

* Make declarations static for covr (#5910)

* class= argument for condition calls

* Unify logic with helper

* Add tests

* Use call.=FALSE where possible

* correct caught class

* strip call=/call.= handling

* botched merge

---------

Co-authored-by: Ofek <ofekshilon@gmail.com>
Co-authored-by: Ani <bloodraven166@gmail.com>
Co-authored-by: David Budzynski <56514985+davidbudzynski@users.noreply.github.com>
Co-authored-by: Scott Ritchie <sritchie73@gmail.com>
Co-authored-by: Benjamin Schwendinger <benjamin.schwendinger@tuwien.ac.at>
Co-authored-by: Jan Gorecki <J.Gorecki@wit.edu.pl>
Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>
Co-authored-by: Manuel López-Ibáñez <2620021+MLopez-Ibanez@users.noreply.github.com>
---
 R/translation.R       | 30 ++++++++++++++++++++++--------
 inst/tests/tests.Rraw | 27 +++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/R/translation.R b/R/translation.R
index 66faa9fe8..42073aced 100644
--- a/R/translation.R
+++ b/R/translation.R
@@ -4,18 +4,32 @@ catf = function(fmt, ..., sep=" ", domain="R-data.table") {
   cat(gettextf(fmt, ..., domain=domain), sep=sep)
 }
 
-stopf = function(fmt, ..., domain="R-data.table") {
-  stop(gettextf(fmt, ..., domain=domain), domain=NA, call. = FALSE)
+raise_condition = function(signal, message, classes, immediate=FALSE, appendLF=FALSE) {
+  obj = list(message=message, call=sys.call(2))
+  # NB: append _after_ translation
+  if (appendLF) obj$message = paste0(obj$message, "\n")
+  setattr(obj, "class", classes)
+  # cannot set immediate.=TRUE through warning(), so use the description in ?warning to replicate this behavior ourselves. tested manually.
+  if (immediate) {
+    old = options(warn=1)
+    on.exit(options(old))
+  }
+  signal(obj)
 }
 
-warningf = function(fmt, ..., immediate.=FALSE, noBreaks.=FALSE, domain="R-data.table") {
-  warning(gettextf(fmt, ..., domain=domain), domain=NA, call.=FALSE, immediate.=immediate., noBreaks.=noBreaks.)
+stopf = function(fmt, ..., class=NULL, domain="R-data.table") {
+  raise_condition(stop, gettextf(fmt, ..., domain=domain), c(class, "simpleError", "error", "condition"))
 }
 
-messagef = function(fmt, ..., appendLF=TRUE, domain="R-data.table") {
-  message(gettextf(fmt, ..., domain=domain), domain=NA, appendLF=appendLF)
+warningf = function(fmt, ..., immediate.=FALSE, class=NULL, domain="R-data.table") {
+  raise_condition(warning, gettextf(fmt, ..., domain=domain), c(class, "simpleWarning", "warning", "condition"), immediate=immediate.)
 }
 
-packageStartupMessagef = function(fmt, ..., appendLF=TRUE, domain="R-data.table") {
-  packageStartupMessage(gettextf(fmt, ..., domain=domain), domain=NA, appendLF=appendLF)
+messagef = function(fmt, ..., appendLF=TRUE, class=NULL, domain="R-data.table") {
+  raise_condition(message, gettextf(fmt, ..., domain=domain), c(class, "simpleMessage", "message", "condition"), appendLF=appendLF)
+}
+
+packageStartupMessagef = function(fmt, ..., appendLF=TRUE, class=NULL, domain="R-data.table") {
+  # NB: packageStartupMessage() itself calls message(.packageStartupMessage(...))
+  messagef(fmt, ..., appendLF=appendLF, class=c(class, "packageStartupMessage"), domain=domain)
 }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e2791ed5d..5a7d8b7a3 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -53,7 +53,9 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
   isRealReallyInt = data.table:::isRealReallyInt
   is_utc = data.table:::is_utc
   melt.data.table = data.table:::melt.data.table  # for test 1953.4
+  messagef = data.table:::messagef
   null.data.table = data.table:::null.data.table
+  packageStartupMessagef = data.table:::packageStartupMessagef
   print.data.table = data.table:::print.data.table
   replace_dot_alias = data.table:::replace_dot_alias
   rollup.data.table = data.table:::rollup.data.table
@@ -66,9 +68,11 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
   .shallow = data.table:::.shallow
   split.data.table = data.table:::split.data.table
   if (!exists('startsWith', 'package:base', inherits=FALSE)) startsWith = data.table:::startsWith
+  stopf = data.table:::stopf
   test = data.table:::test
   uniqlengths = data.table:::uniqlengths
   uniqlist = data.table:::uniqlist
+  warningf = data.table:::warningf
   which_ = data.table:::which_
   which.first = data.table:::which.first
   which.last = data.table:::which.last
@@ -18499,3 +18503,26 @@ test(2258.9, capture.output(print(DT, na.print=".", topn=2)), c("    x", " 1: ."
 x = data.table(rep(1:2, each=5L), 1:5, 1:10)
 test(2259.1, names(split(x, by = c("V1", "V2"), sep = "|")), sort(names(split(x, list(x$V1, x$V2), sep = "|"))))
 test(2259.2, names(split(x, by = c("V1", "V2"), sep = "||")), sort(names(split(x, list(x$V1, x$V2), sep = "||"))))
+
+# custom signaling functions
+## basics: default signals with/without formats
+test(2260.01, tryCatch(stopf("%s", "abc"), error=function(x) conditionMessage(x)), "abc")
+test(2260.02, tryCatch(stopf("abc"), error=function(x) conditionMessage(x)), "abc")
+test(2260.03, tryCatch(warningf("%s", "abc"), warning=function(x) conditionMessage(x)), "abc")
+test(2260.04, tryCatch(warningf("abc"), warning=function(x) conditionMessage(x)), "abc")
+test(2260.05, tryCatch(messagef("%s", "abc"), message=function(x) conditionMessage(x)), "abc\n")
+test(2260.06, tryCatch(messagef("abc"), message=function(x) conditionMessage(x)), "abc\n")
+test(2260.07, tryCatch(messagef("abc", appendLF=FALSE), message=function(x) conditionMessage(x)), "abc")
+test(2260.08, tryCatch(packageStartupMessagef("%s", "abc"), packageStartupMessage=function(x) conditionMessage(x)), "abc\n")
+test(2260.09, tryCatch(packageStartupMessagef("abc"), packageStartupMessage=function(x) conditionMessage(x)), "abc\n")
+test(2260.10, tryCatch(packageStartupMessagef("abc", appendLF=FALSE), packageStartupMessage=function(x) conditionMessage(x)), "abc")
+
+## custom signal classes
+test(2260.11, inherits(tryCatch(stopf("x", class="test_error"), condition=identity), "test_error"))
+test(2260.12, inherits(tryCatch(stopf("x", class="test_error"), condition=identity), "error"))
+test(2260.13, inherits(tryCatch(warningf("x", class="test_warning"), condition=identity), "test_warning"))
+test(2260.14, inherits(tryCatch(warningf("x", class="test_warning"), condition=identity), "warning"))
+test(2260.15, inherits(tryCatch(messagef("x", class="test_message"), condition=identity), "test_message"))
+test(2260.16, inherits(tryCatch(messagef("x", class="test_message"), condition=identity), "message"))
+test(2260.17, inherits(tryCatch(packageStartupMessagef("x", class="test_psm"), condition=identity), "test_psm"))
+test(2260.18, inherits(tryCatch(packageStartupMessagef("x", class="test_psm"), condition=identity), "packageStartupMessage"))

From 20126b118bfbf1f19a018bf0f22a4ce6adc1cf39 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Mon, 22 Apr 2024 09:39:21 -0700
Subject: [PATCH 7/8] Fix other.Rraw tests to run locally (#6097)

---
 inst/tests/other.Rraw | 15 +++++++++------
 tests/other.R         | 12 ++----------
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/inst/tests/other.Rraw b/inst/tests/other.Rraw
index 99169809f..087b3bada 100644
--- a/inst/tests/other.Rraw
+++ b/inst/tests/other.Rraw
@@ -27,7 +27,7 @@ f = function(pkg) suppressWarnings(suppressMessages(isTRUE(
 )))
 loaded = sapply(pkgs, f)
 if (any(!loaded)) {
-  stop("test.data.table('other.Rraw') is missing required package(s): ", paste(names(loaded)[!loaded], collapse=", "), ". If you can't install them and this is R CMD check, please set environment variable TEST_DATA_TABLE_WITH_OTHER_PACKAGES back to the default, false.")
+  stop("test.data.table('other.Rraw') is missing required package(s): ", toString(names(loaded)[!loaded]), ". If you can't install them and this is R CMD check, please set environment variable TEST_DATA_TABLE_WITH_OTHER_PACKAGES back to the default, false.")
   # Would like to install them now for convenience but gitlab-ci.yml seems to install to bus/mirror-other-packages/cran.
   # If that's a cache, that's nice, but we don't know at this point whether this script is being run by GLCI or by a user or in dev.
   # We don't allow skipping (e.g. if _R_CHECK_FORCE_SUGGESTS_ is FALSE) to keep things simple and to keep things strict; i.e.
@@ -35,7 +35,7 @@ if (any(!loaded)) {
 }
 
 cat("\n")
-print(data.table(pkg=pkgs, loaded)[loaded==TRUE, version:=as.character(sapply(pkg, function(p) format(packageVersion(p))))][])
+print(data.table(pkg=pkgs, loaded)[loaded==TRUE, .(pkg, version=sapply(pkg, function(p) format(packageVersion(p))))])
 cat("\n")
 print(sessionInfo())
 cat("\n")
@@ -50,7 +50,9 @@ if (loaded[["ggplot2"]]) {
   test(1.2, DT[,print(ggplot(.SD,aes(b,f))+geom_point()),by=list(grp%%2L)],data.table(grp=integer()))  # %%2 to reduce time needed for ggplot2 to plot
   if (loaded[["hexbin"]]) {
     # Test reported by C Neff on 11 Oct 2011
-    test(1.3, names(print(ggplot(DT) + geom_hex(aes(b, f)) + facet_wrap(~grp)))[c(1,3)], c("data","scales"))
+    # TODO(r-lib/gtable#94): don't suppressWarnings() here.
+    x <- suppressWarnings(print(ggplot(DT) + geom_hex(aes(b, f)) + facet_wrap(~grp)))
+    test(1.3, names(x)[c(1L, 3L)], c("data", "scales"))
   }
   # Test plotting ITime with ggplot2 which seems to require an as.data.frame method for ITime, #1713
   datetimes = c("2011 NOV18 09:29:16", "2011 NOV18 10:42:40", "2011 NOV18 23:47:12",
@@ -97,7 +99,8 @@ if (loaded[["caret"]]) {
   # So I put the win-builder fail down to resource issues and moved this test into test.data.table("other.Rraw").
   DT = data.table(x = rnorm(10), y = rnorm(10))
   cv.ctrl = trainControl(method = 'repeatedcv', number = 5, repeats = 1)
-  fit = train(y ~ x, data = DT, 'lm', trControl = cv.ctrl)
+  # TODO(topepo/caret#1361): remove suppressWarnings() for partially matched args internal to caret
+  fit = suppressWarnings(train(y ~ x, data = DT, 'lm', trControl = cv.ctrl))
   test(4, names(DT), c("x", "y"))
 }
 
@@ -209,7 +212,7 @@ test(14.1, {example(':=', package='data.table', local=TRUE, echo=FALSE); TRUE})
 test(14.2, {example('CJ', package='data.table', local=TRUE, echo=FALSE); TRUE})
 
 if (loaded[["sf"]]) {  #2273
-  DT = as.data.table(st_read(system.file("shape/nc.shp", package = "sf")))
+  DT = as.data.table(st_read(system.file("shape/nc.shp", package = "sf"), quiet=TRUE))
   test(15, DT[1:3, .(NAME, FIPS, geometry)], output="Ashe.*-81.4.*Surry.*-80.4")
 
   dsf = sf::st_as_sf(data.table(x=1:10, y=1:10, s=sample(1:2, 10, TRUE)), coords=1:2)
@@ -724,6 +727,6 @@ if (FALSE) {  # moved from tests.Rraw in #5517 and not yet back on; wasn't sure
 
 if (loaded[["dplyr"]]) {
   # regression test for converting character->list column in a magrittr (dplyr) pipe, #2651
-  DT = data.table(a = 1, b = 2, c = '1,2,3,4]', d = 4)
+  DT = data.table(a = 1, b = 2, c = '1,2,3,4', d = 4)
   test(30, DT[, c := strsplit(c, ',', fixed = TRUE) %>% lapply(as.integer) %>% as.list]$c, list(1:4))
 }
diff --git a/tests/other.R b/tests/other.R
index 5b2969bbf..34ced2327 100644
--- a/tests/other.R
+++ b/tests/other.R
@@ -1,15 +1,7 @@
-require(data.table)
+library(data.table)
 if (!as.logical(Sys.getenv("TEST_DATA_TABLE_WITH_OTHER_PACKAGES", "FALSE"))) {
+  cat("Skipping tests in 'other' and quitting, set TEST_DATA_TABLE_WITH_OTHER_PACKAGES to proceed.\n")
   q('no')
 }
 
-options(warn=1)
-# test.data.table() turns on R's warnPartial* options and currently there
-# are partial argument names used in base and other packages. Without the
-# options(warn=1), other.Rout just contains "There were 16 warnings (use
-# warnings() to see them)". However, a print(warnings()) after test.data.table()
-# just results in NULL in other.Rout. Hence options(warn=1) because that
-# worked to display the warnings, not because we want them displayed at the
-# time per se.
-
 test.data.table(script="other.Rraw")

From 6db0eda711bb59ad9b6009208584c56da1abb915 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Tue, 23 Apr 2024 12:12:12 -0700
Subject: [PATCH 8/8] Add a GHA for linting code (#5908)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 1.15.0 on CRAN. Bump to 1.15.99

* Fix transform slowness (#5493)

* Fix 5492 by limiting the costly deparse to `nlines=1`

* Implementing PR feedbacks

* Added  inside

* Fix typo in name

* Idiomatic use of  inside

* Separating the deparse line limit to a different PR

---------

Co-authored-by: Michael Chirico <chiricom@google.com>

* Improvements to the introductory vignette (#5836)

* Added my improvements to the intro vignette

* Removed two lines I added extra as a mistake earlier

* Requested changes

* Vignette typo patch (#5402)

* fix typos and grammatical mistakes

* fix typos and punctuation

* remove double spaces where it wasn't necessary

* fix typos and adhere to British English spelling

* fix typos

* fix typos

* add missing closing bracket

* fix typos

* review fixes

* Update vignettes/datatable-benchmarking.Rmd

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Update vignettes/datatable-benchmarking.Rmd

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Apply suggestions from code review benchmarking

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* remove unnecessary [ ] from datatable-keys-fast-subset.Rmd

* Update vignettes/datatable-programming.Rmd

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Update vignettes/datatable-reshape.Rmd

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* One last batch of fine-tuning

---------

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Co-authored-by: Michael Chirico <chiricom@google.com>

* Improved handling of list columns with NULL entries (#4250)

* Updated documentation for rbindlist(fill=TRUE)

* Print NULL entries of list as NULL

* Added news item

* edit NEWS, use '[NULL]' not 'NULL'

* fix test

* split NEWS item

* add example

---------

Co-authored-by: Michael Chirico <chiricom@google.com>
Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Co-authored-by: Benjamin Schwendinger <benjamin.schwendinger@tuwien.ac.at>

* clarify that list input->unnamed list output (#5383)

* clarify that list input->unnamed list output

* Add example where make.names is used

* mention role of make.names

* fix subsetting issue in split.data.table (#5368)

* fix subsetting issue in split.data.table

* add a test

* drop=FALSE on inner [

* switch to 3.2.0 R dep (#5905)

* Allow early exit from check for eval/evalq in cedta (#5660)

* Allow early exit from check for eval/evalq in cedta

Done in the browser+untested, please take a second look :)

* Use %chin%

* nocov new code

* frollmax1: frollmax, frollmax adaptive, left adaptive support (#5889)

* frollmax exact, buggy fast, no fast adaptive

* frollmax fast fixing bugs

* frollmax man to fix CRAN check

* frollmax fast adaptive non NA, dev

* froll docs, adaptive left

* no frollmax fast adaptive

* frollmax adaptive exact NAs handling

* PR summary in news

* copy-edit changes from reviews

Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>

* comment requested by Michael

* update NEWS file

* Apply suggestions from code review

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Apply suggestions from code review

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* add comment requested by Michael

* add comment about int iterator for loop over k-1 obs

* extra comments

* Revert "extra comments"

This reverts commit 03af0e30f1a6a9e75f82b5827c1078f42db48e45.

* add comments to frollmax and frollsum

* typo fix

---------

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>

* Friendlier error in assignment with trailing comma (#5467)

* friendlier error in assignment with trailing comma

e.g. `DT[, `:=`(a = 1, b = 2,)`.

WIP. Need to add tests and such, but editing from browser before I forget.

* Another pass

* include unnamed indices on RHS too

* tests

* NEWS

* test numbering

* explicit example in NEWS

* Link to ?read.delim in ?fread to give a closer analogue of expected behavior (#5635)

* fread is similar to read.delim (#5634)

* Use ?read.csv / ?read.delim

---------

Co-authored-by: Michael Chirico <chiricom@google.com>
Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>

* Run GHA jobs on 1-15-99 dev branch (#5909)

* overhauled linter

* revert code changes

* Initial commit of {lintr} approach

* first pass at personalization

* first custom linter

* delint vignettes

* delint tests

* delint R sources

* rm empty

* re-merge

* Move config to .ci directory

* Use endsWithAny

* Make declarations static for covr (#5910)

* restore lint on branch

* extension needed after all?

* set option in R

* debug printing

* Exact file name in option

* really hacky approach

* skip more linters

* One more round of deactivation

* FIx whitespace issues (again??)

* botched merge

* obsolete branch ref

* restore simple CI script thanks to upstream fix

* more delint

* just disable unused_import_linter() everywhere for now

* rm whitespace from atime tests

* comment about comment

---------

Co-authored-by: Ofek <ofekshilon@gmail.com>
Co-authored-by: Ani <bloodraven166@gmail.com>
Co-authored-by: David Budzynski <56514985+davidbudzynski@users.noreply.github.com>
Co-authored-by: Scott Ritchie <sritchie73@gmail.com>
Co-authored-by: Benjamin Schwendinger <benjamin.schwendinger@tuwien.ac.at>
Co-authored-by: Jan Gorecki <J.Gorecki@wit.edu.pl>
Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com>
Co-authored-by: Manuel López-Ibáñez <2620021+MLopez-Ibanez@users.noreply.github.com>
---
 .ci/.lintr.R                             | 105 +++++++++++++++++++++++
 .github/workflows/lint.yaml              |  35 ++++++++
 R/bmerge.R                               |   1 -
 R/data.table.R                           |   2 +-
 R/duplicated.R                           |   1 -
 R/foverlaps.R                            |   1 -
 R/fwrite.R                               |   3 +-
 R/onAttach.R                             |   2 +-
 R/openmp-utils.R                         |   1 -
 R/print.data.table.R                     |   3 +-
 R/setkey.R                               |   1 -
 R/setops.R                               |   1 -
 R/tables.R                               |   1 -
 R/test.data.table.R                      |   4 +-
 R/timetaken.R                            |   1 -
 R/transpose.R                            |   3 +-
 R/uniqlist.R                             |   1 -
 R/utils.R                                |   1 -
 inst/atime/tests.R                       |  22 ++---
 vignettes/datatable-faq.Rmd              |   4 +-
 vignettes/datatable-intro.Rmd            |   2 +-
 vignettes/datatable-keys-fast-subset.Rmd |   6 +-
 vignettes/datatable-programming.Rmd      |   2 +-
 vignettes/datatable-sd-usage.Rmd         |   4 +-
 24 files changed, 168 insertions(+), 39 deletions(-)
 create mode 100644 .ci/.lintr.R
 create mode 100644 .github/workflows/lint.yaml

diff --git a/.ci/.lintr.R b/.ci/.lintr.R
new file mode 100644
index 000000000..09a0db3dc
--- /dev/null
+++ b/.ci/.lintr.R
@@ -0,0 +1,105 @@
+for (f in list.files('ci/linters', full.names=TRUE)) source(f)
+rm(f)
+
+linters = all_linters(
+  packages = "lintr", # TODO(lintr->3.2.0): Remove this.
+  # eq_assignment_linter(),
+  brace_linter(allow_single_line = TRUE),
+  # TODO(michaelchirico): Activate these incrementally. These are the
+  #   parameterizations that match our style guide.
+  # implicit_assignment_linter(allow_lazy = TRUE, allow_scoped = TRUE),
+  # implicit_integer_linter(allow_colon = TRUE),
+  # system_time_linter = undesirable_function_linter(c(
+  #   system.time = "Only run timings in benchmark.Rraw"
+  # )),
+  # undesirable_function_linter(modify_defaults(
+  #   default_undesirable_functions,
+  #   ifelse = "Use fifelse instead.",
+  #   Sys.setenv = NULL,
+  #   library = NULL,
+  #   options = NULL,
+  #   par = NULL,
+  #   setwd = NULL
+  # )),
+  undesirable_operator_linter(modify_defaults(
+    default_undesirable_operators,
+    `<<-` = NULL
+  )),
+  # TODO(lintr#2441): Use upstream implementation.
+  assignment_linter = NULL,
+  # TODO(lintr#2442): Use this once x[ , j, by] is supported.
+  commas_linter = NULL,
+  commented_code_linter = NULL,
+  # TODO(linter->3.2.0): Activate this.
+  consecutive_assertion_linter = NULL,
+  cyclocomp_linter = NULL,
+  function_argument_linter = NULL,
+  indentation_linter = NULL,
+  infix_spaces_linter = NULL,
+  # TODO(R>3.2.0): Activate this, extending to recognize vapply_1i(x, length).
+  lengths_linter = NULL,
+  line_length_linter = NULL,
+  missing_package_linter = NULL,
+  namespace_linter = NULL,
+  nonportable_path_linter = NULL,
+  object_name_linter = NULL,
+  object_usage_linter = NULL,
+  quotes_linter = NULL,
+  semicolon_linter = NULL,
+  spaces_inside_linter = NULL,
+  spaces_left_parentheses_linter = NULL,
+  # TODO(michaelchirico): Only exclude from vignettes, not sure what's wrong.
+  strings_as_factors_linter = NULL,
+  # TODO(lintr->3.2.0): Fix on a valid TODO style, enforce it, and re-activate.
+  todo_comment_linter = NULL,
+  # TODO(michaelchirico): Enforce these and re-activate them one-by-one. Also stop using '<<-'.
+  brace_linter = NULL,
+  condition_call_linter = NULL,
+  conjunct_test_linter = NULL,
+  fixed_regex_linter = NULL,
+  function_left_parentheses_linter = NULL,
+  if_not_else_linter = NULL,
+  implicit_assignment_linter = NULL,
+  implicit_integer_linter = NULL,
+  keyword_quote_linter = NULL,
+  length_levels_linter = NULL,
+  matrix_apply_linter = NULL,
+  missing_argument_linter = NULL,
+  nzchar_linter = NULL,
+  object_overwrite_linter = NULL,
+  paren_body_linter = NULL,
+  redundant_equals_linter = NULL,
+  rep_len_linter = NULL,
+  repeat_linter = NULL,
+  return_linter = NULL,
+  sample_int_linter = NULL,
+  scalar_in_linter = NULL,
+  seq_linter = NULL,
+  undesirable_function_linter = NULL,
+  unnecessary_concatenation_linter = NULL,
+  unnecessary_lambda_linter = NULL,
+  unnecessary_nesting_linter = NULL,
+  unreachable_code_linter = NULL,
+  unused_import_linter = NULL
+)
+# TODO(lintr#2172): Glob with lintr itself.
+exclusions = local({
+  exclusion_for_dir <- function(dir, exclusions) {
+    files = list.files(dir, pattern = "\\.(R|Rmd)$")
+    stats::setNames(rep(list(exclusions), length(files)), files)
+  }
+  c(
+    exclusion_for_dir("tests", list(
+      quotes_linter = Inf,
+      # TODO(michaelchirico): Enforce these and re-activate them one-by-one.
+      implicit_integer_linter = Inf,
+      infix_spaces_linter = Inf,
+      undesirable_function_linter = Inf
+    )),
+    exclusion_for_dir("vignettes", list(
+      quotes_linter = Inf
+      # strings_as_factors_linter = Inf
+      # system_time_linter = Inf
+    ))
+  )
+})
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 000000000..7170016b1
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,35 @@
+on:
+  push:
+    branches:
+     - master
+  pull_request:
+    branches:
+     - master
+
+name: lint
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: |
+            r-lib/lintr
+            local::.
+          needs: lint
+
+      - name: Lint
+        run: lintr::lint_package()
+        shell: Rscript {0}
+        env:
+          LINTR_ERROR_ON_LINT: true
+          R_LINTR_LINTER_FILE: .ci/.lintr
diff --git a/R/bmerge.R b/R/bmerge.R
index ddaedc1b3..ff40fddb4 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -187,4 +187,3 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
   ans$xo = xo  # for further use by [.data.table
   return(ans)
 }
-
diff --git a/R/data.table.R b/R/data.table.R
index e0cddb38f..5513fb276 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -2300,7 +2300,7 @@ transform.data.table = function (`_data`, ...)
 {
   if (!cedta()) return(NextMethod()) # nocov
   `_data` = copy(`_data`)
-  e = eval(substitute(list(...)), `_data`, parent.frame()) 
+  e = eval(substitute(list(...)), `_data`, parent.frame())
   set(`_data`, ,names(e), e)
   `_data`
 }
diff --git a/R/duplicated.R b/R/duplicated.R
index 27b903812..0aad2ebdd 100644
--- a/R/duplicated.R
+++ b/R/duplicated.R
@@ -118,4 +118,3 @@ uniqueN = function(x, by = if (is.list(x)) seq_along(x) else NULL, na.rm=FALSE)
     length(starts)
   }
 }
-
diff --git a/R/foverlaps.R b/R/foverlaps.R
index 9a0cd5580..54dc61f93 100644
--- a/R/foverlaps.R
+++ b/R/foverlaps.R
@@ -247,4 +247,3 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k
 # Tests are added to ensure we cover these aspects (to my knowledge) to ensure that any undesirable changes in the future breaks those tests.
 
 # Conclusion: floating point manipulations are hell!
-
diff --git a/R/fwrite.R b/R/fwrite.R
index b13b0afb7..37968d4ea 100644
--- a/R/fwrite.R
+++ b/R/fwrite.R
@@ -64,7 +64,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
     length(nThread)==1L && !is.na(nThread) && nThread>=1L
     )
 
-  is_gzip = compress == "gzip" || (compress == "auto" && grepl("\\.gz$", file))
+  is_gzip = compress == "gzip" || (compress == "auto" && endsWithAny(file, ".gz"))
 
   file = path.expand(file)  # "~/foo/bar"
   if (append && (file=="" || file.exists(file))) {
@@ -122,4 +122,3 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
 }
 
 haszlib = function() .Call(Cdt_has_zlib)
-
diff --git a/R/onAttach.R b/R/onAttach.R
index 6ff17972b..7a4a9be3e 100644
--- a/R/onAttach.R
+++ b/R/onAttach.R
@@ -21,7 +21,7 @@
     nth = getDTthreads(verbose=FALSE)
     if (dev)
       packageStartupMessagef("data.table %s IN DEVELOPMENT built %s%s using %d threads (see ?getDTthreads).  ", v, d, g, nth, appendLF=FALSE)
-    else 
+    else
       packageStartupMessagef("data.table %s using %d threads (see ?getDTthreads).  ", v, nth, appendLF=FALSE)
     packageStartupMessagef("Latest news: r-datatable.com")
     if (gettext("TRANSLATION CHECK") != "TRANSLATION CHECK")
diff --git a/R/openmp-utils.R b/R/openmp-utils.R
index f19120724..85f6b3257 100644
--- a/R/openmp-utils.R
+++ b/R/openmp-utils.R
@@ -13,4 +13,3 @@ setDTthreads = function(threads=NULL, restore_after_fork=NULL, percent=NULL, thr
 getDTthreads = function(verbose=getOption("datatable.verbose")) {
   .Call(CgetDTthreads, verbose)
 }
-
diff --git a/R/print.data.table.R b/R/print.data.table.R
index 9e33e0c4d..f80a5833c 100644
--- a/R/print.data.table.R
+++ b/R/print.data.table.R
@@ -230,7 +230,7 @@ format_list_item.default = function(x, ...) {
 char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) {
   trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE)
   if (!is.character(x) || trunc.char <= 0L) return(x)
-  nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096 
+  nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096
   nchar_chars = nchar(x, 'char')
   is_full_width = nchar_width > nchar_chars
   idx = pmin(nchar_width, nchar_chars) > trunc.char
@@ -272,4 +272,3 @@ trunc_cols_message = function(not_printed, abbs, class, col.names){
     n, brackify(paste0(not_printed, classes))
   )
 }
-
diff --git a/R/setkey.R b/R/setkey.R
index 84488a803..62da9ebe8 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -353,4 +353,3 @@ CJ = function(..., sorted = TRUE, unique = FALSE)
   }
   l
 }
-
diff --git a/R/setops.R b/R/setops.R
index 1034b0f0f..23dd6ec8f 100644
--- a/R/setops.R
+++ b/R/setops.R
@@ -290,4 +290,3 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu
   }
   TRUE
 }
-
diff --git a/R/tables.R b/R/tables.R
index e47a1a42e..6a0209c86 100644
--- a/R/tables.R
+++ b/R/tables.R
@@ -60,4 +60,3 @@ tables = function(mb=type_size, order.col="NAME", width=80,
   }
   invisible(info)
 }
-
diff --git a/R/test.data.table.R b/R/test.data.table.R
index 748e09512..43486e278 100644
--- a/R/test.data.table.R
+++ b/R/test.data.table.R
@@ -7,7 +7,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
   if (length(memtest.id)) {
     if (length(memtest.id)==1L) memtest.id = rep(memtest.id, 2L)  # for convenience of supplying one id rather than always a range
     stopifnot(length(memtest.id)<=2L,  # conditions quoted to user when false so "<=2L" even though following conditions rely on ==2L
-                     !anyNA(memtest.id), memtest.id[1L]<=memtest.id[2L]) 
+                     !anyNA(memtest.id), memtest.id[1L]<=memtest.id[2L])
     if (memtest==0L) memtest=1L  # using memtest.id implies memtest
   }
   if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
@@ -134,7 +134,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
 
   owd = setwd(tempdir()) # ensure writeable directory; e.g. tests that plot may write .pdf here depending on device option and/or batch mode; #5190
   on.exit(setwd(owd))
-  
+
   if (memtest) {
     catf("\n***\n*** memtest=%d. This should be the first call in a fresh R_GC_MEM_GROW=0 R session for best results. Ctrl-C now if not.\n***\n\n", memtest)
     if (is.na(rss())) stopf("memtest intended for Linux. Step through data.table:::rss() to see what went wrong.")
diff --git a/R/timetaken.R b/R/timetaken.R
index daa52c9f1..ae4b384fc 100644
--- a/R/timetaken.R
+++ b/R/timetaken.R
@@ -12,4 +12,3 @@ timetaken = function(started.at)
   tt = proc.time()-started.at  # diff all 3 times
   paste0(format(tt[3L])," elapsed (", format(tt[1L]), " cpu)")
 }
-
diff --git a/R/transpose.R b/R/transpose.R
index 684b135d4..6d6da6779 100644
--- a/R/transpose.R
+++ b/R/transpose.R
@@ -56,7 +56,7 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) {
         if (!(sum(!is_named) == 1L && !is_named[n] && is.function(type.convert[[n]])))
           stopf("When the argument 'type.convert' contains an unnamed element, it is expected to be the last element and should be a function. More than one unnamed element is not allowed unless all elements are functions with length equal to %d (the length of the transpose list or 'keep' argument if it is specified).", length(keep))
         else {
-          fothers = type.convert[[n]]         
+          fothers = type.convert[[n]]
           type.convert = type.convert[-n]
         }
       }
@@ -90,4 +90,3 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) {
   setattr(ans, 'names', names)
   ans
 }
-
diff --git a/R/uniqlist.R b/R/uniqlist.R
index 2a610ab1a..4f3600f83 100644
--- a/R/uniqlist.R
+++ b/R/uniqlist.R
@@ -21,4 +21,3 @@ uniqlengths = function(x, len) {
   ans = .Call(Cuniqlengths, as.integer(x), as.integer(len))
   ans
 }
-
diff --git a/R/utils.R b/R/utils.R
index a78e5450f..feacd2b00 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -165,4 +165,3 @@ rss = function() {  #5515 #5517
   round(ans / 1024, 1L)  # return MB
   # nocov end
 }
-
diff --git a/inst/atime/tests.R b/inst/atime/tests.R
index a0635d063..19c1b27a8 100644
--- a/inst/atime/tests.R
+++ b/inst/atime/tests.R
@@ -1,7 +1,7 @@
 # A function to customize R package metadata and source files to facilitate version-specific installation and testing.
 #
-# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) 
-# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. 
+# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R)
+# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package.
 # It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately.
 #
 # @param old.Package Current name of the package.
@@ -29,7 +29,7 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
       Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
       new.Package_ <- paste0(Package_, "_", sha)
       pkg_find_replace(
-        "DESCRIPTION", 
+        "DESCRIPTION",
         paste0("Package:\\s+", old.Package),
         paste("Package:", new.Package))
       pkg_find_replace(
@@ -55,13 +55,13 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
     }
 
 # A list of performance tests.
-# 
+#
 # Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments:
 # - N: A numeric sequence of data sizes to vary.
 # - setup: An expression evaluated for every data size before measuring time/memory.
-# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. 
+# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions.
 #         This must call a function from data.table using a syntax with double or triple colon prefix.
-#         The package name before the colons will be replaced by a new package name that uses the commit SHA hash. 
+#         The package name before the colons will be replaced by a new package name that uses the commit SHA hash.
 #         (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table)
 #
 # Optional parameters that may be useful to configure tests:
@@ -70,8 +70,9 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
 # - sha.vec: Named character vector or a list of vectors that specify data.table-specific commit SHAs for testing across those different git commit versions.
 #            For historical regressions, use 'Before', 'Regression', and 'Fixed' (otherwise something like 'Slow' or 'Fast' ideally).
 # @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information.
+# nolint start: undesirable_operator_linter. ':::' needed+appropriate here.
 test.list <- list(
-  # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 
+  # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311
   # Fixed in: https://github.com/Rdatatable/data.table/pull/4440
   "Test regression fixed in #4440" = list(
     pkg.edit.fun = pkg.edit.fun,
@@ -88,7 +89,7 @@ test.list <- list(
 
   # Test based on: https://github.com/Rdatatable/data.table/issues/5424
   # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491
-  # Fixed in: https://github.com/Rdatatable/data.table/pull/5463    
+  # Fixed in: https://github.com/Rdatatable/data.table/pull/5463
   "Test regression fixed in #5463" = list(
     pkg.edit.fun = pkg.edit.fun,
     N = 10^seq(3, 8),
@@ -101,8 +102,9 @@ test.list <- list(
         key = "g")
       dt_mod <- copy(dt)
     }),
-    expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)),    
+    expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)),
     Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
     Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
-    Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits)    
+    Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits)
 )
+# nolint end: undesirable_operator_linter.
diff --git a/vignettes/datatable-faq.Rmd b/vignettes/datatable-faq.Rmd
index 97c11aeba..1501497bc 100644
--- a/vignettes/datatable-faq.Rmd
+++ b/vignettes/datatable-faq.Rmd
@@ -373,7 +373,7 @@ Yes:
 
 The general form is:
 
-```{r, eval = FALSE}
+```r
 DT[where, select|update, group by][order by][...] ... [...]
 ```
 
@@ -619,4 +619,4 @@ Please see [this answer](https://stackoverflow.com/a/10529888/403310).
 
 ```{r, echo=FALSE}
 setDTthreads(.old.th)
-```
\ No newline at end of file
+```
diff --git a/vignettes/datatable-intro.Rmd b/vignettes/datatable-intro.Rmd
index c783c3fa7..e63caee5d 100644
--- a/vignettes/datatable-intro.Rmd
+++ b/vignettes/datatable-intro.Rmd
@@ -652,4 +652,4 @@ We will see how to *add/update/delete* columns *by reference* and how to combine
 
 ```{r, echo=FALSE}
 setDTthreads(.old.th)
-```
\ No newline at end of file
+```
diff --git a/vignettes/datatable-keys-fast-subset.Rmd b/vignettes/datatable-keys-fast-subset.Rmd
index 3ec50640c..d85f69ad8 100644
--- a/vignettes/datatable-keys-fast-subset.Rmd
+++ b/vignettes/datatable-keys-fast-subset.Rmd
@@ -157,7 +157,7 @@ Once you *key* a *data.table* by certain columns, you can subset by querying tho
 flights[.("JFK")]
 
 ## alternatively
-# flights[J("JFK")] (or) 
+# flights[J("JFK")] (or)
 # flights[list("JFK")]
 ```
 
@@ -464,7 +464,7 @@ Now let us look at binary search approach (method 2). Recall from [Properties of
 
 Here's a very simple illustration. Let's consider the (sorted) numbers shown below:
 
-```{r eval = FALSE}
+```
 1, 5, 10, 19, 22, 23, 30
 ```
 
@@ -499,4 +499,4 @@ Key based subsets are **incredibly fast** and are particularly useful when the t
 
 ```{r, echo=FALSE}
 setDTthreads(.old.th)
-```
\ No newline at end of file
+```
diff --git a/vignettes/datatable-programming.Rmd b/vignettes/datatable-programming.Rmd
index 0536f11d6..3ec1f57d5 100644
--- a/vignettes/datatable-programming.Rmd
+++ b/vignettes/datatable-programming.Rmd
@@ -420,4 +420,4 @@ DT[, cl, env = list(cl = cl)]
 ```{r cleanup, echo=FALSE}
 options(.opts)
 registerS3method("print", "data.frame", base::print.data.frame)
-```
\ No newline at end of file
+```
diff --git a/vignettes/datatable-sd-usage.Rmd b/vignettes/datatable-sd-usage.Rmd
index 09243c820..bd2618d53 100644
--- a/vignettes/datatable-sd-usage.Rmd
+++ b/vignettes/datatable-sd-usage.Rmd
@@ -169,7 +169,7 @@ lm_coef = sapply(models, function(rhs) {
 })
 barplot(lm_coef, names.arg = sapply(models, paste, collapse = '/'),
         main = 'Wins Coefficient\nWith Various Covariates',
-        col = col16, las = 2L, cex.names = .8)
+        col = col16, las = 2L, cex.names = 0.8)
 ```
 
 The coefficient always has the expected sign (better pitchers tend to have more wins and fewer runs allowed), but the magnitude can vary substantially depending on what else we control for.
@@ -254,4 +254,4 @@ The above is just a short introduction of the power of `.SD` in facilitating bea
 
 ```{r, echo=FALSE}
 setDTthreads(.old.th)
-```
\ No newline at end of file
+```