Skip to content

Commit

Permalink
Merge branch 'master' into add-a-test-and-move-to-ci
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico committed Apr 23, 2024
2 parents 1d8c699 + 6db0eda commit 2b7fe3a
Show file tree
Hide file tree
Showing 31 changed files with 289 additions and 88 deletions.
105 changes: 105 additions & 0 deletions .ci/.lintr.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
for (f in list.files('ci/linters', full.names=TRUE)) source(f)
rm(f)

linters = all_linters(
packages = "lintr", # TODO(lintr->3.2.0): Remove this.
# eq_assignment_linter(),
brace_linter(allow_single_line = TRUE),
# TODO(michaelchirico): Activate these incrementally. These are the
# parameterizations that match our style guide.
# implicit_assignment_linter(allow_lazy = TRUE, allow_scoped = TRUE),
# implicit_integer_linter(allow_colon = TRUE),
# system_time_linter = undesirable_function_linter(c(
# system.time = "Only run timings in benchmark.Rraw"
# )),
# undesirable_function_linter(modify_defaults(
# default_undesirable_functions,
# ifelse = "Use fifelse instead.",
# Sys.setenv = NULL,
# library = NULL,
# options = NULL,
# par = NULL,
# setwd = NULL
# )),
undesirable_operator_linter(modify_defaults(
default_undesirable_operators,
`<<-` = NULL
)),
# TODO(lintr#2441): Use upstream implementation.
assignment_linter = NULL,
# TODO(lintr#2442): Use this once x[ , j, by] is supported.
commas_linter = NULL,
commented_code_linter = NULL,
# TODO(linter->3.2.0): Activate this.
consecutive_assertion_linter = NULL,
cyclocomp_linter = NULL,
function_argument_linter = NULL,
indentation_linter = NULL,
infix_spaces_linter = NULL,
# TODO(R>3.2.0): Activate this, extending to recognize vapply_1i(x, length).
lengths_linter = NULL,
line_length_linter = NULL,
missing_package_linter = NULL,
namespace_linter = NULL,
nonportable_path_linter = NULL,
object_name_linter = NULL,
object_usage_linter = NULL,
quotes_linter = NULL,
semicolon_linter = NULL,
spaces_inside_linter = NULL,
spaces_left_parentheses_linter = NULL,
# TODO(michaelchirico): Only exclude from vignettes, not sure what's wrong.
strings_as_factors_linter = NULL,
# TODO(lintr->3.2.0): Fix on a valid TODO style, enforce it, and re-activate.
todo_comment_linter = NULL,
# TODO(michaelchirico): Enforce these and re-activate them one-by-one. Also stop using '<<-'.
brace_linter = NULL,
condition_call_linter = NULL,
conjunct_test_linter = NULL,
fixed_regex_linter = NULL,
function_left_parentheses_linter = NULL,
if_not_else_linter = NULL,
implicit_assignment_linter = NULL,
implicit_integer_linter = NULL,
keyword_quote_linter = NULL,
length_levels_linter = NULL,
matrix_apply_linter = NULL,
missing_argument_linter = NULL,
nzchar_linter = NULL,
object_overwrite_linter = NULL,
paren_body_linter = NULL,
redundant_equals_linter = NULL,
rep_len_linter = NULL,
repeat_linter = NULL,
return_linter = NULL,
sample_int_linter = NULL,
scalar_in_linter = NULL,
seq_linter = NULL,
undesirable_function_linter = NULL,
unnecessary_concatenation_linter = NULL,
unnecessary_lambda_linter = NULL,
unnecessary_nesting_linter = NULL,
unreachable_code_linter = NULL,
unused_import_linter = NULL
)
# TODO(lintr#2172): Glob with lintr itself.
exclusions = local({
exclusion_for_dir <- function(dir, exclusions) {
files = list.files(dir, pattern = "\\.(R|Rmd)$")
stats::setNames(rep(list(exclusions), length(files)), files)
}
c(
exclusion_for_dir("tests", list(
quotes_linter = Inf,
# TODO(michaelchirico): Enforce these and re-activate them one-by-one.
implicit_integer_linter = Inf,
infix_spaces_linter = Inf,
undesirable_function_linter = Inf
)),
exclusion_for_dir("vignettes", list(
quotes_linter = Inf
# strings_as_factors_linter = Inf
# system_time_linter = Inf
))
)
})
20 changes: 11 additions & 9 deletions .ci/atime/tests.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# A function to customize R package metadata and source files to facilitate version-specific installation and testing.
#
# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R)
# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package.
# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R)
# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package.
# It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately.
#
# @param old.Package Current name of the package.
Expand Down Expand Up @@ -29,7 +29,7 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
new.Package_ <- paste0(Package_, "_", sha)
pkg_find_replace(
"DESCRIPTION",
"DESCRIPTION",
paste0("Package:\\s+", old.Package),
paste("Package:", new.Package))
pkg_find_replace(
Expand All @@ -55,13 +55,13 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
}

# A list of performance tests.
#
#
# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments:
# - N: A numeric sequence of data sizes to vary.
# - setup: An expression evaluated for every data size before measuring time/memory.
# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions.
# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions.
# This must call a function from data.table using a syntax with double or triple colon prefix.
# The package name before the colons will be replaced by a new package name that uses the commit SHA hash.
# The package name before the colons will be replaced by a new package name that uses the commit SHA hash.
# (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table)
#
# Optional parameters that may be useful to configure tests:
Expand All @@ -70,8 +70,9 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
# - sha.vec: Named character vector or a list of vectors that specify data.table-specific commit SHAs for testing across those different git commit versions.
# For historical regressions, use 'Before', 'Regression', and 'Fixed' (otherwise something like 'Slow' or 'Fast' ideally).
# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information.
# nolint start: undesirable_operator_linter. ':::' needed+appropriate here.
test.list <- list(
# Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311
# Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311
# Fixed in: https://github.com/Rdatatable/data.table/pull/4440
"Regression fixed in #4440" = list(
pkg.edit.fun = pkg.edit.fun,
Expand All @@ -88,7 +89,7 @@ test.list <- list(

# Test based on: https://github.com/Rdatatable/data.table/issues/5424
# Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491
# Fixed in: https://github.com/Rdatatable/data.table/pull/5463
# Fixed in: https://github.com/Rdatatable/data.table/pull/5463
"Regression fixed in #5463" = list(
pkg.edit.fun = pkg.edit.fun,
N = 10^seq(3, 8),
Expand All @@ -101,7 +102,7 @@ test.list <- list(
key = "g")
dt_mod <- copy(dt)
}),
expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)),
expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)),
Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits)
Expand All @@ -118,3 +119,4 @@ test.list <- list(
Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801)
Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits)
)
# nolint end: undesirable_operator_linter.
13 changes: 10 additions & 3 deletions .github/workflows/R-CMD-check-occasional.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,19 @@ jobs:
os: [macOS-latest, windows-latest, ubuntu-latest]
r: ['devel', 'release', '3.2', '3.3', '3.4', '3.5', '3.6', '4.0', '4.1', '4.2', '4.3']
locale: ['en_US.utf8', 'zh_CN.utf8', 'lv_LV.utf8'] # Chinese for translations, Latvian for collate order (#3502)
exclude:
- os: ['macOS-latest', 'windows-latest'] # only run non-English locale CI on Ubuntu
locale: ['zh_CN.utf8', 'lv_LV.utf8']
exclude: # only run non-English locale CI on Ubuntu
- os: macOS-latest
locale: 'zh_CN.utf8'
- os: macOS-latest
locale: 'lv_LV.utf8'
- os: windows-latest
locale: 'zh_CN.utf8'
- os: windows-latest
locale: 'lv_LV.utf8'

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
TEST_DATA_TABLE_WITH_OTHER_PACKAGES: true
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
Expand Down
35 changes: 35 additions & 0 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
on:
push:
branches:
- master
pull_request:
branches:
- master

name: lint

jobs:
lint:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: |
r-lib/lintr
local::.
needs: lint

- name: Lint
run: lintr::lint_package()
shell: Rscript {0}
env:
LINTR_ERROR_ON_LINT: true
R_LINTR_LINTER_FILE: .ci/.lintr
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
10. `measure` now supports user-specified `cols` argument, which can be useful to specify a subset of columns to `melt`, without having to use a regex, [#5063](https://github.com/Rdatatable/data.table/issues/5063). Thanks to @UweBlock and @Henrik-P for reporting, and @tdhock for the PR.
11. `split.data.table` recognizes `sep=` when splitting with `by=`, just like the default and data.frame methods [#5417](https://github.com/Rdatatable/data.table/issues/5417).
## BUG FIXES
1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix.
Expand Down Expand Up @@ -72,7 +74,9 @@
9. `print.data.table` now handles combination multibyte characters correctly when truncating wide string entries, [#5096](https://github.com/Rdatatable/data.table/issues/5096). Thanks to @MichaelChirico for the report and @joshhwuu for the fix.
10. `test.data.table()` runs correctly in more sessions, in particular those where the `digits` or `warn` settings are not their defaults (`7` and `0`, respectively), [#5285](https://github.com/Rdatatable/data.table/issues/5285). Thanks @OfekShilon for the report and suggested fix and @MichaelChirico for the PR.
10. `test.data.table()` runs robustly:
+ In sessions where the `digits` or `warn` options are not their defaults (`7` and `0`, respectively), [#5285](https://github.com/Rdatatable/data.table/issues/5285). Thanks @OfekShilon for the report and suggested fix and @MichaelChirico for the PR.
+ In locales where `letters != sort(letters)`, e.g. Latvian, [#3502](https://github.com/Rdatatable/data.table/issues/3502). Thanks @minemR for the report and @MichaelChirico for the fix.
11. Using `print.data.table` when truncation is needed with `row.names = FALSE` prints the indicator `---` in every value column instead of adding a blank column where the `rownames` would have been just to include `---`, [#4083](https://github.com/Rdatatable/data.table/issues/4083). Thanks @MichaelChirico for the report and @joshhwuu for the fix.
Expand Down
1 change: 0 additions & 1 deletion R/bmerge.R
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,3 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
ans$xo = xo # for further use by [.data.table
return(ans)
}

8 changes: 5 additions & 3 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2300,7 +2300,7 @@ transform.data.table = function (`_data`, ...)
{
if (!cedta()) return(NextMethod()) # nocov
`_data` = copy(`_data`)
e = eval(substitute(list(...)), `_data`, parent.frame())
e = eval(substitute(list(...)), `_data`, parent.frame())
set(`_data`, ,names(e), e)
`_data`
}
Expand Down Expand Up @@ -2452,9 +2452,11 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR
dtq[["i"]] = quote(levs)
join = TRUE
}
dots = list(...)
if (!"sep" %chin% names(dots)) dots$sep = "."
dtq[["j"]] = substitute(
list(.ll.tech.split=list(.expr), .ll.tech.split.names=paste(lapply(.BY, as.character), collapse=".")),
list(.expr = if (join) quote(if(.N == 0L) .SD[0L] else .SD) else as.name(".SD"))
list(.ll.tech.split=list(.expr), .ll.tech.split.names=paste(lapply(.BY, as.character), collapse=.sep)),
list(.expr = if (join) quote(if(.N == 0L) .SD[0L] else .SD) else as.name(".SD"), .sep = dots$sep)
)
dtq[["by"]] = substitute( # retain order, for `join` and `sorted` it will use order of `i` data.table instead of `keyby`.
.expr,
Expand Down
1 change: 0 additions & 1 deletion R/duplicated.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,3 @@ uniqueN = function(x, by = if (is.list(x)) seq_along(x) else NULL, na.rm=FALSE)
length(starts)
}
}

1 change: 0 additions & 1 deletion R/foverlaps.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,4 +247,3 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k
# Tests are added to ensure we cover these aspects (to my knowledge) to ensure that any undesirable changes in the future breaks those tests.

# Conclusion: floating point manipulations are hell!

3 changes: 1 addition & 2 deletions R/fwrite.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
length(nThread)==1L && !is.na(nThread) && nThread>=1L
)

is_gzip = compress == "gzip" || (compress == "auto" && grepl("\\.gz$", file))
is_gzip = compress == "gzip" || (compress == "auto" && endsWithAny(file, ".gz"))

file = path.expand(file) # "~/foo/bar"
if (append && (file=="" || file.exists(file))) {
Expand Down Expand Up @@ -122,4 +122,3 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
}

haszlib = function() .Call(Cdt_has_zlib)

2 changes: 1 addition & 1 deletion R/onAttach.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
nth = getDTthreads(verbose=FALSE)
if (dev)
packageStartupMessagef("data.table %s IN DEVELOPMENT built %s%s using %d threads (see ?getDTthreads). ", v, d, g, nth, appendLF=FALSE)
else
else
packageStartupMessagef("data.table %s using %d threads (see ?getDTthreads). ", v, nth, appendLF=FALSE)
packageStartupMessagef("Latest news: r-datatable.com")
if (gettext("TRANSLATION CHECK") != "TRANSLATION CHECK")
Expand Down
1 change: 0 additions & 1 deletion R/openmp-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,3 @@ setDTthreads = function(threads=NULL, restore_after_fork=NULL, percent=NULL, thr
getDTthreads = function(verbose=getOption("datatable.verbose")) {
.Call(CgetDTthreads, verbose)
}

3 changes: 1 addition & 2 deletions R/print.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ format_list_item.default = function(x, ...) {
char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) {
trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE)
if (!is.character(x) || trunc.char <= 0L) return(x)
nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096
nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096
nchar_chars = nchar(x, 'char')
is_full_width = nchar_width > nchar_chars
idx = pmin(nchar_width, nchar_chars) > trunc.char
Expand Down Expand Up @@ -272,4 +272,3 @@ trunc_cols_message = function(not_printed, abbs, class, col.names){
n, brackify(paste0(not_printed, classes))
)
}

1 change: 0 additions & 1 deletion R/setkey.R
Original file line number Diff line number Diff line change
Expand Up @@ -353,4 +353,3 @@ CJ = function(..., sorted = TRUE, unique = FALSE)
}
l
}

1 change: 0 additions & 1 deletion R/setops.R
Original file line number Diff line number Diff line change
Expand Up @@ -290,4 +290,3 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu
}
TRUE
}

1 change: 0 additions & 1 deletion R/tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,3 @@ tables = function(mb=type_size, order.col="NAME", width=80,
}
invisible(info)
}

4 changes: 2 additions & 2 deletions R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
if (length(memtest.id)) {
if (length(memtest.id)==1L) memtest.id = rep(memtest.id, 2L) # for convenience of supplying one id rather than always a range
stopifnot(length(memtest.id)<=2L, # conditions quoted to user when false so "<=2L" even though following conditions rely on ==2L
!anyNA(memtest.id), memtest.id[1L]<=memtest.id[2L])
!anyNA(memtest.id), memtest.id[1L]<=memtest.id[2L])
if (memtest==0L) memtest=1L # using memtest.id implies memtest
}
if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
Expand Down Expand Up @@ -134,7 +134,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F

owd = setwd(tempdir()) # ensure writeable directory; e.g. tests that plot may write .pdf here depending on device option and/or batch mode; #5190
on.exit(setwd(owd))

if (memtest) {
catf("\n***\n*** memtest=%d. This should be the first call in a fresh R_GC_MEM_GROW=0 R session for best results. Ctrl-C now if not.\n***\n\n", memtest)
if (is.na(rss())) stopf("memtest intended for Linux. Step through data.table:::rss() to see what went wrong.")
Expand Down
1 change: 0 additions & 1 deletion R/timetaken.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,3 @@ timetaken = function(started.at)
tt = proc.time()-started.at # diff all 3 times
paste0(format(tt[3L])," elapsed (", format(tt[1L]), " cpu)")
}

Loading

0 comments on commit 2b7fe3a

Please sign in to comment.