diff --git a/NEWS.md b/NEWS.md index 7340b189b..a6bd12ea8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -55,7 +55,7 @@ rowwiseDT( # [1] "V1" "b" "c" ``` -5. Queries like `DT[, min(x):max(x)]` now work as expected, i.e. the same as `DT[, seq(min(x), max(x))]` or `with(DT, min(x):max(x))`, [#2069](https://github.com/Rdatatable/data.table/issues/2069). Shorthand like `DT[, a:b]` meaning "select from columns `a` through `b`" still works. Thanks to @franknarf1 for reporting and @jangorecki for the fix. +5. Queries like `DT[, min(x):max(x)]` now work as expected, i.e. the same as `DT[, seq(min(x), max(x))]` or `with(DT, min(x):max(x))`, [#2069](https://github.com/Rdatatable/data.table/issues/2069). Shorthand like `DT[, a:b]` meaning "select from columns `a` through `b`" still works. Thanks to @franknarf1 for reporting, @jangorecki for the fix, and @MichaelChirico for a follow-up ensuring back-compatibility. 6. Fixed a segfault in `fcase()`, [#6448](https://github.com/Rdatatable/data.table/issues/6448). Thanks @ethanbsmith for reporting with reprex, @aitap for finding the root cause, and @MichaelChirico for the PR. diff --git a/R/data.table.R b/R/data.table.R index 0f2c6746a..60a0dbe50 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -244,9 +244,11 @@ replace_dot_alias = function(e) { if (!missing(j)) { jsub = replace_dot_alias(jsub) root = root_name(jsub) - if ((root == ":" && !is.call(jsub[[2L]]) && !is.call(jsub[[3L]])) || ## x[, V1:V2]; but not x[, (V1):(V2)] #2069 + av = all.vars(jsub) + all..names = FALSE + if ((.is_withFALSE_range(jsub, x, root, av)) || (root %chin% c("-","!") && jsub[[2L]] %iscall% '(' && jsub[[2L]][[2L]] %iscall% ':') || ## x[, !(V8:V10)] - ( (!length(av<-all.vars(jsub)) || all(startsWith(av, ".."))) && ## x[, "V1"]; x[, ..v] + ( (!length(av) || (all..names <- all(startsWith(av, "..")))) && ## x[, "V1"]; x[, ..v] root %chin% c("","c","paste","paste0","-","!") && ## x[, c("V1","V2")]; x[, paste("V",1:2,sep="")]; x[, paste0("V",1:2)] missingby )) { # test 763. TODO: likely that !missingby iff with==TRUE (so, with can be removed) # When no variable names (i.e. symbols) occur in j, scope doesn't matter because there are no symbols to find. @@ -261,18 +263,19 @@ replace_dot_alias = function(e) { # want decisions like this to depend on the data or vector lengths since that can introduce # inconsistency reminiscent of drop=TRUE in [.data.frame that we seek to avoid. with=FALSE - if (length(av)) { + if (all..names) { for (..name in av) { name = substr(..name, 3L, nchar(..name)) if (!nzchar(name)) stopf("The symbol .. is invalid. The .. prefix must be followed by at least one character.") + parent_has_..name = exists(..name, where=parent.frame()) if (!exists(name, where=parent.frame())) { - suggested = if (exists(..name, where=parent.frame())) + suggested = if (parent_has_..name) gettextf(" Variable '..%s' does exist in calling scope though, so please just removed the .. prefix from that variable name in calling scope.", name) # We have recommended 'manual' .. prefix in the past, so try to be helpful else "" stopf("Variable '%s' is not found in calling scope. Looking in calling scope because you used the .. prefix.%s", name, suggested) - } else if (exists(..name, where=parent.frame())) { + } else if (parent_has_..name) { warningf("Both '%1$s' and '..%1$s' exist in calling scope. Please remove the '..%1$s' variable in calling scope for clarity.", name) } } @@ -3028,6 +3031,13 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) { ids } +.is_withFALSE_range = function(e, x, root=root_name(e), vars=all.vars(e)) { + if (root != ":") return(FALSE) + if (!length(vars)) return(TRUE) # e.g. 1:10 + if (!all(vars %chin% names(x))) return(TRUE) # e.g. 1:ncol(x) + is.name(e[[1L]]) && is.name(e[[2L]]) # e.g. V1:V2, but not min(V1):max(V2) or 1:max(V2) +} + # GForce functions # to add a new function to GForce (from the R side -- the easy part!): # (1) add it to gfuns diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 12b25d174..9e44f71a6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19165,14 +19165,17 @@ for (opt in c(0, 1, 2)) { } # Confusing behavior with DT[, min(var):max(var)] #2069 -DT = data.table(t = c(2L, 1L, 3L)) +DT = data.table(t = c(2L, 1L, 3L), a=0, b=1) test(2284.1, DT[, min(t):max(t)], with(DT, min(t):max(t))) test(2284.2, DT[, 1:max(t)], with(DT, 1:max(t))) test(2284.3, DT[, max(t):1], with(DT, max(t):1)) test(2284.4, DT[, 1:t[1L]], with(DT, 1:t[1L])) test(2284.5, DT[, t[2L]:1], with(DT, t[2L]:1)) -test(2284.6, DT[1L, t:max(t)], with(DT[1L], t:max(t))) -test(2284.7, DT[1L, min(t):t], with(DT[1L], min(t):t)) +test(2284.6, DT[, min(a):max(t)], with(DT, min(a):max(t))) +# but not every `:` with a call in from or to is with=TRUE, #6486 +test(2284.7, DT[, 1:ncol(DT)], DT) +test(2284.8, DT[, 2:(ncol(DT) - 1L)], DT[, "a"]) +test(2284.9, DT[, (ncol(DT) - 1L):ncol(DT)], DT[, c("a", "b")]) # Extra regression tests for #4772, which was already incidentally fixed by #5183. x = data.table(a=1:3)