diff --git a/NEWS.md b/NEWS.md index 81406fed2..aaf7f11fe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,10 @@ # data.table [v1.15.99](https://github.com/Rdatatable/data.table/milestone/30) (in development) +## NOTES + +1. `transform` method for data.table sped up substantially when creating new columns on large tables. Thanks to @OfekShilon for the report and PR. The implemented solution was proposed by @ColeMiller1. + # data.table [v1.15.0](https://github.com/Rdatatable/data.table/milestone/29) (30 Jan 2024) ## BREAKING CHANGE diff --git a/R/data.table.R b/R/data.table.R index af701b988..3ca194d83 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2345,25 +2345,10 @@ transform.data.table = function (`_data`, ...) # basically transform.data.frame with data.table instead of data.frame, and retains key { if (!cedta()) return(NextMethod()) # nocov - e = eval(substitute(list(...)), `_data`, parent.frame()) - tags = names(e) - inx = chmatch(tags, names(`_data`)) - matched = !is.na(inx) - if (any(matched)) { - .Call(C_unlock, `_data`) # fix for #1641, now covered by test 104.2 - `_data`[,inx[matched]] = e[matched] - `_data` = as.data.table(`_data`) - } - if (!all(matched)) { - ans = do.call("data.table", c(list(`_data`), e[!matched])) - } else { - ans = `_data` - } - key.cols = key(`_data`) - if (!any(tags %chin% key.cols)) { - setattr(ans, "sorted", key.cols) - } - ans + `_data` = copy(`_data`) + e = eval(substitute(list(...)), `_data`, parent.frame()) + set(`_data`, ,names(e), e) + `_data` } subset.data.table = function (x, subset, select, ...) diff --git a/man/transform.data.table.Rd b/man/transform.data.table.Rd index 4a173ce7a..1bf6fb551 100644 --- a/man/transform.data.table.Rd +++ b/man/transform.data.table.Rd @@ -7,9 +7,7 @@ \description{ Utilities for \code{data.table} transformation. - \strong{\code{transform} by group is particularly slow. Please use \code{:=} by group instead.} - - \code{within}, \code{transform} and other similar functions in \code{data.table} are not just provided for users who expect them to work, but for non-data.table-aware packages to retain keys, for example. Hopefully the (much) faster and more convenient \code{data.table} syntax will be used in time. See examples. + \code{within}, \code{transform} and other similar functions in \code{data.table} are not just provided for users who expect them to work, but for non-data.table-aware packages to retain keys, for example. Hopefully the faster and more convenient \code{data.table} syntax will be used in time. See examples. } \usage{ \method{transform}{data.table}(`_data`, \ldots)