diff --git a/DESCRIPTION b/DESCRIPTION index 1fab0614c..25a7bd6d3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -97,5 +97,6 @@ Authors@R: c( person("Vincent", "Runge", role="ctb"), person("Christian", "Wia", role="ctb"), person("Elise", "Maigné", role="ctb"), - person("Vincent", "Rocher", role="ctb") + person("Vincent", "Rocher", role="ctb"), + person("Vijay", "Lulla", role="ctb") ) diff --git a/NEWS.md b/NEWS.md index be02a0522..f203117ba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -105,6 +105,8 @@ rowwiseDT( 10. `DT[1, on=NULL]` now works for returning the first row, [#6579](https://github.com/Rdatatable/data.table/issues/6579). Thanks to @Kodiologist for the report and @tdhock for the PR. +11. `tables()` now returns the correct size for data.tables over 2GiB, [#6607](https://github.com/Rdatatable/data.table/issues/6607). Thanks to @vlulla for the report and the PR. + ## NOTES 1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix. diff --git a/R/tables.R b/R/tables.R index 7662598d8..18729ee03 100644 --- a/R/tables.R +++ b/R/tables.R @@ -5,7 +5,7 @@ type_size = function(DT) { # for speed and ram efficiency, a lower bound by not descending into character string lengths or list items # if a more accurate and higher estimate is needed then user can pass object.size or alternative to mb= # in case number of columns is very large (e.g. 1e6 columns) then we use a for() to avoid allocation of sapply() - ans = 0L + ans = 0.0 lookup = c("raw"=1L, "integer"=4L, "double"=8L, "complex"=16L) for (i in seq_along(DT)) { col = DT[[i]] diff --git a/inst/tests/other.Rraw b/inst/tests/other.Rraw index 044d82cfa..96f40b071 100644 --- a/inst/tests/other.Rraw +++ b/inst/tests/other.Rraw @@ -766,3 +766,10 @@ if (loaded[["nanotime"]]) { # respect dec=',' for nanotime, related to #6446, corresponding to tests 2281.* test(31, fwrite(data.table(as.nanotime(.POSIXct(0))), dec=',', sep=';'), output="1970-01-01T00:00:00,000000000Z") } + +# tables() with large environment #6607 +.e <- new.env() ## to not populate the .GlobalEnv +.e[["DT"]] <- as.data.table(lapply(1:15,function(i) runif(20e6))) +res <- tables(env=.e) +test(32, res[, .(NAME,NROW,NCOL,MB)], data.table(NAME="DT",NROW=20000000L,NCOL=15L,MB=2288.0)) +rm(.e, res)