From d4244e8d5df2210af2af4ad9415b70e4af9db38a Mon Sep 17 00:00:00 2001 From: r2evans Date: Tue, 3 Dec 2024 07:31:53 -0500 Subject: [PATCH 1/4] add format_list_item.data.frame (#6593) * Closes #6592, nested frames break if any are just 1-column * typo * upd tests * fix typo in test * style, tweaks * refine NEWS * fix link in NEWS * Add as contributor --------- Co-authored-by: Bill Evans Co-authored-by: Michael Chirico Co-authored-by: Michael Chirico --- DESCRIPTION | 3 ++- NAMESPACE | 1 + NEWS.md | 2 ++ R/print.data.table.R | 5 +++++ inst/tests/tests.Rraw | 13 +++++++++++++ 5 files changed, 23 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 87a18c742..7447fed2a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -99,5 +99,6 @@ Authors@R: c( person("Elise", "Maigné", role="ctb"), person("Vincent", "Rocher", role="ctb"), person("Vijay", "Lulla", role="ctb"), - person("Aljaž", "Sluga", role="ctb") + person("Aljaž", "Sluga", role="ctb"), + person("Bill", "Evans", role="ctb") ) diff --git a/NAMESPACE b/NAMESPACE index 50b81e8e6..58037b5f6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -201,6 +201,7 @@ S3method(format_col, POSIXct) S3method(format_col, expression) export(format_list_item) S3method(format_list_item, default) +S3method(format_list_item, data.frame) export(fdroplevels, setdroplevels) S3method(droplevels, data.table) diff --git a/NEWS.md b/NEWS.md index 23213934f..dd556f8f5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -113,6 +113,8 @@ rowwiseDT( 13. `rbindlist(l, use.names=TRUE)` can now handle different encodings for the column names in different entries of `l`, [#5452](https://github.com/Rdatatable/data.table/issues/5452). Thanks to @MEO265 for the report, and Benjamin Schwendinger for the fix. +14. Added a `data.frame` method for `format_list_item()` to fix error printing data.tables with columns containing 1-column data.frames, [#6592](https://github.com/Rdatatable/data.table/issues/6592). Thanks to @r2evans for the bug report and fix. + ## NOTES 1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix. diff --git a/R/print.data.table.R b/R/print.data.table.R index 583bc2219..09bcd0579 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -247,6 +247,11 @@ format_list_item.default = function(x, ...) { } } +# #6592 -- nested 1-column frames breaks printing +format_list_item.data.frame = function(x, ...) { + paste0("<", class1(x), paste_dims(x), ">") +} + # FR #1091 for pretty printing of character # TODO: maybe instead of doing "this is...", we could do "this ... test"? # Current implementation may have issues when dealing with strings that have combinations of full-width and half-width characters, diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 451dad684..4b4cddf73 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20651,3 +20651,16 @@ test(2298.2, rbindlist(list(y,x), use.names=TRUE), data.table("\u00f6"=c(4,2), " set(y, j="\u00e4", value=NULL) test(2298.3, rbindlist(list(x,y), use.names=TRUE, fill=TRUE), data.table("\u00e4"=c(1,NA), "\u00f6"=c(2,4), "\u00fc"=c(3,5))) test(2298.4, rbindlist(list(y,x), use.names=TRUE, fill=TRUE), data.table("\u00f6"=c(4,2), "\u00fc"=c(5,3), "\u00e4"=c(NA,1))) + +# #6592: printing nested single-column frames +test(2299.01, format_list_item(data.frame(a=1)), output="") +test(2299.02, format_list_item(data.frame(a=1)[0,,drop=FALSE]), output="") +test(2299.03, format_list_item(data.frame(a=1)[,0]), output="") +test(2299.04, format_list_item(data.frame(a=1, b=2)[0,,drop=FALSE]), output="") +test(2299.06, format_list_item(data.table(a=1)), output="") +test(2299.07, format_list_item(data.table(a=numeric())), output="") +test(2299.08, format_list_item(data.table()), output="") +test(2299.09, format_list_item(data.table(a=numeric(), b=numeric())), output="") +test(2299.10, data.table(a=1), output="a\n1: *1") +test(2299.11, data.table(a=list(data.frame(b=1))), output="a\n1: ") +test(2299.12, data.table(a=list(data.table(b=1))), output="a\n1: ") From 6be4cddd1efa6f873843c1533f1f5212527d2c21 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:35:54 +0100 Subject: [PATCH 2/4] add clang-tidy to GLCI (#6580) * add clang tidy * use clang-tidy readability * use c99 standard --------- Co-authored-by: Michael Chirico --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6bde4d405..91d1a9849 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -194,6 +194,7 @@ test-lin-dev-clang-cran: - echo 'CFLAGS=-g -O2 -fno-common -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars - echo 'CXXFLAGS=-g -O2 -fno-common -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars - *install-deps + - clang-tidy -extra-arg=-I/usr/local/lib/R/include -checks='readability-inconsistent-declaration-parameter' src/*.c -- -std=c99 - R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) - (! grep "warning:" data.table.Rcheck/00install.out) - >- From 98cf24e635ad47ba02617929d2237fca263e2b0c Mon Sep 17 00:00:00 2001 From: gurbuxanink <48993383+gurbuxanink@users.noreply.github.com> Date: Tue, 3 Dec 2024 08:31:53 -0500 Subject: [PATCH 3/4] add list column example in intro vignette (#6558) --- vignettes/datatable-intro.Rmd | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/vignettes/datatable-intro.Rmd b/vignettes/datatable-intro.Rmd index d32a25eb9..9bc4d027c 100644 --- a/vignettes/datatable-intro.Rmd +++ b/vignettes/datatable-intro.Rmd @@ -643,6 +643,26 @@ DT[, print(list(c(a,b))), by = ID] # (2) In (1), for each group, a vector is returned, with length = 6,4,2 here. However, (2) returns a list of length 1 for each group, with its first element holding vectors of length 6,4,2. Therefore, (1) results in a length of ` 6+4+2 = `r 6+4+2``, whereas (2) returns `1+1+1=`r 1+1+1``. +Flexibility of j allows us to store any list object as an element of data.table. For example, when statistical models are fit to groups, these models can be stored in a data.table. Code is concise and easy to understand. + +```{r} +## Do long distance flights cover up departure delay more than short distance flights? +## Does cover up vary by month? +flights[, `:=`(makeup = dep_delay - arr_delay)] + +makeup.models <- flights[, .(fit = list(lm(makeup ~ distance))), by = .(month)] +makeup.models[, .(coefdist = coef(fit[[1]])[2], rsq = summary(fit[[1]])$r.squared), by = .(month)] +``` +Using data.frames, we need more complicated code to obtain same result. +```{r} +setDF(flights) +flights.split <- split(flights, f = flights$month) +makeup.models.list <- lapply(flights.split, function(df) c(month = df$month[1], fit = list(lm(makeup ~ distance, data = df)))) +makeup.models.df <- do.call(rbind, makeup.models.list) +sapply(makeup.models.df[, "fit"], function(model) c(coefdist = coef(model)[2], rsq = summary(model)$r.squared)) |> t() |> data.frame() +setDT(flights) +``` + ## Summary The general form of `data.table` syntax is: From a94401aa217973d5de411ca6252b8f33d1286a11 Mon Sep 17 00:00:00 2001 From: aitap Date: Thu, 5 Dec 2024 07:33:26 +0000 Subject: [PATCH 4/4] Solve the `knitr` auto-printing problem by registering a method for `knit_print` (#6589) * Respect shouldPrint when auto-printing from knitr Implementing a method for the knitr::knit_print generic makes it possible to customise the behaviour without looking up the call stack. The current solution only works on R >= 3.6.0 because that's where delayed S3 registration has been introduced. * Delay S3method(knit_print, data.table) for R < 3.6 Use setHook() to ensure that registerS3method() will be called in the same session if 'knitr' is loaded later. Not needed on R >= 3.6.0 where S3method(knitr::knit_print) will do the right thing by itself. * ws-only style * put setHook() in a branch * Position comment on the same line * Restore the still-required #2369 condition * Regression test for #2369 Avoid breaking it again like in #6589 * NEWS entry * Comment the .onLoad condition Co-authored-by: Michael Chirico * restore unconditional setHook() --------- Co-authored-by: Michael Chirico --- NAMESPACE | 1 + NEWS.md | 2 ++ R/onLoad.R | 9 +++++++++ R/print.data.table.R | 24 +++++++----------------- tests/autoprint.R | 12 ++++++++++++ tests/autoprint.Rout.save | 18 ++++++++++++++++++ 6 files changed, 49 insertions(+), 17 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 58037b5f6..2497f0cf9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -104,6 +104,7 @@ if (getRversion() >= "4.0.0") { # version of R (and that is checked in .onLoad with error if not). export(.rbind.data.table) # only export in R<4.0.0 where it is still used; R-devel now detects it is missing doc, #5600 } +if (getRversion() >= "3.6.0") S3method(knitr::knit_print, data.table) # else manual delayed registration from the onLoad hook S3method(dim, data.table) S3method(dimnames, data.table) S3method("dimnames<-", data.table) diff --git a/NEWS.md b/NEWS.md index dd556f8f5..53f37d478 100644 --- a/NEWS.md +++ b/NEWS.md @@ -115,6 +115,8 @@ rowwiseDT( 14. Added a `data.frame` method for `format_list_item()` to fix error printing data.tables with columns containing 1-column data.frames, [#6592](https://github.com/Rdatatable/data.table/issues/6592). Thanks to @r2evans for the bug report and fix. +15. The auto-printing suppression in `knitr` documents is now done by implementing a method for `knit_print` instead of looking up the call stack, [#6589](https://github.com/Rdatatable/data.table/pull/6589). Thanks to @jangorecki for the report [#6509](https://github.com/Rdatatable/data.table/issues/6509) and @aitap for the fix. + ## NOTES 1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix. diff --git a/R/onLoad.R b/R/onLoad.R index ef96849e8..662f3132e 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -66,6 +66,15 @@ lockBinding("rbind.data.frame",baseenv()) } } + if (session_r_version < "3.6.0") { # corresponds to S3method() directive in NAMESPACE + # no delayed registration support for NAMESPACE; perform it manually + if (isNamespaceLoaded("knitr")) { + registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) + } + setHook(packageEvent("knitr", "onLoad"), function(...) { + registerS3method("knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr")) + }) + } # Set options for the speed boost in v1.8.0 by avoiding 'default' arg of getOption(,default=) # In fread and fwrite we have moved back to using getOption's default argument since it is unlikely fread and fread will be called in a loop many times, plus they diff --git a/R/print.data.table.R b/R/print.data.table.R index 09bcd0579..bc3cf56ed 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -32,21 +32,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), SYS = sys.calls() if (length(SYS) <= 2L || # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) ( length(SYS) >= 3L && is.symbol(thisSYS <- SYS[[length(SYS)-2L]][[1L]]) && - as.character(thisSYS) == 'source') || # suppress printing from source(echo = TRUE) calls, #2369 - ( length(SYS) > 3L && is.symbol(thisSYS <- SYS[[length(SYS)-3L]][[1L]]) && - as.character(thisSYS) %chin% mimicsAutoPrint ) || # suppress printing from knitr, #6509 - # In previous versions of knitr, call stack when auto-printing looked like: - # knit_print -> knit_print.default -> normal_print -> print -> print.data.table - # and we detected and avoided that by checking fourth last call in the stack. - # As of September 2024, the call stack can also look like: - # knit_print.default -> normal_print -> render -> evalq -> evalq -> print -> print.data.table - # so we have to check the 7th last call in the stack too. - # Ideally, we would like to return invisibly from DT[, foo := bar] and have knitr respect that, but a flag in - # .Primitive("[") sets all values returned from [.data.table to visible, hence the need for printing hacks later. - ( length(SYS) > 6L && is.symbol(thisSYS <- SYS[[length(SYS)-6L]][[1L]]) && - as.character(thisSYS) %chin% mimicsAutoPrint ) ) { + as.character(thisSYS) == 'source') ) { # suppress printing from source(echo = TRUE) calls, #2369 return(invisible(x)) - # is.symbol() temp fix for #1758. } } if (!is.numeric(nrows)) nrows = 100L @@ -168,9 +155,6 @@ format.data.table = function(x, ..., justify="none") { do.call(cbind, lapply(x, format_col, ..., justify=justify)) } -mimicsAutoPrint = c("knit_print.default") -# add maybe repr_text.default. See https://github.com/Rdatatable/data.table/issues/933#issuecomment-220237965 - shouldPrint = function(x) { ret = (identical(.global$print, "") || # to save address() calls and adding lots of address strings to R's global cache address(x)!=.global$print) @@ -303,3 +287,9 @@ trunc_cols_message = function(not_printed, abbs, class, col.names){ domain=NA ) } + +# Maybe add a method for repr::repr_text. See https://github.com/Rdatatable/data.table/issues/933#issuecomment-220237965 +knit_print.data.table <- function(x, ...) { + if (!shouldPrint(x)) return(invisible(x)) + NextMethod() +} diff --git a/tests/autoprint.R b/tests/autoprint.R index 1e4694668..964af7089 100644 --- a/tests/autoprint.R +++ b/tests/autoprint.R @@ -43,3 +43,15 @@ DT[1,a:=10L][] # yes. ...[] == oops, forgot print(...) tryCatch(DT[,foo:=ColumnNameTypo], error=function(e) e$message) # error: not found. DT # yes DT # yes + +# Regression test for auto-printing suppression in source(), #2369 +local({ + f = tempfile(fileext = ".R") + on.exit(unlink(f)) + writeLines(c( + "library(data.table)", + "DT = data.table(a = 1)", + "DT[,a:=1] # not auto-printed" + ), f) + source(f, local = TRUE, echo = TRUE) +}) diff --git a/tests/autoprint.Rout.save b/tests/autoprint.Rout.save index 41aaa8965..6b0b641b0 100644 --- a/tests/autoprint.Rout.save +++ b/tests/autoprint.Rout.save @@ -136,6 +136,24 @@ NULL 1: 10 2: 10 > +> # Regression test for auto-printing suppression in source(), #2369 +> local({ ++ f = tempfile(fileext = ".R") ++ on.exit(unlink(f)) ++ writeLines(c( ++ "library(data.table)", ++ "DT = data.table(a = 1)", ++ "DT[,a:=1] # not auto-printed" ++ ), f) ++ source(f, local = TRUE, echo = TRUE) ++ }) + +> library(data.table) + +> DT = data.table(a = 1) + +> DT[, `:=`(a, 1)] +> > proc.time() user system elapsed 0.223 0.016 0.231