Rdatatable · mattdowle · Aug 6, 2021 · May 10, 2019 · Jan 2, 2020 · Jan 2, 2020
diff --git a/.ci/ci.R b/.ci/ci.R
@@ -5,10 +5,10 @@
 ## added ver argument to produce R version independent urls
 ## https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=17420
 contrib.url <-
-function (repos, type = getOption("pkgType"), ver) 
+function (repos, type = getOption("pkgType"), ver)
 {
   type <- utils:::resolvePkgType(type)
-  if (is.null(repos)) 
+  if (is.null(repos))
     return(NULL)
   if ("@CRAN@" %in% repos && interactive()) {
     cat(gettext("--- Please select a CRAN mirror for use in this session ---"), "\n", sep = "")
@@ -17,12 +17,12 @@ function (repos, type = getOption("pkgType"), ver)
     m <- match("@CRAN@", repos)
     nm <- names(repos)
     repos[m] <- getOption("repos")["CRAN"]
-    if (is.null(nm)) 
+    if (is.null(nm))
       nm <- rep("", length(repos))
     nm[m] <- "CRAN"
     names(repos) <- nm
   }
-  if ("@CRAN@" %in% repos) 
+  if ("@CRAN@" %in% repos)
     stop("trying to use CRAN without setting a mirror")
   if(missing(ver)) {
     ver <- paste(R.version$major, strsplit(R.version$minor, ".", fixed=TRUE)[[1L]][1L], sep = ".")
@@ -37,15 +37,15 @@ function (repos, type = getOption("pkgType"), ver)
   res <- switch(
     type,
     source = paste(gsub("/$", "", repos), "src", "contrib", sep = "/"),
-    mac.binary = paste(gsub("/$", "", repos), "bin", mac.path, "contrib", ver, sep = "/"), 
+    mac.binary = paste(gsub("/$", "", repos), "bin", mac.path, "contrib", ver, sep = "/"),
     win.binary = paste(gsub("/$", "", repos), "bin", "windows", "contrib", ver, sep = "/")
   )
   res
 }
 
 ## returns dependencies for a package based on its DESCRIPTION file
 dcf.dependencies <-
-function(file = "DESCRIPTION", 
+function(file = "DESCRIPTION",
          which = NA,
          except.priority = "base") {
   if (!is.character(file) || !length(file) || !all(file.exists(file)))
@@ -71,7 +71,7 @@ function(file = "DESCRIPTION",
   }, which = which), use.names = FALSE)
   local.extract_dependency_package_names = function (x) {
     ## do not filter out R like tools:::.extract_dependency_package_names, used for web/$pkg/index.html
-    if (is.na(x)) 
+    if (is.na(x))
       return(character())
     x <- unlist(strsplit(x, ",[[:space:]]*"))
     x <- sub("[[:space:]]*([[:alnum:].]+).*", "\\1", x)
@@ -101,13 +101,13 @@ function(file = "DESCRIPTION") {
 ## download dependencies recursively for provided packages
 ## put all downloaded packages into local repository
 mirror.packages <-
-function(pkgs, 
-         which = c("Depends", "Imports", "LinkingTo"), 
-         repos = getOption("repos"), 
-         type = c("source", "mac.binary", "win.binary"), 
-         repodir, 
-         except.repodir = repodir, 
-         except.priority = "base", 
+function(pkgs,
+         which = c("Depends", "Imports", "LinkingTo"),
+         repos = getOption("repos"),
+         type = c("source", "mac.binary", "win.binary"),
+         repodir,
+         except.repodir = repodir,
+         except.priority = "base",
          method,
          quiet = TRUE,
          binary.ver,
@@ -161,7 +161,7 @@ function(pkgs,
     warning(sprintf("Packages binaries could not be found in provided reposistories for R version %s: %s", binary.ver, paste(newpkgs[!availpkgs], collapse = ", ")))
     newpkgs <- newpkgs[availpkgs]
   }
-  
+
   pkgsext <- switch(type,
                     "source" = "tar.gz",
                     "mac.binary" = "tgz",
@@ -171,8 +171,8 @@ function(pkgs,
   unlink(dlfiles[file.exists(dlfiles)])
   ## repos argument is not used in download.packages, only as default for contriburl argument
   ## we provide contriburl to avoid interactive CRAN menu popup twice in mirror.packages
-  dp <- utils::download.packages(pkgs = newpkgs, destdir = destdir, 
-                                 available = db, contriburl = repos.url, 
+  dp <- utils::download.packages(pkgs = newpkgs, destdir = destdir,
+                                 available = db, contriburl = repos.url,
                                  type = type, method = method, quiet = quiet)
   tools::write_PACKAGES(dir = destdir, type = type, ...)
   dp

diff --git a/.ci/deploy.sh b/.ci/deploy.sh
@@ -24,7 +24,7 @@ addToDrat(){
     commit='Travis publish data.table: build $TRAVIS_COMMIT', \
     addFiles=TRUE, fields='Revision')"
   git push --force upstream gh-pages 2>err.txt
-  
+
 }
 
 addToDrat
diff --git a/.dev/cc.R b/.dev/cc.R
@@ -88,4 +88,3 @@ cc = function(test=TRUE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.
 }
 
 dd = function(omp=FALSE)cc(FALSE,debug=TRUE,omp=omp,clean=TRUE)
-
diff --git a/.dev/revdep.R b/.dev/revdep.R
@@ -219,4 +219,3 @@ status()
 
 # Now R prompt is ready to fix any problems with CRAN or Bioconductor updates.
 # Then run run(), status() and log() as per section in CRAN_Release.cmd
-
@@ -185,6 +185,14 @@ S3method(unique, ITime)
 S3method('[<-', IDate)
 S3method(edit, data.table)
 
+# generic to support custom column formatters
+export(format_col)
+S3method(format_col, default)
+S3method(format_col, POSIXct)
+S3method(format_col, expression)
+export(format_list_item)
+S3method(format_list_item, default)
+
 # duplist
 # getdots
 # NCOL

@@ -71,6 +71,8 @@ unit = "s")
 
 9. `rbindlist` now supports columns of type `expression`, [#546](https://github.com/Rdatatable/data.table/issues/546). Thanks @jangorecki for the report.
 
+10. `data.table` printing now supports customizable methods for both columns and list column row items, part of [#1523](https://github.com/Rdatatable/data.table/issues/1523). `format_col` is S3-generic for customizing how to print whole columns; `format_list_item` is S3-generic for customizing how to print each row of a list column. Thanks variously to @mllg, who initially filed [#3338](https://github.com/Rdatatable/data.table/pulls/3338) with the seed of the idea, @franknarf1 who earlier suggested the idea of providing custom formatters, @fparages who submitted a patch to improve the printing of timezones for [#2842](https://github.com/Rdatatable/data.table/issues/2842), @RichardRedding for pointing out an error relating to printing wide `expression` columns in [#3011](https://github.com/Rdatatable/data.table/issues/3011), and @MichaelChirico for the ultimate implementation. See `?print.data.table` for examples.
+
 ## BUG FIXES
 
 1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085).
@@ -1282,4 +1284,3 @@ When `j` is a symbol (as in the quanteda and xgboost examples above) it will con
 
 
 # data.table v1.9.8 (Nov 2016) back to v1.2 (Aug 2008) has been moved to [NEWS.0.md](https://github.com/Rdatatable/data.table/blob/master/NEWS.0.md)
-
diff --git a/R/fwrite.R b/R/fwrite.R
@@ -111,4 +111,3 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
         showProgress, is_gzip, bom, yaml, verbose)
   invisible()
 }
-
@@ -8,7 +8,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
                print.keys=getOption("datatable.print.keys"),
                trunc.cols=getOption("datatable.print.trunc.cols"),
                quote=FALSE,
-               timezone=FALSE, ...) {
+               ...) {
   # topn  - print the top topn and bottom topn rows with '---' inbetween (5)
   # nrows - under this the whole (small) table is printed, unless topn is provided (100)
   # class - should column class be printed underneath column name? (FALSE)
@@ -68,7 +68,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
     rn = seq_len(nrow(x))
     printdots = FALSE
   }
-  toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...)  # na.encode=FALSE so that NA in character cols print as <NA>
+  toprint=format.data.table(toprint, na.encode=FALSE, ...)  # na.encode=FALSE so that NA in character cols print as <NA>
+
   require_bit64_if_needed(x)
 
   # FR #5020 - add row.names = logical argument to print.data.table
@@ -135,44 +136,11 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
   invisible(x)
 }
 
-format.data.table = function (x, ..., justify="none", timezone = FALSE) {
+format.data.table <- function (x, ..., justify="none") {
   if (is.atomic(x) && !is.null(x)) {
     stop("Internal structure doesn't seem to be a list. Possibly corrupt data.table.")
   }
-  format.item = function(x) {
-    if (is.null(x))  # NULL item in a list column
-      ""
-    else if (is.atomic(x) || inherits(x,"formula")) # FR #2591 - format.data.table issue with columns of class "formula"
-      paste(c(format(head(x, 6L), justify=justify, ...), if (length(x) > 6L) "..."), collapse=",")  # fix for #5435 - format has to be added here...
-    else
-      paste0("<", class(x)[1L], ">")
-  }
-  # FR #2842 add timezone for posix timestamps
-  format.timezone = function(col) { # paste timezone to a time object
-    tz = attr(col,'tzone', exact=TRUE)
-    if (!is.null(tz)) { # date object with tz
-      nas = is.na(col)
-      col = paste0(as.character(col)," ",tz) # parse to character
-      col[nas] = NA_character_
-    }
-    return(col)
-  }
-  # FR #1091 for pretty printing of character
-  # TODO: maybe instead of doing "this is...", we could do "this ... test"?
-  char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) {
-    trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE)
-    if (!is.character(x) || trunc.char <= 0L) return(x)
-    idx = which(nchar(x) > trunc.char)
-    x[idx] = paste0(substr(x[idx], 1L, as.integer(trunc.char)), "...")
-    x
-  }
-  do.call("cbind",lapply(x,function(col,...) {
-    if (!is.null(dim(col))) return("<multi-column>")
-    if(timezone) col = format.timezone(col)
-    if (is.list(col)) col = vapply_1c(col, format.item)
-    else col = format(char.trunc(col), justify=justify, ...) # added an else here to fix #5435
-    col
-  },...))
+  do.call("cbind", lapply(x, format_col, ..., justify=justify))
 }
 
 mimicsAutoPrint = c("knit_print.default")
@@ -189,6 +157,44 @@ shouldPrint = function(x) {
 #   as opposed to printing a blank line, for excluding col.names per PR #1483
 cut_top = function(x) cat(capture.output(x)[-1L], sep = '\n')
 
+format_col = function(x, ...) {
+  UseMethod("format_col")
+}
+
+format_list_item = function(x, ...) {
+  UseMethod("format_list_item")
+}
+
+format_col.default = function(x, ...) {
+  if (!is.null(dim(x))) return("<multi-column>")
+  if (is.list(x)) return(vapply_1c(x, format_list_item, ...))
+  format(char.trunc(x), ...) # added an else here to fix #5435
+}
+
+# #2842 -- different columns can have different tzone, so force usage in output
+format_col.POSIXct = function(x, ...) format(x, usetz = TRUE, ...)
+
+# #3011 -- expression columns can wrap to newlines which breaks printing
+format_col.expression = function(x, ...) format(char.trunc(as.character(x)), ...)
+
+format_list_item.default = function(x, ...) {
+  if (is.null(x)) return ("") # NULL item in a list column
+  if (is.atomic(x) || inherits(x, "formula")) # FR #2591 - format.data.table issue with columns of class "formula"
+    paste(c(format(head(x, 6L), ...), if (length(x) > 6L) "..."), collapse=",")  # fix for #5435 - format has to be added here...
+  else
+    paste0("<", class(x)[1L], ">")
+}
+
+# FR #1091 for pretty printing of character
+# TODO: maybe instead of doing "this is...", we could do "this ... test"?
+char.trunc <- function(x, trunc.char = getOption("datatable.prettyprint.char")) {
+  trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE)
+  if (!is.character(x) || trunc.char <= 0L) return(x)
+  idx = which(nchar(x) > trunc.char)
+  x[idx] = paste0(substr(x[idx], 1L, as.integer(trunc.char)), "...")
+  x
+}
+
 # to calculate widths of data.table for PR #4074
 # gets the width of the data.table at each column
 #   and compares it to the console width
@@ -224,4 +230,3 @@ trunc_cols_message = function(not_printed, abbs, class, col.names){
     n, brackify(paste0(not_printed, classes))
   ))
 }
-
diff --git a/R/setkey.R b/R/setkey.R
@@ -369,4 +369,3 @@ CJ = function(..., sorted = TRUE, unique = FALSE)
   }
   l
 }
-
diff --git a/R/transpose.R b/R/transpose.R
@@ -47,4 +47,3 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) {
   setattr(ans, 'names', names)
   ans
 }
-
@@ -4884,7 +4884,7 @@ c(2497.94263862333,
 "fr", "fg", "fsal", "mr", "mg", "msal"), class = c("data.table",
 "data.frame"), row.names = c(NA, -2L))
 
-ans1 = capture.output(print(DT, digits=4, row.names=FALSE))
+ans1 = capture.output(print(DT, digits=4L, row.names=FALSE))
 ans2 = c(" fisyr  er      eg      esal  fr      fg      fsal    mr      mg      msal",
          "  1995 1,3 0.01973 2330,2424 4,4 0.03931 2521,2521  5,30 0.01978 2572,4216",
          "  1996 1,3 0.01973 2263,2354 4,4 0.03931 2449,2449  5,30 0.01978 2498,4095")
@@ -14507,12 +14507,13 @@ test(2025.52, fread(f), data.table(A=1:2, B=c("foobar","baz"), C=3:4))
 
 # printing timezone, #2842
 DT = data.table(t1 = as.POSIXct("1982-04-26 13:34:56", tz = "Europe/Madrid"),t2 = as.POSIXct("2019-01-01 19:00:01",tz = "UTC"))
-test(2026.1, capture.output(print(DT))[2], "1: 1982-04-26 13:34:56 2019-01-01 19:00:01")
-test(2026.2, capture.output(print(DT,timezone = TRUE))[2], "1: 1982-04-26 13:34:56 Europe/Madrid 2019-01-01 19:00:01 UTC")
-DT = data.table(v1 = c(1,as.numeric(NA)))
-DT[2,t:= as.POSIXct("2019-01-01 19:00:01",tz = "UTC")]
-test(2026.3, capture.output(print(DT)), c("   v1                   t","1:  1                <NA>", "2: NA 2019-01-01 19:00:01"))
-test(2026.4, capture.output(print(DT, timezone = TRUE)), c("   v1                       t","1:  1                    <NA>","2: NA 2019-01-01 19:00:01 UTC"))
+test(2026.1, capture.output(print(DT))[2L], "1: 1982-04-26 13:34:56 CEST 2019-01-01 19:00:01 UTC")
+DT = data.table(v1 = c(1, as.numeric(NA)))
+DT[2L, t:= as.POSIXct("2019-01-01 19:00:01", tz = "UTC")]
+test(2026.2, capture.output(print(DT)),
+     c("   v1                       t",
+       "1:  1                    <NA>",
+       "2: NA 2019-01-01 19:00:01 UTC"))
 
 # empty item in j=list(x, ) errors gracefully, #3507
 DT = data.table(a = 1:5)
@@ -16708,6 +16709,28 @@ A = data.table(c1 = 1, c2 = 'asd', c3 = expression(as.character(Sys.time())))
 B = data.table(c1 = 3, c2 = 'qwe', c3 = expression(as.character(Sys.time()+5)))
 test(2129, rbind(A,B)$c3, expression(as.character(Sys.time()), as.character(Sys.time()+5)))
 
+# format_col and format_list_item printing helpers/generics
+## Use case: solve #2842 by defining format_col.POSIXct to have usetz = TRUE
+DT = data.table(
+  t1 = as.POSIXct('2018-05-01 12:34:56', tz = 'UTC'),
+  t2 = as.POSIXct('2018-05-01 12:34:56', tz = 'Asia/Singapore')
+)
+test(2130.1, DT, output = 'UTC.*\\+08')
+
+# #3011 -- default expression printing can break format_col.default
+test(2130.2, print(data.table(e = expression(1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13))), output = '1 + 2 + 3')
+
+# format_col generic is used
+format_col.complex = function(x, ...) sprintf('(%.1f, %.1fi)', Re(x), Im(x))
+registerS3method("format_col", "complex", format_col.complex)
+x = data.table(z = c(1 + 3i, 2 - 1i, pi + 2.718i))
+test(2130.3, x, output = '(1.0, 3.0i)')
+
+# format_list_item() generic is used
+format_list_item.myclass <- function(x, ...) paste0("<", class(x)[1L], ":", x$id, ">")
+registerS3method("format_list_item", "myclass", format_list_item.myclass)
+DT = data.table(row = 1:2, objs = list(structure(list(id = "foo"), class = "myclass"),  structure(list(id = "bar"), class = "myclass")))
+test(2130.4, print(DT), output = "myclass:foo.*myclass:bar")
 
 ########################
 #  Add new tests here  #

diff --git a/man/coalesce.Rd b/man/coalesce.Rd
@@ -33,4 +33,3 @@ fcoalesce(list(x,y,z))   # same
 fcoalesce(x, list(y,z))  # same
 }
 \keyword{ data }
-
diff --git a/man/frank.Rd b/man/frank.Rd
@@ -5,7 +5,7 @@
 \title{Fast rank}
 \description{
   Similar to \code{base::rank} but \emph{much faster}. And it accepts vectors, lists, data.frames or data.tables as input. In addition to the \code{ties.method} possibilities provided by \code{base::rank}, it also provides \code{ties.method="dense"}.
-  
+
   Like \code{\link{forder}}, sorting is done in "C-locale"; in particular, this may affect how capital/lowercase letters are ranked. See Details on \code{forder} for more.
 
   \code{bit64::integer64} type is also supported.

diff --git a/man/like.Rd b/man/like.Rd
@@ -6,7 +6,7 @@
 \title{ Convenience function for calling grep. }
 \description{
   Intended for use in \code{i} in \code{\link[=data.table]{[.data.table}}, i.e., for subsetting/filtering.
-  
+
   Syntax should be familiar to SQL users, with interpretation as regex.
 }
 \usage{
Original file line number	Diff line number	Diff line change
Expand Up		@@ -88,4 +88,3 @@ cc = function(test=TRUE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.
		}

		dd = function(omp=FALSE)cc(FALSE,debug=TRUE,omp=omp,clean=TRUE)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -219,4 +219,3 @@ status()

		# Now R prompt is ready to fix any problems with CRAN or Bioconductor updates.
		# Then run run(), status() and log() as per section in CRAN_Release.cmd
Original file line number	Diff line number	Diff line change
Expand Up		@@ -111,4 +111,3 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
		showProgress, is_gzip, bom, yaml, verbose)
		invisible()
		}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -369,4 +369,3 @@ CJ = function(..., sorted = TRUE, unique = FALSE)
		}
		l
		}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -47,4 +47,3 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) {
		setattr(ans, 'names', names)
		ans
		}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -33,4 +33,3 @@ fcoalesce(list(x,y,z)) # same
		fcoalesce(x, list(y,z)) # same
		}
		\keyword{ data }