Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

format_col/format_list_item printing generics for customization #3414

Merged
merged 14 commits into from
Aug 6, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions .ci/ci.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
## added ver argument to produce R version independent urls
## https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=17420
contrib.url <-
function (repos, type = getOption("pkgType"), ver)
function (repos, type = getOption("pkgType"), ver)
mattdowle marked this conversation as resolved.
Show resolved Hide resolved
{
type <- utils:::resolvePkgType(type)
if (is.null(repos))
if (is.null(repos))
return(NULL)
if ("@CRAN@" %in% repos && interactive()) {
cat(gettext("--- Please select a CRAN mirror for use in this session ---"), "\n", sep = "")
Expand All @@ -17,12 +17,12 @@ function (repos, type = getOption("pkgType"), ver)
m <- match("@CRAN@", repos)
nm <- names(repos)
repos[m] <- getOption("repos")["CRAN"]
if (is.null(nm))
if (is.null(nm))
nm <- rep("", length(repos))
nm[m] <- "CRAN"
names(repos) <- nm
}
if ("@CRAN@" %in% repos)
if ("@CRAN@" %in% repos)
stop("trying to use CRAN without setting a mirror")
if(missing(ver)) {
ver <- paste(R.version$major, strsplit(R.version$minor, ".", fixed=TRUE)[[1L]][1L], sep = ".")
Expand All @@ -37,15 +37,15 @@ function (repos, type = getOption("pkgType"), ver)
res <- switch(
type,
source = paste(gsub("/$", "", repos), "src", "contrib", sep = "/"),
mac.binary = paste(gsub("/$", "", repos), "bin", mac.path, "contrib", ver, sep = "/"),
mac.binary = paste(gsub("/$", "", repos), "bin", mac.path, "contrib", ver, sep = "/"),
win.binary = paste(gsub("/$", "", repos), "bin", "windows", "contrib", ver, sep = "/")
)
res
}

## returns dependencies for a package based on its DESCRIPTION file
dcf.dependencies <-
function(file = "DESCRIPTION",
function(file = "DESCRIPTION",
which = NA,
except.priority = "base") {
if (!is.character(file) || !length(file) || !all(file.exists(file)))
Expand All @@ -71,7 +71,7 @@ function(file = "DESCRIPTION",
}, which = which), use.names = FALSE)
local.extract_dependency_package_names = function (x) {
## do not filter out R like tools:::.extract_dependency_package_names, used for web/$pkg/index.html
if (is.na(x))
if (is.na(x))
return(character())
x <- unlist(strsplit(x, ",[[:space:]]*"))
x <- sub("[[:space:]]*([[:alnum:].]+).*", "\\1", x)
Expand Down Expand Up @@ -101,13 +101,13 @@ function(file = "DESCRIPTION") {
## download dependencies recursively for provided packages
## put all downloaded packages into local repository
mirror.packages <-
function(pkgs,
which = c("Depends", "Imports", "LinkingTo"),
repos = getOption("repos"),
type = c("source", "mac.binary", "win.binary"),
repodir,
except.repodir = repodir,
except.priority = "base",
function(pkgs,
which = c("Depends", "Imports", "LinkingTo"),
repos = getOption("repos"),
type = c("source", "mac.binary", "win.binary"),
repodir,
except.repodir = repodir,
except.priority = "base",
method,
quiet = TRUE,
binary.ver,
Expand Down Expand Up @@ -161,7 +161,7 @@ function(pkgs,
warning(sprintf("Packages binaries could not be found in provided reposistories for R version %s: %s", binary.ver, paste(newpkgs[!availpkgs], collapse = ", ")))
newpkgs <- newpkgs[availpkgs]
}

pkgsext <- switch(type,
"source" = "tar.gz",
"mac.binary" = "tgz",
Expand All @@ -171,8 +171,8 @@ function(pkgs,
unlink(dlfiles[file.exists(dlfiles)])
## repos argument is not used in download.packages, only as default for contriburl argument
## we provide contriburl to avoid interactive CRAN menu popup twice in mirror.packages
dp <- utils::download.packages(pkgs = newpkgs, destdir = destdir,
available = db, contriburl = repos.url,
dp <- utils::download.packages(pkgs = newpkgs, destdir = destdir,
available = db, contriburl = repos.url,
type = type, method = method, quiet = quiet)
tools::write_PACKAGES(dir = destdir, type = type, ...)
dp
Expand Down
2 changes: 1 addition & 1 deletion .ci/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ addToDrat(){
commit='Travis publish data.table: build $TRAVIS_COMMIT', \
addFiles=TRUE, fields='Revision')"
git push --force upstream gh-pages 2>err.txt

}

addToDrat
1 change: 0 additions & 1 deletion .dev/cc.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,3 @@ cc = function(test=TRUE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.
}

dd = function(omp=FALSE)cc(FALSE,debug=TRUE,omp=omp,clean=TRUE)

1 change: 0 additions & 1 deletion .dev/revdep.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,4 +219,3 @@ status()

# Now R prompt is ready to fix any problems with CRAN or Bioconductor updates.
# Then run run(), status() and log() as per section in CRAN_Release.cmd

8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,14 @@ S3method(unique, ITime)
S3method('[<-', IDate)
S3method(edit, data.table)

# generic to support custom column formatters
export(format_col)
S3method(format_col, default)
S3method(format_col, POSIXct)
S3method(format_col, expression)
export(format_list_item)
S3method(format_list_item, default)

# duplist
# getdots
# NCOL
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ unit = "s")

9. `rbindlist` now supports columns of type `expression`, [#546](https://github.com/Rdatatable/data.table/issues/546). Thanks @jangorecki for the report.

10. `data.table` printing now supports customizable methods for both columns and list column row items, part of [#1523](https://github.com/Rdatatable/data.table/issues/1523). `format_col` is S3-generic for customizing how to print whole columns; `format_list_item` is S3-generic for customizing how to print each row of a list column. Thanks variously to @mllg, who initially filed [#3338](https://github.com/Rdatatable/data.table/pulls/3338) with the seed of the idea, @franknarf1 who earlier suggested the idea of providing custom formatters, @fparages who submitted a patch to improve the printing of timezones for [#2842](https://github.com/Rdatatable/data.table/issues/2842), @RichardRedding for pointing out an error relating to printing wide `expression` columns in [#3011](https://github.com/Rdatatable/data.table/issues/3011), and @MichaelChirico for the ultimate implementation. See `?print.data.table` for examples.

## BUG FIXES

1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085).
Expand Down Expand Up @@ -1282,4 +1284,3 @@ When `j` is a symbol (as in the quanteda and xgboost examples above) it will con


# data.table v1.9.8 (Nov 2016) back to v1.2 (Aug 2008) has been moved to [NEWS.0.md](https://github.com/Rdatatable/data.table/blob/master/NEWS.0.md)

1 change: 0 additions & 1 deletion R/fwrite.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,3 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
showProgress, is_gzip, bom, yaml, verbose)
invisible()
}

81 changes: 43 additions & 38 deletions R/print.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
print.keys=getOption("datatable.print.keys"),
trunc.cols=getOption("datatable.print.trunc.cols"),
quote=FALSE,
timezone=FALSE, ...) {
...) {
# topn - print the top topn and bottom topn rows with '---' inbetween (5)
# nrows - under this the whole (small) table is printed, unless topn is provided (100)
# class - should column class be printed underneath column name? (FALSE)
Expand Down Expand Up @@ -68,7 +68,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
rn = seq_len(nrow(x))
printdots = FALSE
}
toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...) # na.encode=FALSE so that NA in character cols print as <NA>
toprint=format.data.table(toprint, na.encode=FALSE, ...) # na.encode=FALSE so that NA in character cols print as <NA>

require_bit64_if_needed(x)

# FR #5020 - add row.names = logical argument to print.data.table
Expand Down Expand Up @@ -135,44 +136,11 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
invisible(x)
}

format.data.table = function (x, ..., justify="none", timezone = FALSE) {
format.data.table <- function (x, ..., justify="none") {
if (is.atomic(x) && !is.null(x)) {
stop("Internal structure doesn't seem to be a list. Possibly corrupt data.table.")
}
format.item = function(x) {
if (is.null(x)) # NULL item in a list column
""
else if (is.atomic(x) || inherits(x,"formula")) # FR #2591 - format.data.table issue with columns of class "formula"
paste(c(format(head(x, 6L), justify=justify, ...), if (length(x) > 6L) "..."), collapse=",") # fix for #5435 - format has to be added here...
else
paste0("<", class(x)[1L], ">")
}
# FR #2842 add timezone for posix timestamps
format.timezone = function(col) { # paste timezone to a time object
tz = attr(col,'tzone', exact=TRUE)
if (!is.null(tz)) { # date object with tz
nas = is.na(col)
col = paste0(as.character(col)," ",tz) # parse to character
col[nas] = NA_character_
}
return(col)
}
# FR #1091 for pretty printing of character
# TODO: maybe instead of doing "this is...", we could do "this ... test"?
char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) {
trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE)
if (!is.character(x) || trunc.char <= 0L) return(x)
idx = which(nchar(x) > trunc.char)
x[idx] = paste0(substr(x[idx], 1L, as.integer(trunc.char)), "...")
x
}
do.call("cbind",lapply(x,function(col,...) {
if (!is.null(dim(col))) return("<multi-column>")
if(timezone) col = format.timezone(col)
if (is.list(col)) col = vapply_1c(col, format.item)
else col = format(char.trunc(col), justify=justify, ...) # added an else here to fix #5435
col
},...))
do.call("cbind", lapply(x, format_col, ..., justify=justify))
}

mimicsAutoPrint = c("knit_print.default")
Expand All @@ -189,6 +157,44 @@ shouldPrint = function(x) {
# as opposed to printing a blank line, for excluding col.names per PR #1483
cut_top = function(x) cat(capture.output(x)[-1L], sep = '\n')

format_col = function(x, ...) {
UseMethod("format_col")
}

format_list_item = function(x, ...) {
UseMethod("format_list_item")
}

format_col.default = function(x, ...) {
if (!is.null(dim(x))) return("<multi-column>")
if (is.list(x)) return(vapply_1c(x, format_list_item, ...))
format(char.trunc(x), ...) # added an else here to fix #5435
}

# #2842 -- different columns can have different tzone, so force usage in output
format_col.POSIXct = function(x, ...) format(x, usetz = TRUE, ...)

# #3011 -- expression columns can wrap to newlines which breaks printing
format_col.expression = function(x, ...) format(char.trunc(as.character(x)), ...)

format_list_item.default = function(x, ...) {
if (is.null(x)) return ("") # NULL item in a list column
if (is.atomic(x) || inherits(x, "formula")) # FR #2591 - format.data.table issue with columns of class "formula"
paste(c(format(head(x, 6L), ...), if (length(x) > 6L) "..."), collapse=",") # fix for #5435 - format has to be added here...
else
paste0("<", class(x)[1L], ">")
}

# FR #1091 for pretty printing of character
# TODO: maybe instead of doing "this is...", we could do "this ... test"?
char.trunc <- function(x, trunc.char = getOption("datatable.prettyprint.char")) {
trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE)
if (!is.character(x) || trunc.char <= 0L) return(x)
idx = which(nchar(x) > trunc.char)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not crucial for this PR in particular, but wouldn't nchar(x, 'width') be more appropriate?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea. Would need a test too in a new PR: your Chinese multi-byte data?

x[idx] = paste0(substr(x[idx], 1L, as.integer(trunc.char)), "...")
x
}

# to calculate widths of data.table for PR #4074
# gets the width of the data.table at each column
# and compares it to the console width
Expand Down Expand Up @@ -224,4 +230,3 @@ trunc_cols_message = function(not_printed, abbs, class, col.names){
n, brackify(paste0(not_printed, classes))
))
}

1 change: 0 additions & 1 deletion R/setkey.R
Original file line number Diff line number Diff line change
Expand Up @@ -369,4 +369,3 @@ CJ = function(..., sorted = TRUE, unique = FALSE)
}
l
}

1 change: 0 additions & 1 deletion R/transpose.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,3 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) {
setattr(ans, 'names', names)
ans
}

37 changes: 30 additions & 7 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -4884,7 +4884,7 @@ c(2497.94263862333,
"fr", "fg", "fsal", "mr", "mg", "msal"), class = c("data.table",
"data.frame"), row.names = c(NA, -2L))

ans1 = capture.output(print(DT, digits=4, row.names=FALSE))
ans1 = capture.output(print(DT, digits=4L, row.names=FALSE))
ans2 = c(" fisyr er eg esal fr fg fsal mr mg msal",
" 1995 1,3 0.01973 2330,2424 4,4 0.03931 2521,2521 5,30 0.01978 2572,4216",
" 1996 1,3 0.01973 2263,2354 4,4 0.03931 2449,2449 5,30 0.01978 2498,4095")
Expand Down Expand Up @@ -14507,12 +14507,13 @@ test(2025.52, fread(f), data.table(A=1:2, B=c("foobar","baz"), C=3:4))

# printing timezone, #2842
DT = data.table(t1 = as.POSIXct("1982-04-26 13:34:56", tz = "Europe/Madrid"),t2 = as.POSIXct("2019-01-01 19:00:01",tz = "UTC"))
test(2026.1, capture.output(print(DT))[2], "1: 1982-04-26 13:34:56 2019-01-01 19:00:01")
test(2026.2, capture.output(print(DT,timezone = TRUE))[2], "1: 1982-04-26 13:34:56 Europe/Madrid 2019-01-01 19:00:01 UTC")
DT = data.table(v1 = c(1,as.numeric(NA)))
DT[2,t:= as.POSIXct("2019-01-01 19:00:01",tz = "UTC")]
test(2026.3, capture.output(print(DT)), c(" v1 t","1: 1 <NA>", "2: NA 2019-01-01 19:00:01"))
test(2026.4, capture.output(print(DT, timezone = TRUE)), c(" v1 t","1: 1 <NA>","2: NA 2019-01-01 19:00:01 UTC"))
test(2026.1, capture.output(print(DT))[2L], "1: 1982-04-26 13:34:56 CEST 2019-01-01 19:00:01 UTC")
DT = data.table(v1 = c(1, as.numeric(NA)))
DT[2L, t:= as.POSIXct("2019-01-01 19:00:01", tz = "UTC")]
test(2026.2, capture.output(print(DT)),
c(" v1 t",
"1: 1 <NA>",
"2: NA 2019-01-01 19:00:01 UTC"))

# empty item in j=list(x, ) errors gracefully, #3507
DT = data.table(a = 1:5)
Expand Down Expand Up @@ -16708,6 +16709,28 @@ A = data.table(c1 = 1, c2 = 'asd', c3 = expression(as.character(Sys.time())))
B = data.table(c1 = 3, c2 = 'qwe', c3 = expression(as.character(Sys.time()+5)))
test(2129, rbind(A,B)$c3, expression(as.character(Sys.time()), as.character(Sys.time()+5)))

# format_col and format_list_item printing helpers/generics
## Use case: solve #2842 by defining format_col.POSIXct to have usetz = TRUE
DT = data.table(
t1 = as.POSIXct('2018-05-01 12:34:56', tz = 'UTC'),
t2 = as.POSIXct('2018-05-01 12:34:56', tz = 'Asia/Singapore')
)
test(2130.1, DT, output = 'UTC.*\\+08')

# #3011 -- default expression printing can break format_col.default
test(2130.2, print(data.table(e = expression(1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13))), output = '1 + 2 + 3')

# format_col generic is used
format_col.complex = function(x, ...) sprintf('(%.1f, %.1fi)', Re(x), Im(x))
registerS3method("format_col", "complex", format_col.complex)
x = data.table(z = c(1 + 3i, 2 - 1i, pi + 2.718i))
test(2130.3, x, output = '(1.0, 3.0i)')

# format_list_item() generic is used
format_list_item.myclass <- function(x, ...) paste0("<", class(x)[1L], ":", x$id, ">")
registerS3method("format_list_item", "myclass", format_list_item.myclass)
DT = data.table(row = 1:2, objs = list(structure(list(id = "foo"), class = "myclass"), structure(list(id = "bar"), class = "myclass")))
test(2130.4, print(DT), output = "myclass:foo.*myclass:bar")

########################
# Add new tests here #
Expand Down
1 change: 0 additions & 1 deletion man/coalesce.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,3 @@ fcoalesce(list(x,y,z)) # same
fcoalesce(x, list(y,z)) # same
}
\keyword{ data }

2 changes: 1 addition & 1 deletion man/frank.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
\title{Fast rank}
\description{
Similar to \code{base::rank} but \emph{much faster}. And it accepts vectors, lists, data.frames or data.tables as input. In addition to the \code{ties.method} possibilities provided by \code{base::rank}, it also provides \code{ties.method="dense"}.

Like \code{\link{forder}}, sorting is done in "C-locale"; in particular, this may affect how capital/lowercase letters are ranked. See Details on \code{forder} for more.

\code{bit64::integer64} type is also supported.
Expand Down
2 changes: 1 addition & 1 deletion man/like.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
\title{ Convenience function for calling grep. }
\description{
Intended for use in \code{i} in \code{\link[=data.table]{[.data.table}}, i.e., for subsetting/filtering.

Syntax should be familiar to SQL users, with interpretation as regex.
}
\usage{
Expand Down
Loading