Skip to content

Commit

Permalink
Merge branch 'master' into condition-signals
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico committed Apr 9, 2024
2 parents f18ce75 + b09649b commit 25e524b
Show file tree
Hide file tree
Showing 24 changed files with 454 additions and 261 deletions.
21 changes: 21 additions & 0 deletions .dev/cc.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,26 @@ sourceDir = function(path=getwd(), trace = TRUE, ...) {
if(trace) cat("\n")
}

# NB: since we only import from default packages, this is rarely needed, but useful for truly minimal dev environments (#6056)
sourceImports = function(path=getwd(), quiet=FALSE) {
nsFile = file.path(path, "NAMESPACE")
if (!file.exists(nsFile)) {
if (!quiet) warning("No NAMESPACE file found, required to guarantee imports resolve correctly")
return(invisible())
}
nsParsedImports = parseNamespaceFile(basename(path), "..")$imports # weird signature to this function
if (!quiet && length(nsParsedImports)) cat(sprintf("Ensuring objects from %d import entries in NAMESPACE resolve correctly\n", length(nsParsedImports)))
for (ii in seq_along(nsParsedImports)) {
entry = nsParsedImports[[ii]]
# getNamespaceExports includes weird objects but that's intentional, consider evalq(.__C__VIRTUAL, asNamespace("Rcpp")) due to import(methods) in that NAMESPACE
imported = if (length(entry) == 1L) getNamespaceExports(entry) else entry[[2L]]
# Assign directly to better imitate actual namespace imports. Earlier tried to require(include.only=) these objects, but R doesn't allow multiple such require, meaning we can't add more objects later in tests, see:
# https://stat.ethz.ch/pipermail/r-devel/2024-April/083319.html
for (import in imported) assign(import, getExportedValue(entry[[1L]], import), .GlobalEnv)
}
return(invisible())
}

cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys.getenv("PROJ_PATH"), CC="gcc", quiet=FALSE) {
if (!missing(cc_dir)) {
warning("'cc_dir' arg is deprecated, use 'path' argument or 'PROJ_PATH' env var instead")
Expand Down Expand Up @@ -81,6 +101,7 @@ cc = function(test=FALSE, clean=FALSE, debug=FALSE, omp=!debug, cc_dir, path=Sys
.GlobalEnv[[Call$name]] = Call$address
for (Extern in xx$.External)
.GlobalEnv[[Extern$name]] = Extern$address
sourceImports(path, quiet=quiet)
sourceDir(file.path(path, "R"), trace=!quiet)
if (base::getRversion()<"4.0.0") rm(list=c("rbind.data.table", "cbind.data.table"), envir=.GlobalEnv) # 3968 follow up
.GlobalEnv$testDir = function(x) file.path(path,"inst/tests",x)
Expand Down
5 changes: 4 additions & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
* @jangorecki @michaelchirico

# melt
# reshaping
/R/fcast.R @tdhock
/R/fmelt.R @tdhock
/src/fcast.c @tdhock
/src/fmelt.c @tdhock
/man/dcast.data.table.Rd @tdhock
/man/melt.data.table.Rd @tdhock
/vignettes/datatable-reshape.Rmd @tdhock

Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,6 @@ Authors@R: c(
person("Dereck","de Mezquita", role="ctb"),
person("Michael","Czekanski", role="ctb"),
person("Dmitry", "Shemetov", role="ctb"),
person("Nitish", "Jha", role="ctb")
person("Nitish", "Jha", role="ctb"),
person("Joshua", "Wu", role="ctb")
)
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ useDynLib("data_table", .registration=TRUE)

## For S4-ization
importFrom(methods, "S3Part<-", slotNames)
importMethodsFrom(methods, "[")
exportClasses(data.table, IDate, ITime)
##

Expand Down
16 changes: 16 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

# data.table [v1.15.99](https://github.com/Rdatatable/data.table/milestone/30) (in development)

## BREAKING CHANGES

1. Usage of comma-separated character strings representing multiple columns in `data.table()`'s `key=` argument and `[`'s `by=`/`keyby=` arguments is deprecated, [#4357](https://github.com/Rdatatable/data.table/issues/4357). While sometimes convenient, ultimately it introduces inconsistency in implementation that is not worth the benefit to maintain. NB: this hard deprecation is temporary in the development version. Before release, it will soften into the normal data.table deprecation cycle starting from introducing the new behavior with an option, then changing the default for the option with a warning, then upgrading the warning to an error before finally removing the option and the error.

## NEW FEATURES

1. `print.data.table()` shows empty (`NULL`) list column entries as `[NULL]` for emphasis. Previously they would just print nothing (same as for empty string). Part of [#4198](https://github.com/Rdatatable/data.table/issues/4198). Thanks @sritchie73 for the proposal and fix.
Expand All @@ -26,6 +30,8 @@

7. `fread`'s `fill` argument now also accepts an `integer` in addition to boolean values. `fread` always guesses the number of columns based on reading a sample of rows in the file. When `fill=TRUE`, `fread` stops reading and ignores subsequent rows when this estimate winds up too low, e.g. when the sampled rows happen to exclude some rows that are even wider, [#2727](https://github.com/Rdatatable/data.table/issues/2727) [#2691](https://github.com/Rdatatable/data.table/issues/2691) [#4130](https://github.com/Rdatatable/data.table/issues/4130) [#3436](https://github.com/Rdatatable/data.table/issues/3436). Providing an `integer` as argument for `fill` allows for a manual estimate of the number of columns instead, [#1812](https://github.com/Rdatatable/data.table/issues/1812) [#5378](https://github.com/Rdatatable/data.table/issues/5378). Thanks to @jangorecki, @christellacaze, @Yiguan, @alexdthomas, @ibombonato, @Befrancesco, @TobiasGold for reporting/requesting, and Benjamin Schwendinger for the PR.
8. Computations in `j` can return a matrix or array _if it is one-dimensional_, e.g. a row or column vector, when `j` is a list of columns during grouping, [#783](https://github.com/Rdatatable/data.table/issues/783). Previously a matrix could be provided `DT[, expr, by]` form, but not `DT[, list(expr), by]` form; this resolves that inconsistency. It is still an error to return a "true" array, e.g. a `2x3` matrix.
## BUG FIXES
1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix.
Expand All @@ -38,6 +44,10 @@
5. `fwrite(x, row.names=TRUE)` with `x` a `matrix` writes `row.names` when present, not row numbers, [#5315](https://github.com/Rdatatable/data.table/issues/5315). Thanks to @Liripo for the report, and @ben-schwen for the fix.
6. `patterns()` helper for `.SDcols` now accepts arguments `ignore.case`, `perl`, `fixed`, and `useBytes`, which are passed to `grep`, #5387. Thanks to @iago-pssjd for the feature request, and @tdhock for the implementation.
7. `melt` returns an integer column for `variable` when `measure.vars` is a list of length=1, consistent with the documented behavior, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. Any users who were relying on this behavior can change `measure.vars=list("col_name")` (output `variable` was column name, now is column index/integer) to `measure.vars="col_name"` (`variable` still is column name).
## NOTES
1. `transform` method for data.table sped up substantially when creating new columns on large tables. Thanks to @OfekShilon for the report and PR. The implemented solution was proposed by @ColeMiller1.
Expand All @@ -56,6 +66,12 @@
8. OpenMP detection when building from source on Mac is improved, [#4348](https://github.com/Rdatatable/data.table/issues/4348). Thanks @jameshester and @kevinushey for the request and @kevinushey for the PR, @jameslamb for the advice and @s-u of R-core for ensuring CRAN machines are configured to support the uxpected setup.
9. `print.data.table` now handles combination multibyte characters correctly when truncating wide string entries, [#5096](https://github.com/Rdatatable/data.table/issues/5096). Thanks to @MichaelChirico for the report and @joshhwuu for the fix.
10. `test.data.table()` runs correctly in more sessions, in particular those where the `digits` or `warn` settings are not their defaults (`7` and `0`, respectively), [#5285](https://github.com/Rdatatable/data.table/issues/5285). Thanks @OfekShilon for the report and suggested fix and @MichaelChirico for the PR.
11. Using `print.data.table` when truncation is needed with `row.names = FALSE` prints the indicator `---` in every value column instead of adding a blank column where the `rownames` would have been just to include `---`, [#4083](https://github.com/Rdatatable/data.table/issues/4083). Thanks @MichaelChirico for the report and @joshhwuu for the fix.
# data.table [v1.15.0](https://github.com/Rdatatable/data.table/milestone/29) (30 Jan 2024)
## BREAKING CHANGE
Expand Down
16 changes: 8 additions & 8 deletions R/AllS4.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,21 @@ if ("package:data.table" %in% search()) stopf("data.table package loaded. When d

## Allows data.table to be defined as an object of an S4 class,
## or even have data.table be a super class of an S4 class.
setOldClass(c('data.frame'))
setOldClass(c('data.table', 'data.frame'))
methods::setOldClass(c('data.frame'))
methods::setOldClass(c('data.table', 'data.frame'))

## as(some.data.frame, "data.table")
setAs("data.frame", "data.table", function(from) {
methods::setAs("data.frame", "data.table", function(from) {
as.data.table(from)
})

## as(some.data.table, "data.frame")
setAs("data.table", "data.frame", function(from) {
methods::setAs("data.table", "data.frame", function(from) {
as.data.frame(from)
})

setOldClass("IDate")
setOldClass("ITime")
methods::setOldClass("IDate")
methods::setOldClass("ITime")

setAs("character", "IDate", function(from) as.IDate(from))
setAs("character", "ITime", function(from) as.ITime(from))
methods::setAs("character", "IDate", function(from) as.IDate(from))
methods::setAs("character", "ITime", function(from) as.ITime(from))
17 changes: 10 additions & 7 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dim.data.table = function(x)
}

.global = new.env() # thanks to: http://stackoverflow.com/a/12605694/403310
setPackageName("data.table",.global)
methods::setPackageName("data.table",.global)
.global$print = ""

# NB: if adding to/editing this list, be sure to do the following:
Expand Down Expand Up @@ -62,8 +62,7 @@ data.table = function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, str
if (!is.null(key)) {
if (!is.character(key)) stopf("key argument of data.table() must be character")
if (length(key)==1L) {
key = strsplit(key,split=",")[[1L]]
# eg key="A,B"; a syntax only useful in key argument to data.table(), really.
if (key != strsplit(key,split=",")[[1L]]) stopf("Usage of comma-separated literals in %s is deprecated, please split such entries yourself before passing to data.table", "key=")
}
setkeyv(ans,key)
} else {
Expand Down Expand Up @@ -806,8 +805,7 @@ replace_dot_alias = function(e) {

if (mode(bysub) == "character") {
if (any(grepl(",", bysub, fixed = TRUE))) {
if (length(bysub)>1L) stopf("'by' is a character vector length %d but one or more items include a comma. Either pass a vector of column names (which can contain spaces, but no commas), or pass a vector length 1 containing comma separated column names. See ?data.table for other possibilities.", length(bysub))
bysub = strsplit(bysub, split=",", fixed=TRUE)[[1L]]
stopf("Usage of comma-separated literals in %s is deprecated, please split such entries yourself before passing to data.table", "by=")
}
bysub = gsub("^`(.*)`$", "\\1", bysub) # see test 138
nzidx = nzchar(bysub)
Expand Down Expand Up @@ -1024,8 +1022,13 @@ replace_dot_alias = function(e) {
.SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame())
} else {
if (colsub %iscall% 'patterns') {
# each pattern gives a new filter condition, intersect the end result
.SDcols = Reduce(intersect, eval_with_cols(colsub, names_x))
patterns_list_or_vector = eval_with_cols(colsub, names_x)
.SDcols = if (is.list(patterns_list_or_vector)) {
# each pattern gives a new filter condition, intersect the end result
Reduce(intersect, patterns_list_or_vector)
} else {
patterns_list_or_vector
}
} else {
.SDcols = eval(colsub, parent.frame(), parent.frame())
# allow filtering via function in .SDcols, #3950
Expand Down
5 changes: 3 additions & 2 deletions R/fmelt.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@ melt.default = function(data, ..., na.rm = FALSE, value.name = "value") {
# nocov end
}

patterns = function(..., cols=character(0L)) {
patterns = function(..., cols=character(0L), ignore.case=FALSE, perl=FALSE, fixed=FALSE, useBytes=FALSE) {
# if ... has no names, names(list(...)) will be "";
# this assures they'll be NULL instead
L = list(...)
p = unlist(L, use.names = any(nzchar(names(L))))
if (!is.character(p))
stopf("Input patterns must be of type character.")
matched = lapply(p, grep, cols)
matched = lapply(p, grep, cols, ignore.case=ignore.case, perl=perl, fixed=fixed, useBytes=useBytes)
# replace with lengths when R 3.2.0 dependency arrives
if (length(idx <- which(sapply(matched, length) == 0L)))
stopf('Pattern(s) not found: [%s]', brackify(p[idx]))
if (length(matched) == 1L) return(matched[[1L]])
matched
}

Expand Down
2 changes: 1 addition & 1 deletion R/fread.R
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC")
if (!is.character(key))
stopf("key argument of data.table() must be a character vector naming columns (NB: col.names are applied before this)")
if (length(key) == 1L) {
key = strsplit(key, split = ",", fixed = TRUE)[[1L]]
if (key != strsplit(key,split=",")[[1L]]) stopf("Usage of comma-separated literals in %s is deprecated, please split such entries yourself before passing to data.table", "key=")
}
setkeyv(ans, key)
}
Expand Down
15 changes: 12 additions & 3 deletions R/print.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,11 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
toprint = toprint_subset(toprint, cols_to_print)
}
if (printdots) {
toprint = rbind(head(toprint, topn + isTRUE(class)), "---"="", tail(toprint, topn))
if (isFALSE(row.names)) {
toprint = rbind(head(toprint, topn + isTRUE(class)), "---", tail(toprint, topn)) # 4083
} else {
toprint = rbind(head(toprint, topn + isTRUE(class)), "---"="", tail(toprint, topn))
}
rownames(toprint) = format(rownames(toprint), justify="right")
if (col.names == "none") {
cut_colnames(print(toprint, right=TRUE, quote=quote))
Expand Down Expand Up @@ -229,11 +233,16 @@ format_list_item.default = function(x, ...) {

# FR #1091 for pretty printing of character
# TODO: maybe instead of doing "this is...", we could do "this ... test"?
# Current implementation may have issues when dealing with strings that have combinations of full-width and half-width characters,
# if this becomes a problem in the future, we could consider string traversal instead.
char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) {
trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE)
if (!is.character(x) || trunc.char <= 0L) return(x)
idx = which(nchar(x) > trunc.char)
x[idx] = paste0(substr(x[idx], 1L, as.integer(trunc.char)), "...")
nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096
nchar_chars = nchar(x, 'char')
is_full_width = nchar_width > nchar_chars
idx = pmin(nchar_width, nchar_chars) > trunc.char
x[idx] = paste0(strtrim(x[idx], trunc.char * fifelse(is_full_width[idx], 2L, 1L)), "...")
x
}

Expand Down
4 changes: 3 additions & 1 deletion R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
datatable.print.trunc.cols = FALSE, #4552
datatable.rbindlist.check = NULL,
datatable.integer64 = "integer64",
digits = 7L, # ensure printing rounds to the expected number of digits in all sessions, #5285
warn = 0L, # ensure signals are emitted as they are in the code, #5285
warnPartialMatchArgs = base::getRversion()>="3.6.0", # ensure we don't rely on partial argument matching in internal code, #3664; >=3.6.0 for #3865
warnPartialMatchAttr = TRUE,
warnPartialMatchDollar = TRUE,
Expand Down Expand Up @@ -416,7 +418,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
}
}
if (!fail && !length(error) && (!length(output) || !missing(y))) { # TODO test y when output=, too
y = try(y,TRUE)
capture.output(y <- try(y, silent=TRUE)) # y might produce verbose output, just toss it
if (identical(x,y)) return(invisible(TRUE))
all.equal.result = TRUE
if (is.data.frame(x) && is.data.frame(y)) {
Expand Down
Loading

0 comments on commit 25e524b

Please sign in to comment.