diff --git a/NEWS.md b/NEWS.md index 6dfaa1814..cb020d1e1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,6 +20,8 @@ 4. Namespace-qualifying `data.table::shift()`, `data.table::first()`, or `data.table::last()` will not deactivate GForce, [#5942](https://github.com/Rdatatable/data.table/issues/5942). Thanks @MichaelChirico for the proposal and fix. Namespace-qualifying other calls like `stats::sum()`, `base::prod()`, etc., continue to work as an escape valve to avoid GForce, e.g. to ensure S3 method dispatch. +5. `transpose` gains `list.cols=` argument, [#5639](https://github.com/Rdatatable/data.table/issues/5639). Use this to return output with list columns and avoid type promotion (an exception is `factor` columns which are promoted to `character` for consistency between `list.cols=TRUE` and `list.cols=FALSE`). This is convenient for creating a row-major representation of a table. Thanks to @MLopez-Ibanez for the request, and Benjamin Schwendinger for the PR. + ## BUG FIXES 1. `unique()` returns a copy the case when `nrows(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix. 
diff --git a/R/transpose.R b/R/transpose.R index 115752c04..684b135d4 100644 --- a/R/transpose.R +++ b/R/transpose.R @@ -1,4 +1,4 @@ -transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL) { +transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL, list.cols=FALSE) { if (!is.null(make.names)) { stopifnot(length(make.names)==1L) if (is.character(make.names)) { @@ -14,7 +14,7 @@ transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names colnames = as.character(l[[make.names]]) l = if (is.data.table(l)) l[,-make.names,with=FALSE] else l[-make.names] } - ans = .Call(Ctranspose, l, fill, ignore.empty, keep.names) + ans = .Call(Ctranspose, l, fill, ignore.empty, keep.names, list.cols) if (!is.null(make.names)) setattr(ans, "names", c(keep.names, colnames)) else if (is.data.frame(l)) # including data.table but not plain list setattr(ans, "names", c(keep.names, paste0("V", seq_len(length(ans)-length(keep.names))))) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 169a715a8..bca2c13ab 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6921,10 +6921,22 @@ ll = sapply(ll, paste, collapse=",") test(1477.07, transpose(strsplit(ll, ",", fixed=TRUE)), tstrsplit(ll, ",", fixed=TRUE)) test(1477.08, transpose(1:5), error="l must be a list") test(1477.09, transpose(list(as.complex(c(1, 1+5i)))), error="Unsupported column type") -test(1477.10, transpose(list(list(1:5))), error="Item 1 of list input is") +test(1477.10, transpose(list(x~y)), error="Item 1 of list input is") test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be a length 1 vector") test(1477.12, transpose(as.list(1:5), ignore.empty=NA), error="ignore.empty should be logical TRUE/FALSE") test(1477.13, transpose(list()), list()) +# return list columns #5639 +la = list(as.list(1:3), list("a","b","c")) +lb = list(list(1L,"a"), list(2L,"b"), list(3L,"c")) +test(1477.14, transpose(list(1:3, 
c("a","b","c")), list.cols=TRUE), lb) +test(1477.15, transpose(list(1:3, c("a","b","c")), list.cols=FALSE), lapply(lb, unlist)) +test(1477.16, transpose(la, list.cols=TRUE), lb) +test(1477.17, transpose(lb, list.cols=TRUE), la) +test(1477.18, transpose(list(list(1L,"a"), list(2L), list(3L,"c")), list.cols=TRUE, fill="b"), la) +test(1477.19, transpose(list(1:2, c("a","b","c")), list.cols=TRUE, fill=3L), lb) +test(1477.20, transpose(list(factor(letters[1:3])), list.cols=TRUE), list(list("a"), list("b"), list("c"))) +test(1477.21, transpose(list(factor(letters[1:3])), list.cols=FALSE), list("a", "b", "c")) +test(1477.22, transpose(la, list.cols=NA), error="list.cols should be logical TRUE/FALSE.") # #480 `setDT` and 'lapply' ll = list(data.frame(a=1), data.frame(x=1, y=2), NULL, list()) diff --git a/man/transpose.Rd b/man/transpose.Rd index 61a2d1dd1..1d54ddbd0 100644 --- a/man/transpose.Rd +++ b/man/transpose.Rd @@ -6,7 +6,7 @@ } \usage{ -transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL) +transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL, list.cols=FALSE) } \arguments{ \item{l}{ A list, data.frame or data.table. } @@ -14,6 +14,7 @@ transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL) \item{ignore.empty}{Default is \code{FALSE}. \code{TRUE} will ignore length-0 list elements.} \item{keep.names}{The name of the first column in the result containing the names of the input; e.g. \code{keep.names="rn"}. By default \code{NULL} and the names of the input are discarded.} \item{make.names}{The name or number of a column in the input to use as names of the output; e.g. \code{make.names="rn"}. By default \code{NULL} and default names are given to the output columns.} + \item{list.cols}{Default is \code{FALSE}. \code{TRUE} will avoid promoting types and return columns of type \code{list} instead. 
\code{factor} will always be cast to \code{character}.} } \details{ The list elements (or columns of \code{data.frame}/\code{data.table}) should be all \code{atomic}. If list elements are of unequal lengths, the value provided in \code{fill} will be used so that the resulting list always has all elements of identical lengths. The class of input object is also preserved in the transposed result. @@ -38,6 +39,14 @@ setDT(transpose(ll, fill=0))[] DT = data.table(x=1:5, y=6:10) transpose(DT) +DT = data.table(x=1:3, y=c("a","b","c")) +transpose(DT, list.cols=TRUE) + +# base R equivalent of transpose +l = list(1:3, c("a", "b", "c")) +lapply(seq(length(l[[1]])), function(x) lapply(l, `[[`, x)) +transpose(l, list.cols=TRUE) + ll = list(nm=c('x', 'y'), 1:2, 3:4) transpose(ll, make.names="nm") } diff --git a/src/data.table.h b/src/data.table.h index da82af7be..21b7e30e0 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -306,7 +306,7 @@ SEXP lookup(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); SEXP overlaps(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); SEXP whichwrapper(SEXP, SEXP); SEXP shift(SEXP, SEXP, SEXP, SEXP); -SEXP transpose(SEXP, SEXP, SEXP, SEXP); +SEXP transpose(SEXP, SEXP, SEXP, SEXP, SEXP); SEXP anyNA(SEXP, SEXP); SEXP setlevels(SEXP, SEXP, SEXP); SEXP rleid(SEXP, SEXP); diff --git a/src/transpose.c b/src/transpose.c index 6bc399bf3..f291cf0e7 100644 --- a/src/transpose.c +++ b/src/transpose.c @@ -2,7 +2,7 @@ #include #include -SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) { +SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg, SEXP listColsArg) { int nprotect=0; if (!isNewList(l)) @@ -18,14 +18,17 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) { if (length(fill) != 1) error(_("fill must be a length 1 vector, such as the default NA")); R_len_t ln = LENGTH(l); + if (!IS_TRUE_OR_FALSE(listColsArg)) + error(_("list.cols should be logical TRUE/FALSE.")); + bool listCol = LOGICAL(listColsArg)[0]; // 
preprocessing int maxlen=0, zerolen=0; SEXPTYPE maxtype=0; for (int i=0; imaxlen) maxlen=len; zerolen += (len==0); @@ -33,8 +36,8 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) { if (isFactor(li)) type=STRSXP; if (type>maxtype) maxtype=type; } + if (listCol) maxtype=VECSXP; // need to keep preprocessing for zerolen fill = PROTECT(coerceVector(fill, maxtype)); nprotect++; - SEXP ans = PROTECT(allocVector(VECSXP, maxlen+rn)); nprotect++; int anslen = (ignore) ? (ln - zerolen) : ln; if (rn) { @@ -54,7 +57,7 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) { const int len = length(li); if (ignore && len==0) continue; if (TYPEOF(li) != maxtype) { - li = PROTECT(isFactor(li) ? asCharacterFactor(li) : coerceVector(li, maxtype)); + li = PROTECT(isFactor(li) ? (listCol ? coerceVector(asCharacterFactor(li), VECSXP) : asCharacterFactor(li)) : coerceVector(li, maxtype)); } else PROTECT(li); // extra PROTECT just to help rchk by avoiding two counter variables switch (maxtype) { case LGLSXP : { @@ -84,6 +87,12 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) { SET_STRING_ELT(ansp[j+rn], k, j