Skip to content

Commit

Permalink
Transpose(dt) allows to return list without promoting elements to max…
Browse files Browse the repository at this point in the history
…type (#5805)

* add feature

* change fill

* undup code

* update arguments

* add man

* add tests

* update usage docs

* add coverage

* add factors test

* update tests for factors

* add NEWS

* update news

* add example to docs

* update docs

* Update NEWS.md

Co-authored-by: Michael Chirico <[email protected]>

* remove extra blank line

* ease t/f error

* rm blank line

* restore logical case

* reordering test case numbers

* fix LGL case

* use unlist as proper action

* move NEWS

* fix doc

* rm blank line in tests

---------

Co-authored-by: Michael Chirico <[email protected]>
  • Loading branch information
ben-schwen and MichaelChirico authored Mar 16, 2024
1 parent 8de09b2 commit 821c8f9
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 10 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

4. Namespace-qualifying `data.table::shift()`, `data.table::first()`, or `data.table::last()` will not deactivate GForce, [#5942](https://github.com/Rdatatable/data.table/issues/5942). Thanks @MichaelChirico for the proposal and fix. Namespace-qualifying other calls like `stats::sum()`, `base::prod()`, etc., continue to work as an escape valve to avoid GForce, e.g. to ensure S3 method dispatch.

5. `transpose` gains `list.cols=` argument, [#5639](https://github.com/Rdatatable/data.table/issues/5639). Use this to return output with list columns and avoid type promotion (an exception is `factor` columns, which are promoted to `character` for consistency between `list.cols=TRUE` and `list.cols=FALSE`). This is convenient for creating a row-major representation of a table. Thanks to @MLopez-Ibanez for the request, and Benjamin Schwendinger for the PR.

## BUG FIXES

1. `unique()` returns a copy in the case when `nrow(x) <= 1` instead of a mutable alias, [#5932](https://github.com/Rdatatable/data.table/pull/5932). This is consistent with existing `unique()` behavior when the input has no duplicates but more than one row. Thanks to @brookslogan for the report and @dshemetov for the fix.
Expand Down
4 changes: 2 additions & 2 deletions R/transpose.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL) {
transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL, list.cols=FALSE) {
if (!is.null(make.names)) {
stopifnot(length(make.names)==1L)
if (is.character(make.names)) {
Expand All @@ -14,7 +14,7 @@ transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names
colnames = as.character(l[[make.names]])
l = if (is.data.table(l)) l[,-make.names,with=FALSE] else l[-make.names]
}
ans = .Call(Ctranspose, l, fill, ignore.empty, keep.names)
ans = .Call(Ctranspose, l, fill, ignore.empty, keep.names, list.cols)
if (!is.null(make.names)) setattr(ans, "names", c(keep.names, colnames))
else if (is.data.frame(l)) # including data.table but not plain list
setattr(ans, "names", c(keep.names, paste0("V", seq_len(length(ans)-length(keep.names)))))
Expand Down
14 changes: 13 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -6921,10 +6921,22 @@ ll = sapply(ll, paste, collapse=",")
test(1477.07, transpose(strsplit(ll, ",", fixed=TRUE)), tstrsplit(ll, ",", fixed=TRUE))
test(1477.08, transpose(1:5), error="l must be a list")
test(1477.09, transpose(list(as.complex(c(1, 1+5i)))), error="Unsupported column type")
test(1477.10, transpose(list(list(1:5))), error="Item 1 of list input is")
test(1477.10, transpose(list(x~y)), error="Item 1 of list input is")
test(1477.11, transpose(as.list(1:5), fill=1:2), error="fill must be a length 1 vector")
test(1477.12, transpose(as.list(1:5), ignore.empty=NA), error="ignore.empty should be logical TRUE/FALSE")
test(1477.13, transpose(list()), list())
# return list columns #5639
la = list(as.list(1:3), list("a","b","c"))
lb = list(list(1L,"a"), list(2L,"b"), list(3L,"c"))
test(1477.14, transpose(list(1:3, c("a","b","c")), list.cols=TRUE), lb)
test(1477.15, transpose(list(1:3, c("a","b","c")), list.cols=FALSE), lapply(lb, unlist))
test(1477.16, transpose(la, list.cols=TRUE), lb)
test(1477.17, transpose(lb, list.cols=TRUE), la)
test(1477.18, transpose(list(list(1L,"a"), list(2L), list(3L,"c")), list.cols=TRUE, fill="b"), la)
test(1477.19, transpose(list(1:2, c("a","b","c")), list.cols=TRUE, fill=3L), lb)
test(1477.20, transpose(list(factor(letters[1:3])), list.cols=TRUE), list(list("a"), list("b"), list("c")))
test(1477.21, transpose(list(factor(letters[1:3])), list.cols=FALSE), list("a", "b", "c"))
test(1477.22, transpose(la, list.cols=NA), error="list.cols should be logical TRUE/FALSE.")

# #480 `setDT` and 'lapply'
ll = list(data.frame(a=1), data.frame(x=1, y=2), NULL, list())
Expand Down
11 changes: 10 additions & 1 deletion man/transpose.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
}

\usage{
transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL)
transpose(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names=NULL, list.cols=FALSE)
}
\arguments{
\item{l}{ A list, data.frame or data.table. }
\item{fill}{ Default is \code{NA}. It is used to fill shorter list elements so that all elements of the transposed result have equal length. }
\item{ignore.empty}{Default is \code{FALSE}. \code{TRUE} will ignore length-0 list elements.}
\item{keep.names}{The name of the first column in the result containing the names of the input; e.g. \code{keep.names="rn"}. By default \code{NULL} and the names of the input are discarded.}
\item{make.names}{The name or number of a column in the input to use as names of the output; e.g. \code{make.names="rn"}. By default \code{NULL} and default names are given to the output columns.}
\item{list.cols}{Default is \code{FALSE}. \code{TRUE} will avoid promoting types and return columns of type \code{list} instead. \code{factor} will always be cast to \code{character}.}
}
\details{
The list elements (or columns of \code{data.frame}/\code{data.table}) should all be \code{atomic} vectors or lists. If list elements are of unequal lengths, the value provided in \code{fill} will be used so that the resulting list always has all elements of identical lengths. The class of the input object is also preserved in the transposed result.
Expand All @@ -38,6 +39,14 @@ setDT(transpose(ll, fill=0))[]
DT = data.table(x=1:5, y=6:10)
transpose(DT)

DT = data.table(x=1:3, y=c("a","b","c"))
transpose(DT, list.cols=TRUE)

# base R equivalent of transpose
l = list(1:3, c("a", "b", "c"))
lapply(seq(length(l[[1]])), function(x) lapply(l, `[[`, x))
transpose(l, list.cols=TRUE)

ll = list(nm=c('x', 'y'), 1:2, 3:4)
transpose(ll, make.names="nm")
}
Expand Down
2 changes: 1 addition & 1 deletion src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ SEXP lookup(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP overlaps(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP whichwrapper(SEXP, SEXP);
SEXP shift(SEXP, SEXP, SEXP, SEXP);
SEXP transpose(SEXP, SEXP, SEXP, SEXP);
SEXP transpose(SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP anyNA(SEXP, SEXP);
SEXP setlevels(SEXP, SEXP, SEXP);
SEXP rleid(SEXP, SEXP);
Expand Down
19 changes: 14 additions & 5 deletions src/transpose.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <Rdefines.h>
#include <time.h>

SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {
SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg, SEXP listColsArg) {

int nprotect=0;
if (!isNewList(l))
Expand All @@ -18,23 +18,26 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {
if (length(fill) != 1)
error(_("fill must be a length 1 vector, such as the default NA"));
R_len_t ln = LENGTH(l);
if (!IS_TRUE_OR_FALSE(listColsArg))
error(_("list.cols should be logical TRUE/FALSE."));
bool listCol = LOGICAL(listColsArg)[0];

// preprocessing
int maxlen=0, zerolen=0;
SEXPTYPE maxtype=0;
for (int i=0; i<ln; ++i) {
SEXP li = VECTOR_ELT(l, i);
if (!isVectorAtomic(li) && !isNull(li))
error(_("Item %d of list input is not an atomic vector"), i+1);
if (!isVectorAtomic(li) && !isNull(li) && !isNewList(li))
error(_("Item %d of list input is not either an atomic vector, or a list"), i+1);
const int len = length(li);
if (len>maxlen) maxlen=len;
zerolen += (len==0);
SEXPTYPE type = TYPEOF(li);
if (isFactor(li)) type=STRSXP;
if (type>maxtype) maxtype=type;
}
if (listCol) maxtype=VECSXP; // need to keep preprocessing for zerolen
fill = PROTECT(coerceVector(fill, maxtype)); nprotect++;

SEXP ans = PROTECT(allocVector(VECSXP, maxlen+rn)); nprotect++;
int anslen = (ignore) ? (ln - zerolen) : ln;
if (rn) {
Expand All @@ -54,7 +57,7 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {
const int len = length(li);
if (ignore && len==0) continue;
if (TYPEOF(li) != maxtype) {
li = PROTECT(isFactor(li) ? asCharacterFactor(li) : coerceVector(li, maxtype));
li = PROTECT(isFactor(li) ? (listCol ? coerceVector(asCharacterFactor(li), VECSXP) : asCharacterFactor(li)) : coerceVector(li, maxtype));
} else PROTECT(li); // extra PROTECT just to help rchk by avoiding two counter variables
switch (maxtype) {
case LGLSXP : {
Expand Down Expand Up @@ -84,6 +87,12 @@ SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg, SEXP keepNamesArg) {
SET_STRING_ELT(ansp[j+rn], k, j<len ? STRING_ELT(li, j) : sfill);
}
} break;
case VECSXP : {
const SEXP vfill = VECTOR_ELT(fill, 0);
for (int j=0; j<maxlen; ++j) {
SET_VECTOR_ELT(ansp[j+rn], k, j<len ? VECTOR_ELT(li, j) : vfill);
}
} break;
default :
error(_("Unsupported column type '%s'"), type2char(maxtype));
}
Expand Down

0 comments on commit 821c8f9

Please sign in to comment.