Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More consistent handling of POSIXlt input to setDT #6658

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2900,7 +2900,7 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
home = function(x, env) {
if (identical(env, emptyenv()))
stopf("Cannot find symbol %s", cname)
else if (exists(x, env, inherits=FALSE)) env
else if (exists(x, env, inherits=FALSE)) env # NB: _not_ get0(), since returning 'env' not 'get(x)'
else home(x, parent.env(env))
}
cname = as.character(name)
Expand All @@ -2915,16 +2915,11 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
if (!missing(key)) setkeyv(x, key) # fix for #1169
if (check.names) setattr(x, "names", make.names(names(x), unique=TRUE))
if (selfrefok(x) > 0L) return(invisible(x)) else setalloccol(x)
} else if (is.data.frame(x)) {
# check no matrix-like columns, #3760. Allow a single list(matrix) is unambiguous and depended on by some revdeps, #3581
# for performance, only warn on the first such column, #5426
for (jj in seq_along(x)) {
if (length(dim(x[[jj]])) > 1L) {
.Call(Cwarn_matrix_column_r, jj)
break
}
}
}

.Call(Ccheck_problematic_columns, x)

if (is.data.frame(x)) {
# Done to avoid affecting other copies of x when we setattr() below (#4784)
x = .shallow(x)

Expand Down
1 change: 0 additions & 1 deletion R/print.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),

format.data.table = function(x, ..., justify="none") {
if (is.atomic(x) && !is.null(x)) { ## future R can use if (is.atomic(x))

stopf("Internal structure doesn't seem to be a list. Possibly corrupt data.table.")
}
do.call(cbind, lapply(x, format_col, ..., justify=justify))
Expand Down
37 changes: 23 additions & 14 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,33 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
return(newdt);
}

// Wrapped in a function so the same message is issued for the data.frame case at the R level
void warn_matrix_column(/* 1-indexed */ int i) {
warning(_("Some columns are a multi-column type (such as a matrix column), for example column %d. setDT will retain these columns as-is but subsequent operations like grouping and joining may fail. Please consider as.data.table() instead which will create a new column for each embedded column."), i);
// check no matrix-like columns, #3760. Allow a single list(matrix) is unambiguous and depended on by some revdeps, #3581
// for performance, only warn on the first such column, #5426
SEXP check_problematic_columns(SEXP x) {
if (!isNewList(x))
return R_NilValue;

SEXP xi;
for (R_len_t i=0; i<LENGTH(x); ++i) {
xi = VECTOR_ELT(x, i);

if (Rf_inherits(xi, "POSIXlt")) {
error(_("Column %d has class 'POSIXlt'. Please convert it to POSIXct (using as.POSIXct) and run setDT() again, or use as.data.table() instead. We do not recommend the use of POSIXlt at all because it uses 40 bytes to store one date."), i+1);
}

if (length(getAttrib(xi, R_DimSymbol)) > 1) {
warning(_("Some columns are a multi-column type (such as a matrix column), for example column %d. setDT will retain these columns as-is but subsequent operations like grouping and joining may fail. Please consider as.data.table() instead which will create a new column for each embedded column."), i+1);
break;
}
}

return R_NilValue;
}

// input validation for setDT() list input; assume is.list(x) was tested in R
// input validation for setDT() list input; assume is.list(x) and check_problematic_columns() was tested in R
SEXP setdt_nrows(SEXP x)
{
int base_length = 0;
bool test_matrix_cols = true;

for (R_len_t i = 0; i < LENGTH(x); ++i) {
SEXP xi = VECTOR_ELT(x, i);
Expand All @@ -216,20 +233,12 @@ SEXP setdt_nrows(SEXP x)
* e.g. in package eplusr which calls setDT on a list when parsing JSON. Operations which
* fail for NULL columns will give helpful error at that point, #3480 and #3471 */
if (Rf_isNull(xi)) continue;
if (Rf_inherits(xi, "POSIXlt")) {
error(_("Column %d has class 'POSIXlt'. Please convert it to POSIXct (using as.POSIXct) and run setDT() again. We do not recommend the use of POSIXlt at all because it uses 40 bytes to store one date."), i+1);
}
SEXP dim_xi = getAttrib(xi, R_DimSymbol);
R_len_t len_xi;
// NB: LENGTH() produces an undefined large number here on R 3.3.0.
// There's also a note in NEWS for R 3.1.0 saying length() should always be used by packages,
// but with some overhead for being a function/not macro...
R_len_t n_dim = length(dim_xi);
if (n_dim) {
if (test_matrix_cols && n_dim > 1) {
warn_matrix_column(i+1);
test_matrix_cols = false;
}
if (length(dim_xi)) {
len_xi = INTEGER(dim_xi)[0];
} else {
len_xi = LENGTH(xi);
Expand Down
5 changes: 2 additions & 3 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ SEXP dt_na(SEXP x, SEXP cols);
SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose);
const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname);
SEXP shallowwrapper(SEXP dt, SEXP cols);
void warn_matrix_column(int i);

SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols,
SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts,
Expand Down Expand Up @@ -257,6 +256,7 @@ SEXP islockedR(SEXP x);
bool need2utf8(SEXP x);
SEXP coerceUtf8IfNeeded(SEXP x);
SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg);
int length_with_dispatch(SEXP);
void internal_error(const char *call_name, const char *format, ...);

// types.c
Expand Down Expand Up @@ -327,7 +327,7 @@ SEXP glast(SEXP);
SEXP gfirst(SEXP);
SEXP gnthvalue(SEXP, SEXP);
SEXP dim(SEXP);
SEXP warn_matrix_column_r(SEXP);
SEXP check_problematic_columns(SEXP);
SEXP gvar(SEXP, SEXP);
SEXP gsd(SEXP, SEXP);
SEXP gprod(SEXP, SEXP);
Expand All @@ -350,4 +350,3 @@ SEXP dt_has_zlib(void);
SEXP startsWithAny(SEXP, SEXP, SEXP);
SEXP convertDate(SEXP, SEXP);
SEXP fastmean(SEXP);

108 changes: 55 additions & 53 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,45 @@

// global constants extern in data.table.h for gcc10 -fno-common; #4091
// these are written to once here on initialization, but because of that write they can't be declared const
SEXP char_allGrp1;
SEXP char_allLen1;
SEXP char_AsIs;
SEXP char_dataframe;
SEXP char_datatable;
SEXP char_Date;
SEXP char_factor;
SEXP char_IDate;
SEXP char_indices;
SEXP char_integer64;
SEXP char_ITime;
SEXP char_IDate;
SEXP char_Date;
SEXP char_lens;
SEXP char_maxString;
SEXP char_nanotime;
SEXP char_NULL;
SEXP char_ordered;
SEXP char_POSIXct;
SEXP char_POSIXt;
SEXP char_UTC;
SEXP char_nanotime;
SEXP char_lens;
SEXP char_indices;
SEXP char_allLen1;
SEXP char_allGrp1;
SEXP char_factor;
SEXP char_ordered;
SEXP char_datatable;
SEXP char_dataframe;
SEXP char_NULL;
SEXP char_maxString;
SEXP char_AsIs;
SEXP sym_sorted;
SEXP sym_index;
SEXP sym_BY;
SEXP sym_starts, char_starts;
SEXP sym_maxgrpn;
SEXP sym_anyna;
SEXP SelfRefSymbol;
SEXP sym_anyinfnan;
SEXP sym_anyna;
SEXP sym_anynotascii;
SEXP sym_anynotutf8;
SEXP sym_as_character;
SEXP sym_as_posixct;
SEXP sym_BY;
SEXP sym_colClassesAs;
SEXP sym_verbose;
SEXP SelfRefSymbol;
SEXP sym_inherits;
SEXP sym_datatable_locked;
SEXP sym_tzone;
SEXP sym_index;
SEXP sym_inherits;
SEXP sym_length;
SEXP sym_maxgrpn;
SEXP sym_old_fread_datetime_character;
SEXP sym_sorted;
SEXP sym_starts, char_starts;
SEXP sym_tzone;
SEXP sym_variable_table;
SEXP sym_as_character;
SEXP sym_as_posixct;
SEXP sym_verbose;
double NA_INT64_D;
long long NA_INT64_LL;
Rcomplex NA_CPLX;
Expand Down Expand Up @@ -149,7 +150,7 @@ R_CallMethodDef callMethods[] = {
{"CstartsWithAny", (DL_FUNC)&startsWithAny, -1},
{"CconvertDate", (DL_FUNC)&convertDate, -1},
{"Cnotchin", (DL_FUNC)&notchin, -1},
{"Cwarn_matrix_column_r", (DL_FUNC)&warn_matrix_column_r, -1},
{"Ccheck_problematic_columns", (DL_FUNC)&check_problematic_columns, -1},
{NULL, NULL, 0}
};

Expand Down Expand Up @@ -248,26 +249,26 @@ void attribute_visible R_init_data_table(DllInfo *info)
// create needed strings in advance for speed, same technique as R_*Symbol
// Following R-exts 5.9.4; paragraph and example starting "Using install ..."
// either use PRINTNAME(install()) or R_PreserveObject(mkChar()) here.
char_allGrp1 = PRINTNAME(install("allGrp1"));
char_allLen1 = PRINTNAME(install("allLen1"));
char_AsIs = PRINTNAME(install("AsIs"));
char_dataframe = PRINTNAME(install("data.frame"));
char_datatable = PRINTNAME(install("data.table"));
char_Date = PRINTNAME(install("Date")); // used for IDate too since IDate inherits from Date
char_factor = PRINTNAME(install("factor"));
char_IDate = PRINTNAME(install("IDate"));
char_indices = PRINTNAME(install("indices"));
char_integer64 = PRINTNAME(install("integer64"));
char_ITime = PRINTNAME(install("ITime"));
char_IDate = PRINTNAME(install("IDate"));
char_Date = PRINTNAME(install("Date")); // used for IDate too since IDate inherits from Date
char_lens = PRINTNAME(install("lens"));
char_maxString = PRINTNAME(install("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"));
char_nanotime = PRINTNAME(install("nanotime"));
char_NULL = PRINTNAME(install("NULL"));
char_ordered = PRINTNAME(install("ordered"));
char_POSIXct = PRINTNAME(install("POSIXct"));
char_POSIXt = PRINTNAME(install("POSIXt"));
char_UTC = PRINTNAME(install("UTC"));
char_nanotime = PRINTNAME(install("nanotime"));
char_starts = PRINTNAME(sym_starts = install("starts"));
char_lens = PRINTNAME(install("lens"));
char_indices = PRINTNAME(install("indices"));
char_allLen1 = PRINTNAME(install("allLen1"));
char_allGrp1 = PRINTNAME(install("allGrp1"));
char_factor = PRINTNAME(install("factor"));
char_ordered = PRINTNAME(install("ordered"));
char_datatable = PRINTNAME(install("data.table"));
char_dataframe = PRINTNAME(install("data.frame"));
char_NULL = PRINTNAME(install("NULL"));
char_maxString = PRINTNAME(install("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"));
char_AsIs = PRINTNAME(install("AsIs"));
char_UTC = PRINTNAME(install("UTC"));

if (TYPEOF(char_integer64) != CHARSXP) {
// checking one is enough in case of any R-devel changes
Expand All @@ -281,24 +282,25 @@ void attribute_visible R_init_data_table(DllInfo *info)
// avoids the gc without needing an extra PROTECT and immediate UNPROTECT after the setAttrib which would
// look odd (and devs in future might be tempted to remove them). Avoiding passing install() to API calls
// keeps the code neat and readable. Also see grep's added to CRAN_Release.cmd to find such calls.
sym_sorted = install("sorted");
sym_index = install("index");
sym_BY = install(".BY");
sym_maxgrpn = install("maxgrpn");
sym_anyna = install("anyna");
SelfRefSymbol = install(".internal.selfref");
sym_anyinfnan = install("anyinfnan");
sym_anyna = install("anyna");
sym_anynotascii = install("anynotascii");
sym_anynotutf8 = install("anynotutf8");
sym_as_character = install("as.character");
sym_as_posixct = install("as.POSIXct");
sym_BY = install(".BY");
sym_colClassesAs = install("colClassesAs");
sym_verbose = install("datatable.verbose");
SelfRefSymbol = install(".internal.selfref");
sym_inherits = install("inherits");
sym_datatable_locked = install(".data.table.locked");
sym_tzone = install("tzone");
sym_index = install("index");
sym_inherits = install("inherits");
sym_length = install("length");
sym_maxgrpn = install("maxgrpn");
sym_old_fread_datetime_character = install("datatable.old.fread.datetime.character");
sym_sorted = install("sorted");
sym_tzone = install("tzone");
sym_variable_table = install("variable_table");
sym_as_character = install("as.character");
sym_as_posixct = install("as.POSIXct");
sym_verbose = install("datatable.verbose");

initDTthreads();
avoid_openmp_hang_within_fork();
Expand Down
6 changes: 6 additions & 0 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,12 @@ SEXP startsWithAny(const SEXP x, const SEXP y, SEXP start) {
return ScalarLogical(false);
}

int length_with_dispatch(SEXP x) {
int l = INTEGER(PROTECT(eval(PROTECT(lang2(install("length"), x)), R_GlobalEnv)))[0];
UNPROTECT(2);
return l;
}

void internal_error(const char *call_name, const char *format, ...) {
char buff[1024];
va_list args;
Expand Down
8 changes: 2 additions & 6 deletions src/wrappers.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,10 @@ SEXP dim(SEXP x)
INTEGER(ans)[1] = 0;
}
else {
INTEGER(ans)[0] = length(VECTOR_ELT(x, 0));
// Column class might require dispatch to get length() correct
INTEGER(ans)[0] = length_with_dispatch(VECTOR_ELT(x, 0));
INTEGER(ans)[1] = length(x);
}
UNPROTECT(1);
return ans;
}

SEXP warn_matrix_column_r(SEXP i) {
warn_matrix_column(INTEGER(i)[0]);
return R_NilValue;
}
Loading