Skip to content

Commit

Permalink
Moving to shared input validation routine
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico committed Dec 12, 2024
1 parent 0c268e0 commit ee9f287
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 45 deletions.
16 changes: 4 additions & 12 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2915,19 +2915,11 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
if (!missing(key)) setkeyv(x, key) # fix for #1169
if (check.names) setattr(x, "names", make.names(names(x), unique=TRUE))
if (selfrefok(x) > 0L) return(invisible(x)) else setalloccol(x)
} else if (is.data.frame(x)) {
# check no matrix-like columns, #3760. Allow a single list(matrix) is unambiguous and depended on by some revdeps, #3581
# for performance, only warn on the first such column, #5426
for (jj in seq_along(x)) {
if (inherits(x[[jj]], "POSIXlt")) {
.Call(Cerr_posixl_column_r, jj)
}
if (length(dim(x[[jj]])) > 1L) {
.Call(Cwarn_matrix_column_r, jj)
break
}
}
}

.Call(Ccheck_problematic_columns, x)

if (is.data.frame(x)) {
# Done to avoid affecting other copies of x when we setattr() below (#4784)
x = .shallow(x)

Expand Down
39 changes: 22 additions & 17 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,20 +198,33 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
return(newdt);
}

// Wrapped in a function so the same message is issued for the data.frame case at the R level
void warn_matrix_column(/* 1-indexed */ int i) {
warning(_("Some columns are a multi-column type (such as a matrix column), for example column %d. setDT will retain these columns as-is but subsequent operations like grouping and joining may fail. Please consider as.data.table() instead which will create a new column for each embedded column."), i);
}
// check no matrix-like columns, #3760. Allow a single list(matrix) is unambiguous and depended on by some revdeps, #3581
// for performance, only warn on the first such column, #5426
SEXP check_problematic_columns(SEXP x) {
if (!isNewList(x))
return R_NilValue;

SEXP xi;
for (R_len_t i=0; i<LENGTH(x); ++i) {
xi = VECTOR_ELT(x, i);

void err_posixl_column(/* 1-indexed */ int i) {
error(_("Column %d has class 'POSIXlt'. Please convert it to POSIXct (using as.POSIXct) and run setDT() again, or use as.data.table() instead. We do not recommend the use of POSIXlt at all because it uses 40 bytes to store one date."), i);
if (Rf_inherits(xi, "POSIXlt")) {
error(_("Column %d has class 'POSIXlt'. Please convert it to POSIXct (using as.POSIXct) and run setDT() again, or use as.data.table() instead. We do not recommend the use of POSIXlt at all because it uses 40 bytes to store one date."), i+1);
}

if (length(getAttrib(xi, R_DimSymbol)) > 1) {
warning(_("Some columns are a multi-column type (such as a matrix column), for example column %d. setDT will retain these columns as-is but subsequent operations like grouping and joining may fail. Please consider as.data.table() instead which will create a new column for each embedded column."), i+1);
break;
}
}

return R_NilValue;
}

// input validation for setDT() list input; assume is.list(x) was tested in R
// input validation for setDT() list input; assume is.list(x) and check_problematic_columns() was tested in R
SEXP setdt_nrows(SEXP x)
{
int base_length = 0;
bool test_matrix_cols = true;

for (R_len_t i = 0; i < LENGTH(x); ++i) {
SEXP xi = VECTOR_ELT(x, i);
Expand All @@ -220,20 +233,12 @@ SEXP setdt_nrows(SEXP x)
* e.g. in package eplusr which calls setDT on a list when parsing JSON. Operations which
* fail for NULL columns will give helpful error at that point, #3480 and #3471 */
if (Rf_isNull(xi)) continue;
if (Rf_inherits(xi, "POSIXlt")) {
err_posixl_column(i+1);
}
SEXP dim_xi = getAttrib(xi, R_DimSymbol);
R_len_t len_xi;
// NB: LENGTH() produces an undefined large number here on R 3.3.0.
// There's also a note in NEWS for R 3.1.0 saying length() should always be used by packages,
// but with some overhead for being a function/not macro...
R_len_t n_dim = length(dim_xi);
if (n_dim) {
if (test_matrix_cols && n_dim > 1) {
warn_matrix_column(i+1);
test_matrix_cols = false;
}
if (length(dim_xi)) {
len_xi = INTEGER(dim_xi)[0];
} else {
len_xi = LENGTH(xi);
Expand Down
5 changes: 1 addition & 4 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,6 @@ SEXP dt_na(SEXP x, SEXP cols);
SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose);
const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname);
SEXP shallowwrapper(SEXP dt, SEXP cols);
void warn_matrix_column(int i);
void err_posixl_column(int i);

SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols,
SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts,
Expand Down Expand Up @@ -329,8 +327,7 @@ SEXP glast(SEXP);
SEXP gfirst(SEXP);
SEXP gnthvalue(SEXP, SEXP);
SEXP dim(SEXP);
SEXP warn_matrix_column_r(SEXP);
SEXP err_posixl_column_r(SEXP);
SEXP check_problematic_columns(SEXP);
SEXP gvar(SEXP, SEXP);
SEXP gsd(SEXP, SEXP);
SEXP gprod(SEXP, SEXP);
Expand Down
3 changes: 1 addition & 2 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,7 @@ R_CallMethodDef callMethods[] = {
{"CstartsWithAny", (DL_FUNC)&startsWithAny, -1},
{"CconvertDate", (DL_FUNC)&convertDate, -1},
{"Cnotchin", (DL_FUNC)&notchin, -1},
{"Cwarn_matrix_column_r", (DL_FUNC)&warn_matrix_column_r, -1},
{"Cerr_posixl_column_r", (DL_FUNC)&err_posixl_column_r, -1},
{"Ccheck_problematic_columns", (DL_FUNC)&check_problematic_columns, -1},
{NULL, NULL, 0}
};

Expand Down
10 changes: 0 additions & 10 deletions src/wrappers.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,3 @@ SEXP dim(SEXP x)
UNPROTECT(1);
return ans;
}

SEXP warn_matrix_column_r(SEXP i) {
warn_matrix_column(INTEGER(i)[0]);
return R_NilValue;
}

SEXP err_posixl_column_r(SEXP i) {
err_posixl_column(INTEGER(i)[0]);
return R_NilValue;
}

0 comments on commit ee9f287

Please sign in to comment.