Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Postpone the Non-API Problem. #6640

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,6 @@ Authors@R: c(
person("Vincent", "Rocher", role="ctb"),
person("Vijay", "Lulla", role="ctb"),
person("Aljaž", "Sluga", role="ctb"),
person("Bill", "Evans", role="ctb")
person("Bill", "Evans", role="ctb"),
person("Sebastian", "Krantz", role="ctb")
)
86 changes: 86 additions & 0 deletions src/R_Defn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#include <R.h>
#include <Rinternals.h>

// NOTE: All of this is copied from Defn.h: https://github.com/wch/r-source/blob/28de75af0541f93832c5899139b969d290bf422e/src/include/Defn.h
// We intend to gradually remove the need for this header file

// Writable vector/string pointer
#define SEXPPTR(x) ((SEXP *)DATAPTR(x))

#ifndef NAMED_BITS
# define NAMED_BITS 16
#endif

#ifndef SEXPREC_HEADER

// https://github.com/wch/r-source/blob/2640a203d13473f95c9c7508eb2976fefb5c931c/src/include/Defn.h#L123
struct sxpinfo_struct {
SEXPTYPE type : TYPE_BITS;
/* ==> (FUNSXP == 99) %% 2^5 == 3 == CLOSXP
* -> warning: `type' is narrower than values
* of its type
* when SEXPTYPE was an enum */
unsigned int scalar: 1;
unsigned int obj : 1;
unsigned int alt : 1;
unsigned int gp : 16;
unsigned int mark : 1;
unsigned int debug : 1;
unsigned int trace : 1; /* functions and memory tracing */
unsigned int spare : 1; /* used on closures and when REFCNT is defined */
unsigned int gcgen : 1; /* old generation number */
unsigned int gccls : 3; /* node class */
unsigned int named : NAMED_BITS;
unsigned int extra : 32 - NAMED_BITS; /* used for immediate bindings */
}; /* Tot: 64 */

// https://github.com/wch/r-source/blob/2640a203d13473f95c9c7508eb2976fefb5c931c/src/include/Defn.h#L190
#define SEXPREC_HEADER \
struct sxpinfo_struct sxpinfo; \
struct SEXPREC *attrib; \
struct SEXPREC *gengc_next_node, *gengc_prev_node

// https://github.com/wch/r-source/blob/2640a203d13473f95c9c7508eb2976fefb5c931c/src/include/Defn.h#L143
struct vecsxp_struct {
R_xlen_t length;
R_xlen_t truelength;
};

// https://github.com/wch/r-source/blob/2640a203d13473f95c9c7508eb2976fefb5c931c/src/include/Defn.h#L214
typedef struct VECTOR_SEXPREC {
SEXPREC_HEADER;
struct vecsxp_struct vecsxp;
} VECTOR_SEXPREC, *VECSEXP;

#endif

// https://github.com/wch/r-source/blob/2640a203d13473f95c9c7508eb2976fefb5c931c/src/include/Defn.h#L197
typedef struct {
SEXPREC_HEADER;
} SEXPREC_partial;

// (SET_)TRULENGTH: https://github.com/wch/r-source/blob/2640a203d13473f95c9c7508eb2976fefb5c931c/src/include/Defn.h#L386
#define STDVEC_TRUELENGTH(x) (((VECSEXP) (x))->vecsxp.truelength)
// No method to set ALTREP_TRUELENGTH (gives error): https://github.com/wch/r-source/blob/2640a203d13473f95c9c7508eb2976fefb5c931c/src/include/Defn.h#L388
#define SET_TRULEN(x, v) (STDVEC_TRUELENGTH(x)=(v))
// ALTREP_TRUELENGTH is 0: https://github.com/wch/r-source/blob/48f06c1071fea6a6e7e365ad3d745217268e2175/src/main/altrep.c#L345
#define TRULEN(x) (ALTREP(x) ? 0 : STDVEC_TRUELENGTH(x))

// SETLENGTH: https://github.com/wch/r-source/blob/05bb18266d49e87f2477120ecb0ab1440f4e9b40/src/include/Defn.h#L385
#define STDVEC_LENGTH(x) (((VECSEXP) (x))->vecsxp.length)
#define SETSCAL(x, v) ((((SEXPREC_partial *)(x))->sxpinfo.scalar) = (v))
#define SET_STDVEC_LENGTH(x,v) do { \
SEXP __x__ = (x); \
R_xlen_t __v__ = (v); \
STDVEC_LENGTH(__x__) = __v__; \
SETSCAL(__x__, __v__ == 1 ? 1 : 0); \
} while (0)
// https://github.com/wch/r-source/blob/05bb18266d49e87f2477120ecb0ab1440f4e9b40/src/main/memory.c#L4072
#define SET_LEN SET_STDVEC_LENGTH

// LEVELS: https://github.com/wch/r-source/blob/2640a203d13473f95c9c7508eb2976fefb5c931c/src/include/Defn.h#L228
#define LEVLS(x) (((SEXPREC_partial *)(x))->sxpinfo.gp)

// SET_GROWABLE_BIT: https://github.com/wch/r-source/blob/2640a203d13473f95c9c7508eb2976fefb5c931c/src/include/Defn.h#L374
#define GROWBLE_MASK ((unsigned short)(1<<5))
#define SET_GROWBLE_BIT(x) (LEVLS(x) |= GROWBLE_MASK)
70 changes: 35 additions & 35 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ static void finalizer(SEXP p)
p = R_ExternalPtrTag(p);
if (!isString(p)) internal_error(__func__, "ExternalPtr doesn't see names in tag"); // # nocov
l = LENGTH(p);
tl = TRUELENGTH(p);
tl = TRULEN(p);
if (l<0 || tl<l) internal_error(__func__, "l=%d, tl=%d", l, tl); // # nocov
n = tl-l;
if (n==0) {
Expand All @@ -18,7 +18,7 @@ static void finalizer(SEXP p)
}
x = PROTECT(allocVector(INTSXP, 50)); // 50 so it's big enough to be on LargeVector heap. See NodeClassSize in memory.c:allocVector
// INTSXP rather than VECSXP so that GC doesn't inspect contents after LENGTH (thanks to Karl Miller, Jul 2015)
SETLENGTH(x,50+n*2*sizeof(void *)/4); // 1*n for the names, 1*n for the VECSXP itself (both are over allocated).
SET_LEN(x,50+n*2*sizeof(void *)/4); // 1*n for the names, 1*n for the VECSXP itself (both are over allocated).
UNPROTECT(1);
return;
}
Expand Down Expand Up @@ -86,8 +86,8 @@ closest I got to getting it to pass all tests :
UNPROTECT(2);

Then in finalizer:
SETLENGTH(names, tl)
SETLENGTH(dt, tl)
SET_LEN(names, tl)
SET_LEN(dt, tl)

and that finalizer indeed now happens before the GC releases memory (thanks to the env wrapper).

Expand Down Expand Up @@ -127,14 +127,14 @@ static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) {
if (!(isNull(tag) || isString(tag))) internal_error(__func__, ".internal.selfref tag is neither NULL nor a character vector"); // # nocov
names = getAttrib(x, R_NamesSymbol);
if (names!=tag && isString(names) && !ALTREP(names)) // !ALTREP for #4734
SET_TRUELENGTH(names, LENGTH(names));
SET_TRULEN(names, LENGTH(names));
// R copied this vector not data.table; it's not actually over-allocated. It looks over-allocated
// because R copies the original vector's tl over despite allocating length.
prot = R_ExternalPtrProtected(v);
if (TYPEOF(prot) != EXTPTRSXP) // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")).
return 0; // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r
if (x!=R_ExternalPtrAddr(prot) && !ALTREP(x))
SET_TRUELENGTH(x, LENGTH(x)); // R copied this vector not data.table, it's not actually over-allocated
SET_TRULEN(x, LENGTH(x)); // R copied this vector not data.table, it's not actually over-allocated
return checkNames ? names==tag : x==R_ExternalPtrAddr(prot);
}

Expand Down Expand Up @@ -189,10 +189,10 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
setAttrib(newdt, R_NamesSymbol, newnames);
// setAttrib appears to change length and truelength, so need to do that first _then_ SET next,
// otherwise (if the SET were were first) the 100 tl is assigned to length.
SETLENGTH(newnames,l);
SET_TRUELENGTH(newnames,n);
SETLENGTH(newdt,l);
SET_TRUELENGTH(newdt,n);
SET_LEN(newnames,l);
SET_TRULEN(newnames,n);
SET_LEN(newdt,l);
SET_TRULEN(newdt,n);
setselfref(newdt);
UNPROTECT(protecti);
return(newdt);
Expand Down Expand Up @@ -260,10 +260,10 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
return shallow(dt,R_NilValue,(n>l) ? n : l); // e.g. test 848 and 851 in R > 3.0.2
// added (n>l) ? ... for #970, see test 1481.
// TO DO: test realloc names if selfrefnamesok (users can setattr(x,"name") themselves for example.
// if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl)
// internal_error(__func__, "tl of dt passes checks, but tl of names (%d) != tl of dt (%d)", tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov
// if (TRULEN(getAttrib(dt,R_NamesSymbol))!=tl)
// internal_error(__func__, "tl of dt passes checks, but tl of names (%d) != tl of dt (%d)", tl, TRULEN(getAttrib(dt,R_NamesSymbol))); // # nocov

tl = TRUELENGTH(dt);
tl = TRULEN(dt);
// R <= 2.13.2 and we didn't catch uninitialized tl somehow
if (tl<0) internal_error(__func__, "tl of class is marked but tl<0"); // # nocov
if (tl>0 && tl<l) internal_error(__func__, "tl (%d) < l (%d) but tl of class is marked", tl, l); // # nocov
Expand Down Expand Up @@ -313,11 +313,11 @@ SEXP shallowwrapper(SEXP dt, SEXP cols) {
if (!selfrefok(dt, FALSE)) {
int n = isNull(cols) ? length(dt) : length(cols);
return(shallow(dt, cols, n));
} else return(shallow(dt, cols, TRUELENGTH(dt)));
} else return(shallow(dt, cols, TRULEN(dt)));
}

SEXP truelength(SEXP x) {
return ScalarInteger(isNull(x) ? 0 : TRUELENGTH(x));
return ScalarInteger(isNull(x) ? 0 : TRULEN(x));
}

SEXP selfrefokwrapper(SEXP x, SEXP verbose) {
Expand Down Expand Up @@ -514,7 +514,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
// modify DT by reference. Other than if new columns are being added and the allocVec() fails with
// out-of-memory. In that case the user will receive hard halt and know to rerun.
if (length(newcolnames)) {
oldtncol = TRUELENGTH(dt); // TO DO: oldtncol can be just called tl now, as we won't realloc here any more.
oldtncol = TRULEN(dt); // TO DO: oldtncol can be just called tl now, as we won't realloc here any more.

if (oldtncol<oldncol) {
if (oldtncol==0) error(_("This data.table has either been loaded from disk (e.g. using readRDS()/load()) or constructed manually (e.g. using structure()). Please run setDT() or setalloccol() on it first (to pre-allocate space for new columns) before assigning by reference to it.")); // #2996
Expand All @@ -527,13 +527,13 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
error(_("It appears that at some earlier point, names of this data.table have been reassigned. Please ensure to use setnames() rather than names<- or colnames<-. Otherwise, please report to data.table issue tracker.")); // # nocov
// Can growVector at this point easily enough, but it shouldn't happen in first place so leave it as
// strong error message for now.
else if (TRUELENGTH(names) != oldtncol)
else if (TRULEN(names) != oldtncol)
// Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768, PRId64 didn't work
internal_error(__func__, "selfrefnames is ok but tl names [%lld] != tl [%d]", (long long)TRUELENGTH(names), oldtncol); // # nocov
internal_error(__func__, "selfrefnames is ok but tl names [%lld] != tl [%d]", (long long)TRULEN(names), oldtncol); // # nocov
if (!selfrefok(dt, verbose)) // #6410 setDT(dt) and subsequent attr<- can lead to invalid selfref
error(_("It appears that at some earlier point, attributes of this data.table have been reassigned. Please use setattr(DT, name, value) rather than attr(DT, name) <- value. If that doesn't apply to you, please report your case to the data.table issue tracker."));
SETLENGTH(dt, oldncol+LENGTH(newcolnames));
SETLENGTH(names, oldncol+LENGTH(newcolnames));
SET_LEN(dt, oldncol+LENGTH(newcolnames));
SET_LEN(names, oldncol+LENGTH(newcolnames));
for (int i=0; i<LENGTH(newcolnames); ++i)
SET_STRING_ELT(names,oldncol+i,STRING_ELT(newcolnames,i));
// truelengths of both already set by alloccol
Expand Down Expand Up @@ -730,8 +730,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
SET_VECTOR_ELT(dt, i, R_NilValue);
SET_STRING_ELT(names, i, NA_STRING); // release reference to the CHARSXP
}
SETLENGTH(dt, ndt-ndelete);
SETLENGTH(names, ndt-ndelete);
SET_LEN(dt, ndt-ndelete);
SET_LEN(names, ndt-ndelete);
if (LENGTH(names)==0) {
// That was last column deleted, leaving NULL data.table, so we need to reset .row_names, so that it really is the NULL data.table.
PROTECT(nullint=allocVector(INTSXP, 0)); protecti++;
Expand Down Expand Up @@ -830,26 +830,26 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
savetl_init();
for (int k=0; k<nTargetLevels; ++k) {
const SEXP s = targetLevelsD[k];
const int tl = TRUELENGTH(s);
const int tl = TRULEN(s);
if (tl>0) {
savetl(s);
} else if (tl<0) {
// # nocov start
for (int j=0; j<k; ++j) SET_TRUELENGTH(s, 0); // wipe our negative usage and restore 0
for (int j=0; j<k; ++j) SET_TRULEN(s, 0); // wipe our negative usage and restore 0
savetl_end(); // then restore R's own usage (if any)
internal_error(__func__, "levels of target are either not unique or have truelength<0"); // # nocov
// # nocov end
}
SET_TRUELENGTH(s, -k-1);
SET_TRULEN(s, -k-1);
}
int nAdd = 0;
for (int k=0; k<nSourceLevels; ++k) {
const SEXP s = sourceLevelsD[k];
const int tl = TRUELENGTH(s);
const int tl = TRULEN(s);
if (tl>=0) {
if (!sourceIsFactor && s==NA_STRING) continue; // don't create NA factor level when assigning character to factor; test 2117
if (tl>0) savetl(s);
SET_TRUELENGTH(s, -nTargetLevels-(++nAdd));
SET_TRULEN(s, -nTargetLevels-(++nAdd));
} // else, when sourceIsString, it's normal for there to be duplicates here
}
const int nSource = length(source);
Expand All @@ -858,34 +858,34 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
const int *sourceD = INTEGER(source);
for (int i=0; i<nSource; ++i) { // convert source integers to refer to target levels
const int val = sourceD[i];
newSourceD[i] = val==NA_INTEGER ? NA_INTEGER : -TRUELENGTH(sourceLevelsD[val-1]); // retains NA factor levels here via TL(NA_STRING); e.g. ordered factor
newSourceD[i] = val==NA_INTEGER ? NA_INTEGER : -TRULEN(sourceLevelsD[val-1]); // retains NA factor levels here via TL(NA_STRING); e.g. ordered factor
}
} else {
const SEXP *sourceD = STRING_PTR_RO(source);
for (int i=0; i<nSource; ++i) { // convert source integers to refer to target levels
const SEXP val = sourceD[i];
newSourceD[i] = val==NA_STRING ? NA_INTEGER : -TRUELENGTH(val);
newSourceD[i] = val==NA_STRING ? NA_INTEGER : -TRULEN(val);
}
}
source = newSource;
for (int k=0; k<nTargetLevels; ++k) SET_TRUELENGTH(targetLevelsD[k], 0); // don't need those anymore
for (int k=0; k<nTargetLevels; ++k) SET_TRULEN(targetLevelsD[k], 0); // don't need those anymore
if (nAdd) {
// cannot grow the levels yet as that would be R call which could fail to alloc and we have no hook to clear up
SEXP *temp = (SEXP *)malloc(nAdd * sizeof(SEXP *));
if (!temp) {
// # nocov start
for (int k=0; k<nSourceLevels; ++k) SET_TRUELENGTH(sourceLevelsD[k], 0);
for (int k=0; k<nSourceLevels; ++k) SET_TRULEN(sourceLevelsD[k], 0);
savetl_end();
error(_("Unable to allocate working memory of %zu bytes to combine factor levels"), nAdd*sizeof(SEXP *));
// # nocov end
}
for (int k=0, thisAdd=0; thisAdd<nAdd; ++k) { // thisAdd<nAdd to stop early when the added ones are all reached
SEXP s = sourceLevelsD[k];
int tl = TRUELENGTH(s);
int tl = TRULEN(s);
if (tl) { // tl negative here
if (tl != -nTargetLevels-thisAdd-1) internal_error(__func__, "extra level check sum failed"); // # nocov
temp[thisAdd++] = s;
SET_TRUELENGTH(s,0);
SET_TRULEN(s,0);
}
}
savetl_end();
Expand Down Expand Up @@ -1312,14 +1312,14 @@ void savetl(SEXP s)
savedtl = (R_len_t *)tmp;
}
saveds[nsaved] = s;
savedtl[nsaved] = TRUELENGTH(s);
savedtl[nsaved] = TRULEN(s);
nsaved++;
}

void savetl_end(void) {
// Can get called if nothing has been saved yet (nsaved==0), or even if _init() hasn't been called yet (pointers NULL). Such
// as to clear up before error. Also, it might be that nothing needed to be saved anyway.
for (int i=0; i<nsaved; i++) SET_TRUELENGTH(saveds[i],savedtl[i]);
for (int i=0; i<nsaved; i++) SET_TRULEN(saveds[i],savedtl[i]);
free(saveds); // possible free(NULL) which is safe no-op
saveds = NULL;
free(savedtl);
Expand Down
Loading
Loading