From e04e67fcd096fcc39ec3f99ef3198d1aaab11c3a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:03:03 +0100 Subject: [PATCH 01/43] fix type coercion in bmerge --- R/bmerge.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/bmerge.R b/R/bmerge.R index aa7ea41039..82353bed0f 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -71,7 +71,13 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xclass, iname, iclass) } if (xclass == iclass) { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xclass, xname) + if (length(icols)>1 && class(x[[xc]])=="Date" && class(i[[ic]]=="Date")) { + set(x, j=xc, value=as.double(x[[xc]])) + set(i, j=ic, value=as.double(i[[ic]])) + if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) + } else { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xclass, xname) + } next } if (xclass=="character" || iclass=="character" || From c55bfcc86f6eea12959e07c8d118287515d19650 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:04:36 +0100 Subject: [PATCH 02/43] fix bracket --- R/bmerge.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bmerge.R b/R/bmerge.R index 82353bed0f..fba00669f6 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -71,7 +71,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xclass, iname, iclass) } if (xclass == iclass) { - if (length(icols)>1 && class(x[[xc]])=="Date" && class(i[[ic]]=="Date")) { + if (length(icols)>1 && class(x[[xc]])=="Date" && class(i[[ic]])=="Date") { set(x, j=xc, value=as.double(x[[xc]])) set(i, j=ic, value=as.double(i[[ic]])) if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) From 272fc592f20ca3a0ea5e060b38c5078bd5673762 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:24:16 +0100 Subject: [PATCH 03/43] add test cases --- inst/tests/tests.Rraw | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fef8f1fffb..aa9caedd20 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20596,3 +20596,12 @@ test(2295.3, is.data.table(d2)) # #6588: .checkTypos used to give arbitrary strings to stopf as the first argument test(2296, d2[x %no such operator% 1], error = '%no such operator%') + +# coerce Dates to double if join on multiple columns, #6602 +date_int = seq(as.Date('2015-01-01'), as.Date('2015-01-01'), by="day") +date_dbl = as.Date('2015-01-01') +x = data.table(a=date_int, b=1L) +y = data.table(c=date_int, d=date_dbl) + +test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a)], data.table(c=date_int, d=date_dbl, b=1L), output="No coercion needed.") +test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_dbl, d=date_dbl, b=1L), output="coercing to double.") From 3aba1cc45433c6ecb03899b2559e2ffa8662ae5e Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:25:45 +0100 Subject: [PATCH 04/43] fix lint --- R/bmerge.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bmerge.R b/R/bmerge.R index fba00669f6..de1d5cedc2 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -71,7 +71,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xclass, iname, iclass) } if (xclass == iclass) { - if (length(icols)>1 && class(x[[xc]])=="Date" && class(i[[ic]])=="Date") { + if (length(icols)>1 && is(x[[xc]], "Date") && is(i[[ic]], "Date")) { set(x, j=xc, value=as.double(x[[xc]])) set(i, j=ic, value=as.double(i[[ic]])) if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) From 1d7a8c2289e5b8b765ae48ef2129d78d3394216d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 3 Nov 2024 22:39:12 +0100 Subject: [PATCH 05/43] fix old test case --- inst/tests/tests.Rraw | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aa9caedd20..43497c6f68 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15727,7 +15727,8 @@ DT = data.table(z = 1i) test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' is not supported for joining/merging") # forder verbose message when !isReallyReal Date, #1738 -DT = data.table(d=sample(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days"), 20, replace=TRUE)) +date_dbl = as.Date(as.double(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days")), origin="1970-01-01") +DT = data.table(d=sample(date_dbl, 20, replace=TRUE)) test(2070.01, typeof(DT$d), "double") test(2070.02, DT[, .N, keyby=d, verbose=TRUE], output="Column 1.*date.*8 byte double.*no fractions are present.*4 byte integer.*to save space and time") @@ -20604,4 +20605,4 @@ x = data.table(a=date_int, b=1L) y = data.table(c=date_int, d=date_dbl) test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a)], data.table(c=date_int, d=date_dbl, b=1L), output="No coercion needed.") -test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_dbl, d=date_dbl, b=1L), output="coercing to double.") +test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_int, d=date_int, b=1L), output="coercing to double.") From ff934ccb13325f76b6aabbfa085c975798dc9fcb Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 24 Nov 2024 15:58:04 +0100 Subject: [PATCH 06/43] rename x/i class --- R/bmerge.R | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index de1d5cedc2..cd6e8ca411 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -41,26 +41,26 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # Note that if i is keyed, if this coerces i's key gets dropped by set() ic = icols[a] xc = xcols[a] - xclass = getClass(x[[xc]]) - iclass = getClass(i[[ic]]) + x_merge_type = getClass(x[[xc]]) + i_merge_type = getClass(i[[ic]]) xname = paste0("x.", names(x)[xc]) iname = paste0("i.", names(i)[ic]) - if (!xclass %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, xclass) - if (!iclass %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, iclass) - if (xclass=="factor" || iclass=="factor") { + if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) + if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) + if (x_merge_type=="factor" || i_merge_type=="factor") { if (roll!=0.0 && a==length(icols)) stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (xclass=="factor" && iclass=="factor") { + if (x_merge_type=="factor" && i_merge_type=="factor") { if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values next } else { - if (xclass=="character") { + if (x_merge_type=="character") { if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) set(i, j=ic, value=val<-as.character(i[[ic]])) set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 next - } else if (iclass=="character") { + } else if (i_merge_type=="character") { if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 @@ -68,43 +68,43 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos next } } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xclass, iname, iclass) + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } - if (xclass == iclass) { + if (x_merge_type == i_merge_type) { if (length(icols)>1 && is(x[[xc]], "Date") && is(i[[ic]], "Date")) { set(x, j=xc, value=as.double(x[[xc]])) set(i, j=ic, value=as.double(i[[ic]])) if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) } else { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xclass, xname) + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) } next } - if (xclass=="character" || iclass=="character" || - xclass=="logical" || iclass=="logical" || - xclass=="factor" || iclass=="factor") { + if (x_merge_type=="character" || i_merge_type=="character" || + x_merge_type=="logical" || i_merge_type=="logical" || + x_merge_type=="factor" || i_merge_type=="factor") { if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, iclass, xclass, xname) - set(i, j=ic, value=match.fun(paste0("as.", xclass))(i[[ic]])) + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) + set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) next } else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, xclass, iclass, iname) - set(x, j=xc, value=match.fun(paste0("as.", iclass))(x[[xc]])) + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) + set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) next } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, xclass, iname, iclass) + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) } - if (xclass=="integer64" || iclass=="integer64") { + if (x_merge_type=="integer64" || i_merge_type=="integer64") { nm = c(iname, xname) - if (xclass=="integer64") { w=i; wc=ic; wclass=iclass; } else { w=x; wc=xc; wclass=xclass; nm=rev(nm) } # w is which to coerce + if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) set(w, j=wc, value=bit64::as.integer64(w[[wc]])) } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { # just integer and double left - if (iclass=="double") { + if (i_merge_type=="double") { if (!isReallyReal(i[[ic]])) { # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. From a75fb0ede218460c98cb2c3757e2ce60b53ca0f7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 24 Nov 2024 16:20:41 +0100 Subject: [PATCH 07/43] add minimal test --- inst/tests/tests.Rraw | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 43497c6f68..98e65e3831 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20599,10 +20599,8 @@ test(2295.3, is.data.table(d2)) test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 -date_int = seq(as.Date('2015-01-01'), as.Date('2015-01-01'), by="day") -date_dbl = as.Date('2015-01-01') -x = data.table(a=date_int, b=1L) -y = data.table(c=date_int, d=date_dbl) +x = data.table(a=1L) +y = data.table(c=1L, d=2) +y[x, on=.(c == a, d == a)] -test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a)], data.table(c=date_int, d=date_dbl, b=1L), output="No coercion needed.") -test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_int, d=date_int, b=1L), output="coercing to double.") +test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_int, d=date_int, b=1L), output="coercing to double.") From 1bb60bf371a64569b860946072cc02cca5995c8c Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 24 Nov 2024 16:44:53 +0100 Subject: [PATCH 08/43] indent loop --- R/bmerge.R | 150 ++++++++++++++++++++++++++--------------------------- 1 file changed, 73 insertions(+), 77 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index cd6e8ca411..4460eb0339 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,92 +34,88 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } - if (nrow(i)) for (a in seq_along(icols)) { - # - check that join columns have compatible types - # - do type coercions if necessary on just the shallow local copies for the purpose of join - # - handle factor columns appropriately - # Note that if i is keyed, if this coerces i's key gets dropped by set() - ic = icols[a] - xc = xcols[a] - x_merge_type = getClass(x[[xc]]) - i_merge_type = getClass(i[[ic]]) - xname = paste0("x.", names(x)[xc]) - iname = paste0("i.", names(i)[ic]) - if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) - if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) - if (x_merge_type=="factor" || i_merge_type=="factor") { - if (roll!=0.0 && a==length(icols)) - stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (x_merge_type=="factor" && i_merge_type=="factor") { - if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) - set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values - next - } else { - if (x_merge_type=="character") { - if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-as.character(i[[ic]])) - set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 - next - } else if (i_merge_type=="character") { - if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) - newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) - if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 - set(i, j=ic, value=newvalue) + if (nrow(i)) { + for (a in seq_along(icols)) { + # - check that join columns have compatible types + # - do type coercions if necessary on just the shallow local copies for the purpose of join + # - handle factor columns appropriately + # Note that if i is keyed, if this coerces i's key gets dropped by set() + ic = icols[a] + xc = xcols[a] + x_merge_type = getClass(x[[xc]]) + i_merge_type = getClass(i[[ic]]) + xname = paste0("x.", names(x)[xc]) + iname = paste0("i.", names(i)[ic]) + if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) + if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) + if (x_merge_type=="factor" || i_merge_type=="factor") { + if (roll!=0.0 && a==length(icols)) + stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) + if (x_merge_type=="factor" && i_merge_type=="factor") { + if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) + set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values next + } else { + if (x_merge_type=="character") { + if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) + set(i, j=ic, value=val<-as.character(i[[ic]])) + set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 + next + } else if (i_merge_type=="character") { + if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) + newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) + if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 + set(i, j=ic, value=newvalue) + next + } } + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) - } - if (x_merge_type == i_merge_type) { - if (length(icols)>1 && is(x[[xc]], "Date") && is(i[[ic]], "Date")) { - set(x, j=xc, value=as.double(x[[xc]])) - set(i, j=ic, value=as.double(i[[ic]])) - if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) - } else { + if (x_merge_type == i_merge_type) { if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) - } - next - } - if (x_merge_type=="character" || i_merge_type=="character" || - x_merge_type=="logical" || i_merge_type=="logical" || - x_merge_type=="factor" || i_merge_type=="factor") { - if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) - set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) next } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) - set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) - next + if (x_merge_type=="character" || i_merge_type=="character" || + x_merge_type=="logical" || i_merge_type=="logical" || + x_merge_type=="factor" || i_merge_type=="factor") { + if (anyNA(i[[ic]]) && allNA(i[[ic]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) + set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) + next + } + else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) + set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) + next + } + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) - } - if (x_merge_type=="integer64" || i_merge_type=="integer64") { - nm = c(iname, xname) - if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce - if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { - if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) - set(w, j=wc, value=bit64::as.integer64(w[[wc]])) - } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) - } else { - # just integer and double left - if (i_merge_type=="double") { - if (!isReallyReal(i[[ic]])) { - # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys - # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + if (x_merge_type=="integer64" || i_merge_type=="integer64") { + nm = c(iname, xname) + if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce + if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { + if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) + set(w, j=wc, value=bit64::as.integer64(w[[wc]])) + } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) + } else { + # just integer and double left + if (i_merge_type=="double") { + if (!isReallyReal(i[[ic]])) { + # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys + # we've always coerced to int and returned int, for convenience. + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + } else { + if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) + set(x, j=xc, value=as.double(x[[xc]])) + } } else { - if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) - set(x, j=xc, value=as.double(x[[xc]])) + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) + set(i, j=ic, value=as.double(i[[ic]])) } - } else { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) } } } From 7ee12aa69a8757e40c981239be1cd181f293b272 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 24 Nov 2024 17:25:39 +0100 Subject: [PATCH 09/43] add fix in one direction --- R/bmerge.R | 22 +++++++++++++++++----- inst/tests/tests.Rraw | 4 ++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 4460eb0339..f7eafd047c 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,7 +34,11 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } - if (nrow(i)) { + if (nrow(i)) { + x_merge_types = vapply_1c(x[0L, ..xcols], getClass) + i_merge_types = vapply_1c(x[0L, ..icols], getClass) + xnames = paste0("x.", names(x)[xcols]) + inames = paste0("i.", names(i)[icols]) for (a in seq_along(icols)) { # - check that join columns have compatible types # - do type coercions if necessary on just the shallow local copies for the purpose of join @@ -42,10 +46,10 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # Note that if i is keyed, if this coerces i's key gets dropped by set() ic = icols[a] xc = xcols[a] - x_merge_type = getClass(x[[xc]]) - i_merge_type = getClass(i[[ic]]) - xname = paste0("x.", names(x)[xc]) - iname = paste0("i.", names(i)[ic]) + x_merge_type = x_merge_types[a] + i_merge_type = i_merge_types[a] + xname = xnames[a] + iname = inames[a] if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) if (x_merge_type=="factor" || i_merge_type=="factor") { @@ -115,6 +119,14 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) set(i, j=ic, value=as.double(i[[ic]])) + ic_idx = which(ic == icols) + if (length(ic_idx)>1) { + for (b in which(x_merge_types[ic_idx] != "double")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=as.double(x[[xb]])) + } + } } } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 98e65e3831..77a8cf9674 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20601,6 +20601,6 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 x = data.table(a=1L) y = data.table(c=1L, d=2) -y[x, on=.(c == a, d == a)] -test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=date_int, d=date_int, b=1L), output="coercing to double.") +test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") From 562a9fd7972cb87aa21555a8b6ee251970deafa6 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 22:06:43 +0100 Subject: [PATCH 10/43] remove indent to cater for diff --- R/bmerge.R | 177 ++++++++++++++++++++++++++--------------------------- 1 file changed, 87 insertions(+), 90 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index f7eafd047c..7505f1526e 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -35,102 +35,99 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } if (nrow(i)) { - x_merge_types = vapply_1c(x[0L, ..xcols], getClass) - i_merge_types = vapply_1c(x[0L, ..icols], getClass) - xnames = paste0("x.", names(x)[xcols]) - inames = paste0("i.", names(i)[icols]) - for (a in seq_along(icols)) { - # - check that join columns have compatible types - # - do type coercions if necessary on just the shallow local copies for the purpose of join - # - handle factor columns appropriately - # Note that if i is keyed, if this coerces i's key gets dropped by set() - ic = icols[a] - xc = xcols[a] - x_merge_type = x_merge_types[a] - i_merge_type = i_merge_types[a] - xname = xnames[a] - iname = inames[a] - if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) - if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) - if (x_merge_type=="factor" || i_merge_type=="factor") { - if (roll!=0.0 && a==length(icols)) - stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (x_merge_type=="factor" && i_merge_type=="factor") { - if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) - set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values - next - } else { - if (x_merge_type=="character") { - if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-as.character(i[[ic]])) - set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 - next - } else if (i_merge_type=="character") { - if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) - newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) - if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 - set(i, j=ic, value=newvalue) - next - } - } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) - } - if (x_merge_type == i_merge_type) { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) + xhead = x[0, ..xcols] + ihead = i[0, ..icols] + xtypes = vapply_1c(xhead, getClass) + itypes = vapply_1c(ihead, getClass) + for (a in seq_along(icols)) { + # - check that join columns have compatible types + # - do type coercions if necessary on just the shallow local copies for the purpose of join + # - handle factor columns appropriately + # Note that if i is keyed, if this coerces i's key gets dropped by set() + ic = icols[a] + xc = xcols[a] + xtype = xtypes[a] + itype = itypes[a] + xname = paste0("x.", names(xhead)[a]) + iname = paste0("i.", names(ihead)[a]) + if (!xtype %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, xtype) + if (!itype %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, itype) + if (xtype=="factor" || itype=="factor") { + if (roll!=0.0 && a==length(icols)) + stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) + if (xtype=="factor" && itype=="factor") { + if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) + set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values + next + } else { + if (xtype=="character") { + if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) + set(i, j=ic, value=val<-as.character(i[[ic]])) + set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 + next + } else if (itype=="character") { + if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) + newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) + if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 + set(i, j=ic, value=newvalue) next } - if (x_merge_type=="character" || i_merge_type=="character" || - x_merge_type=="logical" || i_merge_type=="logical" || - x_merge_type=="factor" || i_merge_type=="factor") { - if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) - set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) - next - } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) - set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) - next - } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) - } - if (x_merge_type=="integer64" || i_merge_type=="integer64") { - nm = c(iname, xname) - if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce - if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { - if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) - set(w, j=wc, value=bit64::as.integer64(w[[wc]])) - } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) + } + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xtype, iname, itype) + } + if (xtype == itype) { + if (anyDuplicated(icols) && !all() && duplicated(icols, fromLast=TRUE)[a]) { + set(x, j=xc, value=as.double(x[[xc]])) + set(i, j=ic, value=as.double(i[[ic]])) + if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) + } else { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xtype, xname) + } + next + } + if (xtype=="character" || itype=="character" || + xtype=="logical" || itype=="logical" || + xtype=="factor" || itype=="factor") { + if (anyNA(i[[ic]]) && allNA(i[[ic]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, itype, xtype, xname) + set(i, j=ic, value=match.fun(paste0("as.", xtype))(i[[ic]])) + next + } + else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, xtype, itype, iname) + set(x, j=xc, value=match.fun(paste0("as.", itype))(x[[xc]])) + next + } + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, xtype, iname, itype) + } + if (xtype=="integer64" || itype=="integer64") { + nm = c(iname, xname) + if (xtype=="integer64") { w=i; wc=ic; wclass=itype; } else { w=x; wc=xc; wclass=xtype; nm=rev(nm) } # w is which to coerce + if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { + if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) + set(w, j=wc, value=bit64::as.integer64(w[[wc]])) + } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) + } else { + # just integer and double left + if (itype=="double") { + if (!isReallyReal(i[[ic]])) { + # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys + # we've always coerced to int and returned int, for convenience. + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. } else { - # just integer and double left - if (i_merge_type=="double") { - if (!isReallyReal(i[[ic]])) { - # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys - # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. - } else { - if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) - set(x, j=xc, value=as.double(x[[xc]])) - } - } else { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) - ic_idx = which(ic == icols) - if (length(ic_idx)>1) { - for (b in which(x_merge_types[ic_idx] != "double")) { - xb = xcols[b] - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=as.double(x[[xb]])) - } - } - } + if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) + set(x, j=xc, value=as.double(x[[xc]])) } + } else { + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) + set(i, j=ic, value=as.double(i[[ic]])) } } +}} ## after all modifications of x, check if x has a proper key on all xcols. ## If not, calculate the order. Also for non-equi joins, the order must be calculated. From 49f849befdb49c4ae302b49671fc175f7c0a499d Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 22:11:36 +0100 Subject: [PATCH 11/43] Revert "remove indent to cater for diff" This reverts commit 562a9fd7972cb87aa21555a8b6ee251970deafa6. --- R/bmerge.R | 177 +++++++++++++++++++++++++++-------------------------- 1 file changed, 90 insertions(+), 87 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 7505f1526e..f7eafd047c 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -35,99 +35,102 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } if (nrow(i)) { - xhead = x[0, ..xcols] - ihead = i[0, ..icols] - xtypes = vapply_1c(xhead, getClass) - itypes = vapply_1c(ihead, getClass) - for (a in seq_along(icols)) { - # - check that join columns have compatible types - # - do type coercions if necessary on just the shallow local copies for the purpose of join - # - handle factor columns appropriately - # Note that if i is keyed, if this coerces i's key gets dropped by set() - ic = icols[a] - xc = xcols[a] - xtype = xtypes[a] - itype = itypes[a] - xname = paste0("x.", names(xhead)[a]) - iname = paste0("i.", names(ihead)[a]) - if (!xtype %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, xtype) - if (!itype %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, itype) - if (xtype=="factor" || itype=="factor") { - if (roll!=0.0 && a==length(icols)) - stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (xtype=="factor" && itype=="factor") { - if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) - set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values - next - } else { - if (xtype=="character") { - if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-as.character(i[[ic]])) - set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 - next - } else if (itype=="character") { - if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) - newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) - if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 - set(i, j=ic, value=newvalue) + x_merge_types = vapply_1c(x[0L, ..xcols], getClass) + i_merge_types = vapply_1c(x[0L, ..icols], getClass) + xnames = paste0("x.", names(x)[xcols]) + inames = paste0("i.", names(i)[icols]) + for (a in seq_along(icols)) { + # - check that join columns have compatible types + # - do type coercions if necessary on just the shallow local copies for the purpose of join + # - handle factor columns appropriately + # Note that if i is keyed, if this coerces i's key gets dropped by set() + ic = icols[a] + xc = xcols[a] + x_merge_type = x_merge_types[a] + i_merge_type = i_merge_types[a] + xname = xnames[a] + iname = inames[a] + if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) + if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) + if (x_merge_type=="factor" || i_merge_type=="factor") { + if (roll!=0.0 && a==length(icols)) + stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) + if (x_merge_type=="factor" && i_merge_type=="factor") { + if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) + set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values + next + } else { + if (x_merge_type=="character") { + if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) + set(i, j=ic, value=val<-as.character(i[[ic]])) + set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 + next + } else if (i_merge_type=="character") { + if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) + newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) + if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 + set(i, j=ic, value=newvalue) + next + } + } + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) + } + if (x_merge_type == i_merge_type) { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) next } - } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, xtype, iname, itype) - } - if (xtype == itype) { - if (anyDuplicated(icols) && !all() && duplicated(icols, fromLast=TRUE)[a]) { - set(x, j=xc, value=as.double(x[[xc]])) - set(i, j=ic, value=as.double(i[[ic]])) - if (verbose) catf("%s and %s are both Dates. R does not guarentee a type for Date internally, hence, coercing to double.\n", iname, xname) - } else { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, xtype, xname) - } - next - } - if (xtype=="character" || itype=="character" || - xtype=="logical" || itype=="logical" || - xtype=="factor" || itype=="factor") { - if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, itype, xtype, xname) - set(i, j=ic, value=match.fun(paste0("as.", xtype))(i[[ic]])) - next - } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, xtype, itype, iname) - set(x, j=xc, value=match.fun(paste0("as.", itype))(x[[xc]])) - next - } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, xtype, iname, itype) - } - if (xtype=="integer64" || itype=="integer64") { - nm = c(iname, xname) - if (xtype=="integer64") { w=i; wc=ic; wclass=itype; } else { w=x; wc=xc; wclass=xtype; nm=rev(nm) } # w is which to coerce - if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { - if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) - set(w, j=wc, value=bit64::as.integer64(w[[wc]])) - } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) - } else { - # just integer and double left - if (itype=="double") { - if (!isReallyReal(i[[ic]])) { - # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys - # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + if (x_merge_type=="character" || i_merge_type=="character" || + x_merge_type=="logical" || i_merge_type=="logical" || + x_merge_type=="factor" || i_merge_type=="factor") { + if (anyNA(i[[ic]]) && allNA(i[[ic]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) + set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) + next + } + else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) + set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) + next + } + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) + } + if (x_merge_type=="integer64" || i_merge_type=="integer64") { + nm = c(iname, xname) + if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce + if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { + if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) + set(w, j=wc, value=bit64::as.integer64(w[[wc]])) + } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { - if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) - set(x, j=xc, value=as.double(x[[xc]])) + # just integer and double left + if (i_merge_type=="double") { + if (!isReallyReal(i[[ic]])) { + # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys + # we've always coerced to int and returned int, for convenience. + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + } else { + if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) + set(x, j=xc, value=as.double(x[[xc]])) + } + } else { + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) + set(i, j=ic, value=as.double(i[[ic]])) + ic_idx = which(ic == icols) + if (length(ic_idx)>1) { + for (b in which(x_merge_types[ic_idx] != "double")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=as.double(x[[xb]])) + } + } + } } - } else { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) } } -}} ## after all modifications of x, check if x has a proper key on all xcols. ## If not, calculate the order. Also for non-equi joins, the order must be calculated. From cfa5d7134b467ca66679d3e566ed6ddff8e1e327 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 22:13:02 +0100 Subject: [PATCH 12/43] remove indent --- R/bmerge.R | 160 ++++++++++++++++++++++++++--------------------------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index f7eafd047c..65d532170a 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -40,96 +40,96 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos xnames = paste0("x.", names(x)[xcols]) inames = paste0("i.", names(i)[icols]) for (a in seq_along(icols)) { - # - check that join columns have compatible types - # - do type coercions if necessary on just the shallow local copies for the purpose of join - # - handle factor columns appropriately - # Note that if i is keyed, if this coerces i's key gets dropped by set() - ic = icols[a] - xc = xcols[a] - x_merge_type = x_merge_types[a] - i_merge_type = i_merge_types[a] - xname = xnames[a] - iname = inames[a] - if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) - if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) - if (x_merge_type=="factor" || i_merge_type=="factor") { - if (roll!=0.0 && a==length(icols)) - stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) - if (x_merge_type=="factor" && i_merge_type=="factor") { - if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) - set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values + # - check that join columns have compatible types + # - do type coercions if necessary on just the shallow local copies for the purpose of join + # - handle factor columns appropriately + # Note that if i is keyed, if this coerces i's key gets dropped by set() + ic = icols[a] + xc = xcols[a] + x_merge_type = x_merge_types[a] + i_merge_type = i_merge_types[a] + xname = xnames[a] + iname = inames[a] + if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) + if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) + if (x_merge_type=="factor" || i_merge_type=="factor") { + if (roll!=0.0 && a==length(icols)) + stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) + if (x_merge_type=="factor" && i_merge_type=="factor") { + if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) + set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values + next + } else { + if (x_merge_type=="character") { + if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) + set(i, j=ic, value=val<-as.character(i[[ic]])) + set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 + next + } else if (i_merge_type=="character") { + if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) + newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) + if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 + set(i, j=ic, value=newvalue) next - } else { - if (x_merge_type=="character") { - if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-as.character(i[[ic]])) - set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 - next - } else if (i_merge_type=="character") { - if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) - newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) - if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 - set(i, j=ic, value=newvalue) - next - } } - stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } - if (x_merge_type == i_merge_type) { - if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) + stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) + } + if (x_merge_type == i_merge_type) { + if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) + next + } + if (x_merge_type=="character" || i_merge_type=="character" || + x_merge_type=="logical" || i_merge_type=="logical" || + x_merge_type=="factor" || i_merge_type=="factor") { + if (anyNA(i[[ic]]) && allNA(i[[ic]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) + set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) next } - if (x_merge_type=="character" || i_merge_type=="character" || - x_merge_type=="logical" || i_merge_type=="logical" || - x_merge_type=="factor" || i_merge_type=="factor") { - if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) - set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) - next - } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) - set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) - next - } - stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) + else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) + set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) + next } - if (x_merge_type=="integer64" || i_merge_type=="integer64") { - nm = c(iname, xname) - if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce - if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { - if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) - set(w, j=wc, value=bit64::as.integer64(w[[wc]])) - } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) - } else { - # just integer and double left - if (i_merge_type=="double") { - if (!isReallyReal(i[[ic]])) { - # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys - # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. - } else { - if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) - set(x, j=xc, value=as.double(x[[xc]])) - } + stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) + } + if (x_merge_type=="integer64" || i_merge_type=="integer64") { + nm = c(iname, xname) + if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce + if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { + if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) + set(w, j=wc, value=bit64::as.integer64(w[[wc]])) + } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) + } else { + # just integer and double left + if (i_merge_type=="double") { + if (!isReallyReal(i[[ic]])) { + # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys + # we've always coerced to int and returned int, for convenience. + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. } else { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) - ic_idx = which(ic == icols) - if (length(ic_idx)>1) { - for (b in which(x_merge_types[ic_idx] != "double")) { - xb = xcols[b] - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=as.double(x[[xb]])) - } + if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) + set(x, j=xc, value=as.double(x[[xc]])) + } + } else { + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) + set(i, j=ic, value=as.double(i[[ic]])) + ic_idx = which(ic == icols) + if (length(ic_idx)>1) { + for (b in which(x_merge_types[ic_idx] != "double")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=as.double(x[[xb]])) } } } - } + } + } } ## after all modifications of x, check if x has a proper key on all xcols. From 6b8f11fed6a895862547a97dee8604c3de9fb8b3 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:01:40 +0100 Subject: [PATCH 13/43] add 2nd case --- R/bmerge.R | 43 ++++++++++++++++++++++++++++++------------- inst/tests/tests.Rraw | 5 ++++- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 65d532170a..1ced26ef38 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -103,29 +103,46 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { # just integer and double left + ic_idx = which(ic == icols) if (i_merge_type=="double") { + coerce_x = FALSE if (!isReallyReal(i[[ic]])) { # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 - set(i, j=ic, value=val) - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. - } else { + for (b in which(x_merge_types[ic_idx] == "double")) { + xb = xcols[b] + if (isReallyReal(x[[xb]])) { + coerce_x = TRUE + break + } + } + if (!coerce_x) { + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) + val = as.integer(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) + set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + for (b in which(x_merge_types[ic_idx] == "double")) { + xb = xcols[b] + val = as.integer(x[[xb]]) + if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=val) + } + } + } + if (coerce_x) { + ic_idx = which() if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) set(x, j=xc, value=as.double(x[[xc]])) } } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) set(i, j=ic, value=as.double(i[[ic]])) - ic_idx = which(ic == icols) - if (length(ic_idx)>1) { - for (b in which(x_merge_types[ic_idx] != "double")) { - xb = xcols[b] - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=as.double(x[[xb]])) - } + for (b in which(x_merge_types[ic_idx] == "integer")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) + set(x, j=xb, value=as.double(x[[xb]])) } } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 77a8cf9674..46c56e19a4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20601,6 +20601,9 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 x = data.table(a=1L) y = data.table(c=1L, d=2) - test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +x = data.table(a=1) +y = data.table(c=1, d=2L) +test(2297.3, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.4, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") From a651b03bb606310dceb50c0c0b0f474a3825cb16 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:05:18 +0100 Subject: [PATCH 14/43] remove trailing ws --- R/bmerge.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 1ced26ef38..7f543ade45 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -110,7 +110,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. for (b in which(x_merge_types[ic_idx] == "double")) { - xb = xcols[b] + xb = xcols[b] if (isReallyReal(x[[xb]])) { coerce_x = TRUE break @@ -145,7 +145,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(x, j=xb, value=as.double(x[[xb]])) } } - } + } } } From 839463eebded28ab9943537041c7071432894cf1 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:28:58 +0100 Subject: [PATCH 15/43] update all cases --- R/bmerge.R | 55 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 7f543ade45..652ddd236e 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,22 +34,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } - if (nrow(i)) { - x_merge_types = vapply_1c(x[0L, ..xcols], getClass) - i_merge_types = vapply_1c(x[0L, ..icols], getClass) - xnames = paste0("x.", names(x)[xcols]) - inames = paste0("i.", names(i)[icols]) - for (a in seq_along(icols)) { + if (nrow(i)) for (a in seq_along(icols)) { # - check that join columns have compatible types # - do type coercions if necessary on just the shallow local copies for the purpose of join # - handle factor columns appropriately # Note that if i is keyed, if this coerces i's key gets dropped by set() ic = icols[a] xc = xcols[a] - x_merge_type = x_merge_types[a] - i_merge_type = i_merge_types[a] - xname = xnames[a] - iname = inames[a] + x_merge_type = getClass(x[[xc]]) + i_merge_type = getClass(i[[ic]]) + xname = paste0("x.", names(x)[xc]) + iname = paste0("i.", names(i)[ic]) if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) if (x_merge_type=="factor" || i_merge_type=="factor") { @@ -109,11 +104,14 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if (!isReallyReal(i[[ic]])) { # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. - for (b in which(x_merge_types[ic_idx] == "double")) { - xb = xcols[b] - if (isReallyReal(x[[xb]])) { - coerce_x = TRUE - break + if (length(ic_idx)>1L) { + xc_idx = xcols[ic_idx] + for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { + xb = xcols[b] + if (isReallyReal(x[[xb]])) { + coerce_x = TRUE + break + } } } if (!coerce_x) { @@ -122,12 +120,15 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 set(i, j=ic, value=val) set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. - for (b in which(x_merge_types[ic_idx] == "double")) { - xb = xcols[b] - val = as.integer(x[[xb]]) - if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=val) + if (length(ic_idx)>1L) { + xc_idx = xcols[ic_idx] + for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { + xb = xcols[b] + val = as.integer(x[[xb]]) + if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) + set(x, j=xb, value=val) + } } } } @@ -139,15 +140,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) set(i, j=ic, value=as.double(i[[ic]])) - for (b in which(x_merge_types[ic_idx] == "integer")) { - xb = xcols[b] - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", xnames[b], xname) - set(x, j=xb, value=as.double(x[[xb]])) + if (length(ic_idx)>1L) { + xc_idx = xcols[ic_idx] + for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "integer")) { + xb = xcols[b] + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) + set(x, j=xb, value=as.double(x[[xb]])) + } } } } } - } ## after all modifications of x, check if x has a proper key on all xcols. ## If not, calculate the order. Also for non-equi joins, the order must be calculated. From 935ac607e028206a81bfb781008ffff225829174 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:41:40 +0100 Subject: [PATCH 16/43] fix typo --- R/bmerge.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 652ddd236e..658ff53ec8 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -102,19 +102,20 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if (i_merge_type=="double") { coerce_x = FALSE if (!isReallyReal(i[[ic]])) { + coerce_x = TRUE # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { xb = xcols[b] - if (isReallyReal(x[[xb]])) { - coerce_x = TRUE + if (!isReallyReal(x[[xb]])) { + coerce_x = FALSE break } } } - if (!coerce_x) { + if (coerce_x) { if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) val = as.integer(i[[ic]]) if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 @@ -132,8 +133,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } } - if (coerce_x) { - ic_idx = which() + if (!coerce_x) { if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) set(x, j=xc, value=as.double(x[[xc]])) } From f1997e0f531df5a368d2819fb36e2b8c97608852 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Tue, 26 Nov 2024 23:57:58 +0100 Subject: [PATCH 17/43] fix test cases --- inst/tests/tests.Rraw | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 46c56e19a4..8d5a98633f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20600,10 +20600,10 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 x = data.table(a=1L) -y = data.table(c=1L, d=2) +y = data.table(c=1L, d=1) test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") x = data.table(a=1) -y = data.table(c=1, d=2L) -test(2297.3, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") -test(2297.4, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +y = data.table(c=1, d=1L) +test(2297.3, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +test(2297.4, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1, d=1), output="Coercing .*d to type double") From 25eaa659689ec765e176bcc357235d98fab05ee2 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Wed, 27 Nov 2024 00:04:10 +0100 Subject: [PATCH 18/43] update testcases --- inst/tests/tests.Rraw | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8d5a98633f..71c43aa675 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20599,11 +20599,21 @@ test(2295.3, is.data.table(d2)) test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 +d_int = .Date(1L) +d_dbl = .Date(1) x = data.table(a=1L) y = data.table(c=1L, d=1) -test(2297.1, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") -test(2297.2, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +test(2297.01, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +test(2297.02, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +x = data.table(a=d_int) +y = data.table(c=d_int, d=d_dbl) +test(2297.03, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") +test(2297.04, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") x = data.table(a=1) y = data.table(c=1, d=1L) -test(2297.3, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1, d=1), output="Coercing .*d to type double") -test(2297.4, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +test(2297.11, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +test(2297.12, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +x = data.table(a=d_dbl) +y = data.table(c=d_dbl, d=d_int) +test(2297.13, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_dbl, d=d_dbl), output="Coercing .*d to type double") +test(2297.14, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_dbl, d=d_dbl), output="Coercing .*d to type double") From f0219c4a77017c683426c45dfbe127c0c8ff328f Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Wed, 27 Nov 2024 00:08:40 +0100 Subject: [PATCH 19/43] update copying attributes from int to dbl --- R/bmerge.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 658ff53ec8..fbe1408c53 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -139,13 +139,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=as.double(i[[ic]])) + val = as.double(i[[ic]]) + if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "integer")) { xb = xcols[b] + val = as.double(x[[xb]]) + if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) - set(x, j=xb, value=as.double(x[[xb]])) + set(x, j=xb, value=val) } } } From ca6756f7ca3faffcf4064a7f4c3095e6980f0b85 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 09:52:46 +0100 Subject: [PATCH 20/43] start modularize --- R/bmerge.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index fbe1408c53..571a18ec75 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,6 +34,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } + cast_with_atts = function(x, f) { + ans = f(x) + if (!is.null(attributes(x))) attributes(ans) = attributes(x) + ans + } + if (nrow(i)) for (a in seq_along(icols)) { # - check that join columns have compatible types # - do type coercions if necessary on just the shallow local copies for the purpose of join @@ -117,18 +123,15 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } if (coerce_x) { if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = as.integer(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + val = cast_with_atts(i[[ic]], as.integer) # to retain Date for example; 3679 set(i, j=ic, value=val) set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { xb = xcols[b] - val = as.integer(x[[xb]]) - if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) - set(x, j=xb, value=val) + set(x, j=xb, value=cast_with_atts(x[[xb]], as.integer)) } } } @@ -139,17 +142,14 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - val = as.double(i[[ic]]) - if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 + val = cast_with_atts(i[[ic]], as.double) set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "integer")) { xb = xcols[b] - val = as.double(x[[xb]]) - if (!is.null(attributes(x[[xb]]))) attributes(val) = attributes(x[[xb]]) if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) - set(x, j=xb, value=val) + set(x, j=xb, value=cast_with_atts(x[[xb]], as.double)) } } } From d41bbf26f0fd4a7deebb1b83ee2bcac31b20bf8f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 10:08:12 +0100 Subject: [PATCH 21/43] fix cases --- R/bmerge.R | 4 ++-- inst/tests/tests.Rraw | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 571a18ec75..b6e0f4afb7 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -88,7 +88,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) next } - else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { + if (anyNA(x[[xc]]) && allNA(x[[xc]])) { if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) next @@ -115,7 +115,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos xc_idx = xcols[ic_idx] for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { xb = xcols[b] - if (!isReallyReal(x[[xb]])) { + if (isReallyReal(x[[xb]])) { coerce_x = FALSE break } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 71c43aa675..f77c3fbcbd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20611,9 +20611,9 @@ test(2297.03, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], dat test(2297.04, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") x = data.table(a=1) y = data.table(c=1, d=1L) -test(2297.11, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1, d=1), output="Coercing .*d to type double") -test(2297.12, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1, d=1), output="Coercing .*d to type double") +test(2297.11, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.12, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") x = data.table(a=d_dbl) y = data.table(c=d_dbl, d=d_int) -test(2297.13, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_dbl, d=d_dbl), output="Coercing .*d to type double") -test(2297.14, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_dbl, d=d_dbl), output="Coercing .*d to type double") +test(2297.13, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.14, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing double column x.c (which contains no fractions) to type integer") From 33d700916da2cfc4a4b4724e307c5070de37b18a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 11:01:49 +0100 Subject: [PATCH 22/43] ensure same types for test --- R/bmerge.R | 6 +++--- inst/tests/tests.Rraw | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index b6e0f4afb7..d24d8cb942 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -113,7 +113,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { + for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")) { xb = xcols[b] if (isReallyReal(x[[xb]])) { coerce_x = FALSE @@ -128,7 +128,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "double")) { + for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")) { xb = xcols[b] if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.integer)) @@ -146,7 +146,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, ..xc_idx], getClass) == "integer")) { + for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")) { xb = xcols[b] if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.double)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f77c3fbcbd..2ff7d75040 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12226,10 +12226,11 @@ DT1 = data.table(RANDOM_STRING = rand_strings(n), DATE = sample(seq(as.Date('2016-01-01'), as.Date('2016-12-31'), by="day"), n, replace=TRUE)) DT2 = data.table(RANDOM_STRING = rand_strings(n), START_DATE = sample(seq(as.Date('2015-01-01'), as.Date('2017-12-31'), by="day"), n, replace=TRUE)) +as.intDate = function(x) .Date(as.integer(as.Date(x))) DT2[, EXPIRY_DATE := START_DATE + floor(runif(1000, 200,300))] -DT1[, DT1_ID := .I][, DATE := as.Date(DATE)] +DT1[, DT1_ID := .I][, DATE := as.intDate(DATE)] cols = c("START_DATE", "EXPIRY_DATE") -DT2[, DT2_ID := .I][, (cols) := lapply(.SD, as.Date), .SDcols=cols] +DT2[, DT2_ID := .I][, (cols) := lapply(.SD, as.intDate), .SDcols=cols] ans1 = DT2[DT1, on=.(RANDOM_STRING, START_DATE <= DATE, EXPIRY_DATE >= DATE), .N, by=.EACHI ]$N > 0L tmp = DT1[DT2, on=.(RANDOM_STRING, DATE >= START_DATE, DATE <= EXPIRY_DATE), which=TRUE, nomatch=0L] ans2 = DT1[, DT1_ID %in% tmp] From 91c53c56100efddf758a89707bc42ce2e3bb1480 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 11:13:50 +0100 Subject: [PATCH 23/43] add test for codecov --- inst/tests/tests.Rraw | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2ff7d75040..6dbfa66a55 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20600,21 +20600,27 @@ test(2295.3, is.data.table(d2)) test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 -d_int = .Date(1L) -d_dbl = .Date(1) x = data.table(a=1L) y = data.table(c=1L, d=1) test(2297.01, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") test(2297.02, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") -x = data.table(a=d_int) -y = data.table(c=d_int, d=d_dbl) -test(2297.03, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") -test(2297.04, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing .*c to type double") x = data.table(a=1) y = data.table(c=1, d=1L) -test(2297.11, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") -test(2297.12, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.03, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.04, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +# dates +d_int = .Date(1L) +d_dbl = .Date(1) +x = data.table(a=d_int) +y = data.table(c=d_int, d=d_dbl) +test(2297.11, y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int)) +test(2297.12, y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int)) x = data.table(a=d_dbl) y = data.table(c=d_dbl, d=d_int) -test(2297.13, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int), output="Coercing double column x.c (which contains no fractions) to type integer") -test(2297.14, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.13, y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int)) +test(2297.14, y[x, on=.(d == a, c == a)], data.table(c=d_int, d=d_int)) +# real double +x = data.table(a=1) +y = data.table(c=1.5, d=1L) +test(2297.21, y[x, on=.(c == a, d == a)], data.table(c=1, d=1)) +test(2297.22, y[x, on=.(d == a, c == a)], data.table(c=1, d=1)) From 6c68fb720e4963d7f27af233c5a45a76cf852402 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 13:25:43 +0100 Subject: [PATCH 24/43] simplify --- R/bmerge.R | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index d24d8cb942..7b8d840218 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -34,8 +34,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } - cast_with_atts = function(x, f) { - ans = f(x) + cast_with_atts = function(x, as.f) { + ans = as.f(x) if (!is.null(attributes(x))) attributes(ans) = attributes(x) ans } @@ -113,8 +113,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")) { - xb = xcols[b] + for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { if (isReallyReal(x[[xb]])) { coerce_x = FALSE break @@ -128,8 +127,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")) { - xb = xcols[b] + for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.integer)) } @@ -146,8 +144,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (b in which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")) { - xb = xcols[b] + for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")]) { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.double)) } From 5aaee922d0408c8dfa4c36ac08eae2ae322e6bf4 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 15:40:59 +0100 Subject: [PATCH 25/43] fix test on windows --- R/bmerge.R | 8 ++++---- inst/tests/tests.Rraw | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 7b8d840218..21c7345dac 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -104,7 +104,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { # just integer and double left - ic_idx = which(ic == icols) + ic_idx = which(ic == icols) # check if on is joined on multiple conditions if (i_merge_type=="double") { coerce_x = FALSE if (!isReallyReal(i[[ic]])) { @@ -113,7 +113,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { + for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { if (isReallyReal(x[[xb]])) { coerce_x = FALSE break @@ -127,7 +127,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { + for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.integer)) } @@ -144,7 +144,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(i, j=ic, value=val) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xcols[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")]) { + for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")]) { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) set(x, j=xb, value=cast_with_atts(x[[xb]], as.double)) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6dbfa66a55..9f9aa2aa07 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12226,11 +12226,10 @@ DT1 = data.table(RANDOM_STRING = rand_strings(n), DATE = sample(seq(as.Date('2016-01-01'), as.Date('2016-12-31'), by="day"), n, replace=TRUE)) DT2 = data.table(RANDOM_STRING = rand_strings(n), START_DATE = sample(seq(as.Date('2015-01-01'), as.Date('2017-12-31'), by="day"), n, replace=TRUE)) -as.intDate = function(x) .Date(as.integer(as.Date(x))) DT2[, EXPIRY_DATE := START_DATE + floor(runif(1000, 200,300))] -DT1[, DT1_ID := .I][, DATE := as.intDate(DATE)] +DT1[, DT1_ID := .I][, DATE := as.Date(DATE)] cols = c("START_DATE", "EXPIRY_DATE") -DT2[, DT2_ID := .I][, (cols) := lapply(.SD, as.intDate), .SDcols=cols] +DT2[, DT2_ID := .I][, (cols) := lapply(.SD, as.Date), .SDcols=cols] ans1 = DT2[DT1, on=.(RANDOM_STRING, START_DATE <= DATE, EXPIRY_DATE >= DATE), .N, by=.EACHI ]$N > 0L tmp = DT1[DT2, on=.(RANDOM_STRING, DATE >= START_DATE, DATE <= EXPIRY_DATE), which=TRUE, nomatch=0L] ans2 = DT1[, DT1_ID %in% tmp] From 7a64f22084f99fb3a295966a7c8723cc171d8571 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 16:41:43 +0100 Subject: [PATCH 26/43] simplify --- R/bmerge.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 21c7345dac..130a820654 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -80,9 +80,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) next } - if (x_merge_type=="character" || i_merge_type=="character" || - x_merge_type=="logical" || i_merge_type=="logical" || - x_merge_type=="factor" || i_merge_type=="factor") { + cfl = c("character", "logical", "factor") + if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) { if (anyNA(i[[ic]]) && allNA(i[[ic]])) { if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) @@ -122,8 +121,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } if (coerce_x) { if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - val = cast_with_atts(i[[ic]], as.integer) # to retain Date for example; 3679 - set(i, j=ic, value=val) + set(i, j=ic, value=val<-cast_with_atts(i[[ic]], as.integer)) # to retain Date for example; 3679 set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] @@ -140,8 +138,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } else { if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - val = cast_with_atts(i[[ic]], as.double) - set(i, j=ic, value=val) + set(i, j=ic, value=cast_with_atts(i[[ic]], as.double)) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")]) { From 69a7b20047c5d845b402d7f95e03136009a9e3c0 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 17:43:49 +0100 Subject: [PATCH 27/43] add coerce function --- R/bmerge.R | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 130a820654..7d7d675909 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -40,6 +40,11 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans } + coerce_col = function(dt, col, from_type, to_type, from_name, to_name, verbose_msg) { + if (verbose) catf(verbose_msg, from_type, from_name, to_type, to_name) + set(dt, j=col, value=cast_with_atts(dt[[col]], match.fun(paste0("as.", to_type)))) + } + if (nrow(i)) for (a in seq_along(icols)) { # - check that join columns have compatible types # - do type coercions if necessary on just the shallow local copies for the purpose of join @@ -120,30 +125,28 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } if (coerce_x) { - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-cast_with_atts(i[[ic]], as.integer)) # to retain Date for example; 3679 - set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. + msg = "Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n" + coerce_col(i, ic, "double", "integer", iname, xname, msg) + set(callersi, j=ic, value=i[[ic]]) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { - if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", paste0("x.", names(x)[xb]), xname) - set(x, j=xb, value=cast_with_atts(x[[xb]], as.integer)) + coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, msg) } } } } if (!coerce_x) { - if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) - set(x, j=xc, value=as.double(x[[xc]])) + msg = "Coercing %s column %s to type %s to match type of %s which contains fractions.\n" + coerce_col(x, xc, "integer", "double", xname, iname, msg) } } else { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) - set(i, j=ic, value=cast_with_atts(i[[ic]], as.double)) + msg = "Coercing %s column %s to type %s for join to match type of %s.\n" + coerce_col(i, ic, "integer", "double", iname, xname, msg) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")]) { - if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", paste0("x.", names(x)[xb]), xname) - set(x, j=xb, value=cast_with_atts(x[[xb]], as.double)) + coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, msg) } } } From 3023b5cff164dff916fb872a6d7e02d8c9627264 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 27 Nov 2024 19:57:22 +0100 Subject: [PATCH 28/43] modularize more --- R/bmerge.R | 7 +++---- inst/tests/tests.Rraw | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 7d7d675909..af70d422e7 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -87,14 +87,13 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } cfl = c("character", "logical", "factor") if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) { + msg = "Coercing all-NA %s column %s to type %s to match type of %s.\n" if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, i_merge_type, x_merge_type, xname) - set(i, j=ic, value=match.fun(paste0("as.", x_merge_type))(i[[ic]])) + coerce_col(i, ic, i_merge_type, x_merge_type, iname, xname, msg) next } if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, x_merge_type, i_merge_type, iname) - set(x, j=xc, value=match.fun(paste0("as.", i_merge_type))(x[[xc]])) + coerce_col(x, xc, x_merge_type, i_merge_type, xname, iname, msg) next } stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9f9aa2aa07..9ccec5eccc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15156,15 +15156,15 @@ if (test_bit64) { dt1 = data.table(a=1, b=NA_character_) dt2 = data.table(a=2L, b=NA) test(2044.80, dt1[dt2, on="a==b", verbose=TRUE], data.table(a=NA, b=NA_character_, i.a=2L), - output=msg<-"Coercing all-NA i.b (logical) to type double to match type of x.a") + output=msg<-"Coercing all-NA logical column i.b to type double to match type of x.a") test(2044.81, dt1[dt2, on="a==b", nomatch=0L, verbose=TRUE], data.table(a=logical(), b=character(), i.a=integer()), output=msg) test(2044.82, dt1[dt2, on="b==b", verbose=TRUE], data.table(a=1, b=NA, i.a=2L), - output=msg<-"Coercing all-NA i.b (logical) to type character to match type of x.b") + output=msg<-"Coercing all-NA logical column i.b to type character to match type of x.b") test(2044.83, dt1[dt2, on="b==b", nomatch=0L, verbose=TRUE], data.table(a=1, b=NA, i.a=2L), output=msg) test(2044.84, dt1[dt2, on="b==a", verbose=TRUE], data.table(a=NA_real_, b=2L, i.b=NA), - output=msg<-"Coercing all-NA x.b (character) to type integer to match type of i.a") + output=msg<-"Coercing all-NA character column x.b to type integer to match type of i.a") test(2044.85, dt1[dt2, on="b==a", nomatch=0L, verbose=TRUE], data.table(a=double(), b=integer(), i.b=logical()), output=msg) From 4dfe8ab55aa6c954d99a308b065f97bbf18f1059 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 27 Nov 2024 14:35:03 -0800 Subject: [PATCH 29/43] Use gettext() on character strings directly --- R/bmerge.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index af70d422e7..236e15cfe3 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -41,7 +41,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } coerce_col = function(dt, col, from_type, to_type, from_name, to_name, verbose_msg) { - if (verbose) catf(verbose_msg, from_type, from_name, to_type, to_name) + if (verbose) catf(verbose_msg, from_type, from_name, to_type, to_name, domain=NULL) set(dt, j=col, value=cast_with_atts(dt[[col]], match.fun(paste0("as.", to_type)))) } @@ -87,7 +87,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } cfl = c("character", "logical", "factor") if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) { - msg = "Coercing all-NA %s column %s to type %s to match type of %s.\n" + msg = gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") if (anyNA(i[[ic]]) && allNA(i[[ic]])) { coerce_col(i, ic, i_merge_type, x_merge_type, iname, xname, msg) next @@ -124,7 +124,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } if (coerce_x) { - msg = "Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n" + msg = gettext("Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n") coerce_col(i, ic, "double", "integer", iname, xname, msg) set(callersi, j=ic, value=i[[ic]]) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { @@ -136,11 +136,11 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } if (!coerce_x) { - msg = "Coercing %s column %s to type %s to match type of %s which contains fractions.\n" + msg = gettext("Coercing %s column %s to type %s to match type of %s which contains fractions.\n") coerce_col(x, xc, "integer", "double", xname, iname, msg) } } else { - msg = "Coercing %s column %s to type %s for join to match type of %s.\n" + msg = gettext("Coercing %s column %s to type %s for join to match type of %s.\n") coerce_col(i, ic, "integer", "double", iname, xname, msg) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] From faa54745bca3283b8c4e68e6b2e26dcb6047d098 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 27 Nov 2024 14:38:39 -0800 Subject: [PATCH 30/43] rename getClass helper: mergeType --- R/bmerge.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 236e15cfe3..69ce227521 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -25,7 +25,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos supported = c(ORDERING_TYPES, "factor", "integer64") - getClass = function(x) { + mergeType = function(x) { ans = typeof(x) if (ans=="integer") { if (is.factor(x)) ans = "factor" } else if (ans=="double") { if (inherits(x, "integer64")) ans = "integer64" } @@ -52,8 +52,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # Note that if i is keyed, if this coerces i's key gets dropped by set() ic = icols[a] xc = xcols[a] - x_merge_type = getClass(x[[xc]]) - i_merge_type = getClass(i[[ic]]) + x_merge_type = mergeType(x[[xc]]) + i_merge_type = mergeType(i[[ic]]) xname = paste0("x.", names(x)[xc]) iname = paste0("i.", names(i)[ic]) if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) @@ -116,7 +116,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { + for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], mergeType) == "double")]) { if (isReallyReal(x[[xb]])) { coerce_x = FALSE break @@ -129,7 +129,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(callersi, j=ic, value=i[[ic]]) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "double")]) { + for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], mergeType) == "double")]) { coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, msg) } } @@ -144,7 +144,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos coerce_col(i, ic, "integer", "double", iname, xname, msg) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], getClass) == "integer")]) { + for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], mergeType) == "integer")]) { coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, msg) } } From 56e3169637fa819490a31c89179628c47ed71c26 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 27 Nov 2024 14:45:01 -0800 Subject: [PATCH 31/43] rename: {i,x}c --> {i,x}col I found myself wondering `ic`... "`i` character? `i` class?". Simpler to encode more info in the name --- R/bmerge.R | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 69ce227521..ab92a12066 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -50,12 +50,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # - do type coercions if necessary on just the shallow local copies for the purpose of join # - handle factor columns appropriately # Note that if i is keyed, if this coerces i's key gets dropped by set() - ic = icols[a] - xc = xcols[a] - x_merge_type = mergeType(x[[xc]]) - i_merge_type = mergeType(i[[ic]]) - xname = paste0("x.", names(x)[xc]) - iname = paste0("i.", names(i)[ic]) + icol = icols[a] + xcol = xcols[a] + x_merge_type = mergeType(x[[xcol]]) + i_merge_type = mergeType(i[[icol]]) + xname = paste0("x.", names(x)[xcol]) + iname = paste0("i.", names(i)[icol]) if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type) if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type) if (x_merge_type=="factor" || i_merge_type=="factor") { @@ -63,19 +63,19 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname) if (x_merge_type=="factor" && i_merge_type=="factor") { if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) - set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values + set(i, j=icol, value=chmatch(levels(i[[icol]]), levels(x[[xcol]]), nomatch=0L)[i[[icol]]]) # nomatch=0L otherwise a level that is missing would match to NA values next } else { if (x_merge_type=="character") { if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) - set(i, j=ic, value=val<-as.character(i[[ic]])) - set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 + set(i, j=icol, value=val<-as.character(i[[icol]])) + set(callersi, j=icol, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 next } else if (i_merge_type=="character") { if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) - newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) - if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 - set(i, j=ic, value=newvalue) + newvalue = chmatch(i[[icol]], levels(x[[xcol]]), nomatch=0L) + if (anyNA(i[[icol]])) newvalue[is.na(i[[icol]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 + set(i, j=icol, value=newvalue) next } } @@ -88,29 +88,29 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos cfl = c("character", "logical", "factor") if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) { msg = gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") - if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - coerce_col(i, ic, i_merge_type, x_merge_type, iname, xname, msg) + if (anyNA(i[[icol]]) && allNA(i[[icol]])) { + coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, msg) next } - if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - coerce_col(x, xc, x_merge_type, i_merge_type, xname, iname, msg) + if (anyNA(x[[xcol]]) && allNA(x[[xcol]])) { + coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, msg) next } stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type) } if (x_merge_type=="integer64" || i_merge_type=="integer64") { nm = c(iname, xname) - if (x_merge_type=="integer64") { w=i; wc=ic; wclass=i_merge_type; } else { w=x; wc=xc; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce + if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) } # w is which to coerce if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) set(w, j=wc, value=bit64::as.integer64(w[[wc]])) } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { # just integer and double left - ic_idx = which(ic == icols) # check if on is joined on multiple conditions + ic_idx = which(icol == icols) # check if on is joined on multiple conditions if (i_merge_type=="double") { coerce_x = FALSE - if (!isReallyReal(i[[ic]])) { + if (!isReallyReal(i[[icol]])) { coerce_x = TRUE # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. @@ -125,8 +125,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } if (coerce_x) { msg = gettext("Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n") - coerce_col(i, ic, "double", "integer", iname, xname, msg) - set(callersi, j=ic, value=i[[ic]]) # change the shallow copy of i up in [.data.table to reflect in the result, too. + coerce_col(i, icol, "double", "integer", iname, xname, msg) + set(callersi, j=icol, value=i[[icol]]) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], mergeType) == "double")]) { @@ -137,11 +137,11 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } if (!coerce_x) { msg = gettext("Coercing %s column %s to type %s to match type of %s which contains fractions.\n") - coerce_col(x, xc, "integer", "double", xname, iname, msg) + coerce_col(x, xcol, "integer", "double", xname, iname, msg) } } else { msg = gettext("Coercing %s column %s to type %s for join to match type of %s.\n") - coerce_col(i, ic, "integer", "double", iname, xname, msg) + coerce_col(i, icol, "integer", "double", iname, xname, msg) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], mergeType) == "integer")]) { From 8deaa6a6221922aa2fbc688a844bb6fab5e04e05 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 27 Nov 2024 14:46:04 -0800 Subject: [PATCH 32/43] comment ref. issue --- R/bmerge.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/bmerge.R b/R/bmerge.R index ab92a12066..c9e0a976f4 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -107,7 +107,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } else stopf("Incompatible join types: %s is type integer64 but %s is type double and contains fractions", nm[2L], nm[1L]) } else { # just integer and double left - ic_idx = which(icol == icols) # check if on is joined on multiple conditions + ic_idx = which(icol == icols) # check if on is joined on multiple conditions, #6602 if (i_merge_type=="double") { coerce_x = FALSE if (!isReallyReal(i[[icol]])) { From 81b36571ec01d929cc6693ac8eb5cdabaa078672 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 15:34:17 +0100 Subject: [PATCH 33/43] exchange subset with .shallow --- R/bmerge.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index c9e0a976f4..a970313a12 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -116,7 +116,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # we've always coerced to int and returned int, for convenience. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], mergeType) == "double")]) { + for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) { if (isReallyReal(x[[xb]])) { coerce_x = FALSE break @@ -129,7 +129,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos set(callersi, j=icol, value=i[[icol]]) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], mergeType) == "double")]) { + for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) { coerce_col(x, xb, "double", "integer", paste0("x.", names(x)[xb]), xname, msg) } } @@ -144,7 +144,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos coerce_col(i, icol, "integer", "double", iname, xname, msg) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] - for (xb in xc_idx[which(vapply_1c(x[0L, xc_idx, with=FALSE], mergeType) == "integer")]) { + for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "integer")]) { coerce_col(x, xb, "integer", "double", paste0("x.", names(x)[xb]), xname, msg) } } From c9d3d74a5452aee6901dc2f5bbcd2329e864f741 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 15:38:16 +0100 Subject: [PATCH 34/43] undo test --- inst/tests/tests.Rraw | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9ccec5eccc..03dd3f3ce9 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15727,8 +15727,7 @@ DT = data.table(z = 1i) test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' is not supported for joining/merging") # forder verbose message when !isReallyReal Date, #1738 -date_dbl = as.Date(as.double(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days")), origin="1970-01-01") -DT = data.table(d=sample(date_dbl, 20, replace=TRUE)) +DT = data.table(d=sample(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days"), 20, replace=TRUE)) test(2070.01, typeof(DT$d), "double") test(2070.02, DT[, .N, keyby=d, verbose=TRUE], output="Column 1.*date.*8 byte double.*no fractions are present.*4 byte integer.*to save space and time") From e1715003b112a68bd31d02ca40bcaa8a3f324e10 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 15:40:23 +0100 Subject: [PATCH 35/43] Revert "undo test" This reverts commit c9d3d74a5452aee6901dc2f5bbcd2329e864f741. --- inst/tests/tests.Rraw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 03dd3f3ce9..9ccec5eccc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15727,7 +15727,8 @@ DT = data.table(z = 1i) test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' is not supported for joining/merging") # forder verbose message when !isReallyReal Date, #1738 -DT = data.table(d=sample(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days"), 20, replace=TRUE)) +date_dbl = as.Date(as.double(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days")), origin="1970-01-01") +DT = data.table(d=sample(date_dbl, 20, replace=TRUE)) test(2070.01, typeof(DT$d), "double") test(2070.02, DT[, .N, keyby=d, verbose=TRUE], output="Column 1.*date.*8 byte double.*no fractions are present.*4 byte integer.*to save space and time") From 719569780362af44def8f878b1ef07daf7fdf88f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 15:51:33 +0100 Subject: [PATCH 36/43] update tests --- inst/tests/tests.Rraw | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9ccec5eccc..ec99e5458f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20601,12 +20601,12 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%') # coerce Dates to double if join on multiple columns, #6602 x = data.table(a=1L) y = data.table(c=1L, d=1) -test(2297.01, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") -test(2297.02, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing .*c to type double") +test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double") +test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double") x = data.table(a=1) y = data.table(c=1, d=1L) -test(2297.03, options=c(datatable.verbose=TRUE), y[x, on=.(c == a, d == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") -test(2297.04, options=c(datatable.verbose=TRUE), y[x, on=.(d == a, c == a)], data.table(c=1L, d=1L), output="Coercing double column x.c (which contains no fractions) to type integer") +test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer") +test(2297.04, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer") # dates d_int = .Date(1L) d_dbl = .Date(1) From 6143ef76d150712abd811babc611a39fbd6f7340 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 15:54:11 +0100 Subject: [PATCH 37/43] add comment --- R/bmerge.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/bmerge.R b/R/bmerge.R index a970313a12..7542429f4f 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -81,6 +81,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type) } + # we check factors first to cater for the case when trying to do rolling joins on factors if (x_merge_type == i_merge_type) { if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname) next From 637ff588a261d491e80a05c549d7802f982cc051 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 15:58:52 +0100 Subject: [PATCH 38/43] add non right join testcase --- inst/tests/tests.Rraw | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ec99e5458f..7fdaecd1b4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20623,3 +20623,7 @@ x = data.table(a=1) y = data.table(c=1.5, d=1L) test(2297.21, y[x, on=.(c == a, d == a)], data.table(c=1, d=1)) test(2297.22, y[x, on=.(d == a, c == a)], data.table(c=1, d=1)) +# non right join +x = data.table(a=1, b=2L) +y = data.table(c=1.5, d=1L) +test(2297.31, y[x, on=.(c == a, d == a), nomatch=NULL], output="Empty data.table (0 rows and 3 cols): c,d,b") From 03968e8714f36817c27d25634593492eacc293d0 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 17:04:16 +0100 Subject: [PATCH 39/43] move helper outside bmerge --- R/bmerge.R | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 7542429f4f..317fe2f645 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -1,4 +1,25 @@ + +mergeType = function(x) { + ans = typeof(x) + if (ans=="integer") { if (is.factor(x)) ans = "factor" } + else if (ans=="double") { if (inherits(x, "integer64")) ans = "integer64" } + # do not call isReallyReal(x) yet because i) if both types are double we don't need to coerce even if one or both sides + # are int-as-double, and ii) to save calling it until we really need it + ans +} + +cast_with_atts = function(x, as.f) { + ans = as.f(x) + if (!is.null(attributes(x))) attributes(ans) = attributes(x) + ans +} + +coerce_col = function(dt, col, from_type, to_type, from_name, to_name, verbose_msg=NULL) { + if (!is.null(verbose_msg)) catf(verbose_msg, from_type, from_name, to_type, to_name, domain=NULL) + set(dt, j=col, value=cast_with_atts(dt[[col]], match.fun(paste0("as.", to_type)))) +} + bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbose) { callersi = i @@ -25,26 +46,6 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos supported = c(ORDERING_TYPES, "factor", "integer64") - mergeType = function(x) { - ans = typeof(x) - if (ans=="integer") { if (is.factor(x)) ans = "factor" } - else if (ans=="double") { if (inherits(x, "integer64")) ans = "integer64" } - # do not call isReallyReal(x) yet because i) if both types are double we don't need to coerce even if one or both sides - # are int-as-double, and ii) to save calling it until we really need it - ans - } - - cast_with_atts = function(x, as.f) { - ans = as.f(x) - if (!is.null(attributes(x))) attributes(ans) = attributes(x) - ans - } - - coerce_col = function(dt, col, from_type, to_type, from_name, to_name, verbose_msg) { - if (verbose) catf(verbose_msg, from_type, from_name, to_type, to_name, domain=NULL) - set(dt, j=col, value=cast_with_atts(dt[[col]], match.fun(paste0("as.", to_type)))) - } - if (nrow(i)) for (a in seq_along(icols)) { # - check that join columns have compatible types # - do type coercions if necessary on just the shallow local copies for the purpose of join @@ -88,7 +89,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } cfl = c("character", "logical", "factor") if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) { - msg = gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") + msg = if(verbose) gettext("Coercing all-NA %s column %s to type %s to match type of %s.\n") else NULL if (anyNA(i[[icol]]) && allNA(i[[icol]])) { coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, msg) next @@ -125,7 +126,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } if (coerce_x) { - msg = gettext("Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n") + msg = if (verbose) gettext("Coercing %s column %s (which contains no fractions) to type %s to match type of %s.\n") else NULL coerce_col(i, icol, "double", "integer", iname, xname, msg) set(callersi, j=icol, value=i[[icol]]) # change the shallow copy of i up in [.data.table to reflect in the result, too. if (length(ic_idx)>1L) { @@ -137,11 +138,11 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } if (!coerce_x) { - msg = gettext("Coercing %s column %s to type %s to match type of %s which contains fractions.\n") + msg = if (verbose) gettext("Coercing %s column %s to type %s to match type of %s which contains fractions.\n") else NULL coerce_col(x, xcol, "integer", "double", xname, iname, msg) } } else { - msg = gettext("Coercing %s column %s to type %s for join to match type of %s.\n") + msg = if (verbose) gettext("Coercing %s column %s to type %s for join to match type of %s.\n") else NULL coerce_col(i, icol, "integer", "double", iname, xname, msg) if (length(ic_idx)>1L) { xc_idx = xcols[ic_idx] From 7e7c1c650f0c35947f90c8b9d61328490df27249 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 17:15:41 +0100 Subject: [PATCH 40/43] update comment --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7fdaecd1b4..92db5262c4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -20598,7 +20598,7 @@ test(2295.3, is.data.table(d2)) # #6588: .checkTypos used to give arbitrary strings to stopf as the first argument test(2296, d2[x %no such operator% 1], error = '%no such operator%') -# coerce Dates to double if join on multiple columns, #6602 +# fix coercing integer/double for joins on multiple columns, #6602 x = data.table(a=1L) y = data.table(c=1L, d=1) test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double") From efc21c35a0a9d22aa5842953087c177468d2f2b6 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 17:28:06 +0100 Subject: [PATCH 41/43] add NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 4236e8b078..bd6da3d2b6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -115,6 +115,8 @@ rowwiseDT( 15. `DT[1, on=NULL]` now works for returning the first row, [#6579](https://github.com/Rdatatable/data.table/issues/6579). Thanks to @Kodiologist for the report and @tdhock for the PR. +16. Joins on multiple columns, such as `x[y, on=c("x1==y1", "x2==y1")]`, could fail during type coercions if `x1` and `x2` had different but still compatible types, [#6602](https://github.com/Rdatatable/data.table/issues/6602). This was particularly unexpected when all columns `x1, x2, and y1` were of the same class e.g. `Date` but differed in their underlying types. Thanks to Benjamin Schwendinger for the report and the fix. + ## NOTES 1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix. From ba03d5e27e2ac4e4fa9414fd191c3a1be8126944 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sat, 30 Nov 2024 17:37:36 +0100 Subject: [PATCH 42/43] update numbering --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index b6b51c301e..f938983996 100644 --- a/NEWS.md +++ b/NEWS.md @@ -107,7 +107,7 @@ rowwiseDT( 11. `tables()` now returns the correct size for data.tables over 2GiB, [#6607](https://github.com/Rdatatable/data.table/issues/6607). Thanks to @vlulla for the report and the PR. -16. Joins on multiple columns, such as `x[y, on=c("x1==y1", "x2==y1")]`, could fail during type coercions if `x1` and `x2` had different but still compatible types, [#6602](https://github.com/Rdatatable/data.table/issues/6602). This was particularly unexpected when all columns `x1, x2, and y1` were of the same class e.g. `Date` but differed in their underlying types. Thanks to Benjamin Schwendinger for the report and the fix. +12. Joins on multiple columns, such as `x[y, on=c("x1==y1", "x2==y1")]`, could fail during type coercions if `x1` and `x2` had different but still compatible types, [#6602](https://github.com/Rdatatable/data.table/issues/6602). This was particularly unexpected when all columns `x1, x2, and y1` were of the same class e.g. `Date` but differed in their underlying types. Thanks to Benjamin Schwendinger for the report and the fix. ## NOTES From f5fb8252a6c43c4dab7774b16d0dec2642884807 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 1 Dec 2024 21:49:52 -0800 Subject: [PATCH 43/43] tweak NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f938983996..92b4f9ee3b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -107,7 +107,7 @@ rowwiseDT( 11. `tables()` now returns the correct size for data.tables over 2GiB, [#6607](https://github.com/Rdatatable/data.table/issues/6607). Thanks to @vlulla for the report and the PR. -12. Joins on multiple columns, such as `x[y, on=c("x1==y1", "x2==y1")]`, could fail during type coercions if `x1` and `x2` had different but still compatible types, [#6602](https://github.com/Rdatatable/data.table/issues/6602). This was particularly unexpected when all columns `x1, x2, and y1` were of the same class e.g. `Date` but differed in their underlying types. Thanks to Benjamin Schwendinger for the report and the fix. +12. Joins on multiple columns, such as `x[y, on=c("x1==y1", "x2==y1")]`, could fail during implicit type coercions if `x1` and `x2` had different but still compatible types, [#6602](https://github.com/Rdatatable/data.table/issues/6602). This was particularly unexpected when columns `x1`, `x2`, and `y1` were all of the same class, e.g. `Date`, but differed in their underlying storage types. Thanks to Benjamin Schwendinger for the report and the fix. ## NOTES