diff --git a/DESCRIPTION b/DESCRIPTION index c33447a7..2924e337 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,7 @@ Imports: data.table, cli, utils, - collapse (>= 2.0.9), + collapse (>= 2.0.13), lifecycle Depends: R (>= 2.10) diff --git a/NEWS.md b/NEWS.md index c52907a9..21533b70 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,16 @@ # joyn (development version) + * Add `anti_join()` function. * Add `unmask_joyn()` function to unmask `joyn` functions that mask `dplyr` equivalents. +* Add information about duplicated obs in `by` variable when match type is `1` rathern than `m`. + +* improve ineffciencies in deep copies with `m:m` joins + +* Replace `m:m` joins from `data.table::merge.data.table` to `collapse::join`. Thanks to @SebKrantz for the suggestion (#58). + * Add information about duplicated obs in `by` variable when match type is `1` rather than `m`. ## breaking changes diff --git a/R/dplyr-joins.R b/R/dplyr-joins.R index 0273b01b..f0c5609d 100644 --- a/R/dplyr-joins.R +++ b/R/dplyr-joins.R @@ -59,8 +59,6 @@ left_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -91,6 +89,13 @@ left_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames + if (keep == TRUE) { jn_type <- "left" modified_cols <- set_col_names(x = x, @@ -121,6 +126,20 @@ left_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # Unmatched Keys --------------------------------------- if (unmatched == "error") { check_unmatched_keys(x = x, @@ -134,6 +153,7 @@ left_join <- function( get_vars(lj, reportvar) <- NULL } + # return lj } @@ -198,8 +218,6 @@ right_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -211,6 +229,7 @@ right_join <- function( choices = c("drop", "error")) + args_check <- arguments_checks(x = x, y = y, by = by, @@ -230,6 +249,13 @@ right_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames + if (keep == TRUE) { jn_type <- "right" modified_cols <- set_col_names(x = x, @@ -260,6 +286,20 @@ right_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # Unmatched Keys --------------------------------------- if (unmatched == "error") { check_unmatched_keys(x = x, @@ -340,8 +380,6 @@ full_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -372,6 +410,13 @@ full_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames + if (keep == TRUE) { jn_type <- "full" modified_cols <- set_col_names(x = x, @@ -403,6 +448,20 @@ full_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # Unmatched Keys---------------------------------------- if (unmatched == "error") { @@ -488,8 +547,6 @@ inner_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -520,6 +577,13 @@ inner_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames + if (keep == TRUE) { jn_type <- "inner" modified_cols <- set_col_names(x = x, @@ -550,6 +614,20 @@ inner_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # Unmatched Keys --------------------------------------- if (unmatched == "error") { check_unmatched_keys(x = x, @@ -629,8 +707,6 @@ anti_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -658,6 +734,13 @@ anti_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames + if (keep == TRUE) { jn_type <- "anti" modified_cols <- set_col_names(x = x, @@ -688,6 +771,20 @@ anti_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # # Unmatched Keys --------------------------------------- if (dropreport == T) { get_vars(aj, reportvar) <- NULL @@ -838,25 +935,23 @@ arguments_checks <- function(x, y, by, copy, keep, suffix, na_matches, multiple, #' @keywords internal set_col_names <- function(x, y, by, suffix, jn_type) { - x_1 <- copy(x) - y_1 <- copy(y) - # If joining by different variables - if (length(grep(pattern = "==?", x = by, value = TRUE)) != 0) { + byexp <- grep(pattern = "==?", x = by, value = TRUE) + if (length(byexp) != 0) { if (jn_type == "right") { - by_x_names <- fix_by_vars(by = by, x_1, y_1)$xby + by_x_names <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\1", byexp)) } else if (jn_type == "left" | jn_type == "full" | jn_type == "inner") { - by_y_names <- fix_by_vars(by = by, x_1, y_1)$yby + by_y_names <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\3", byexp)) } } # If joining by common var else { - by_y_names <- by_x_names <- fix_by_vars(by = by, x_1, y_1)$by + by_y_names <- by_x_names <- by } # Add key vars with suffix to x and y @@ -903,24 +998,16 @@ check_unmatched_keys <- function(x, y, out, by, jn_type) { # Left table -------------------------------------------------------- if (jn_type %in% c("left", "inner", "anti")) { - use_y_input <- process_by_vector(by = by, input = "right") - use_y_out <- process_by_vector(by = by, input = "left") + use_y_input <- process_by_vector(by = by, input = "right") # id2 + use_y_out <- process_by_vector(by = by, input = "left") # id1 if (length(grep("==?", by, value = TRUE)) != 0) { if (any(use_y_out %in% colnames(y))) { - - store_msg( - type = "warn", - warn = paste(cli::symbol$warn, "\nWarning:"), - pale = "\nUnmatched = error not active for this joyn -unmatched keys are not detected" - ) + cli::cli_warn("`Unmatched = error` not active for this joyn -unmatched keys are not detected") } else { - data.table::setnames(y, - new = use_y_out, - old = use_y_input) if (unmatched_keys(x = y, by = use_y_out, @@ -1052,5 +1139,3 @@ process_by_vector <- function(by, input = c("left", "right")) { - - diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 6b2f5a49..52f7a970 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -29,7 +29,7 @@ #' observations that matched in both tables and the ones that did not match in #' y. The ones in x will be discarded. If *"inner"*, it only keeps the #' observations that matched both tables. Note that if, for example, a `keep = -#' "left"`, the `joyn()` function still executes a full join under the hood +#' "left", the `joyn()` function still executes a full join under the hood #' and then filters so that only rows the output table is a left join. This #' behaviour, while inefficient, allows all the diagnostics and checks #' conducted by `joyn`. @@ -214,9 +214,6 @@ joyn <- function(x, # Initial parameters --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_joyn <- Sys.time() - x <- copy(x) - y <- copy(y) - ## X and Y ----------- check_xy(x,y) @@ -231,16 +228,58 @@ joyn <- function(x, # the resulting table should have the same class as the x table. class_x <- class(x) - # If match type is m:m we need to convert to data.table - if (match_type == "m:m") { - x <- as.data.table(x) - y <- as.data.table(y) - } + # ensure input names can be restored + correct_names <- correct_names(by = by, + x = x, + y = y, + order = FALSE) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames + ynames <- copy(names(y)) + + # maintain name that is bound to original inputs + x_original <- x + y_original <- y ## Modify BY when is expression --------- fixby <- check_by_vars(by, x, y) by <- fixby$by + # Change names back on exit + # Change names back for inputs------------------------------ + on.exit( + expr = { + if (any(grepl(pattern = "keyby", x = names(x_original)))) { + + knames <- names(x_original)[grepl(pattern = "keyby", + x = names(x_original))] + knames <- knames[order(knames)] + + data.table::setnames(x_original, + old = knames, + new = xbynames) + } + + if (any(grepl(pattern = "keyby", x = names(y_original)))) { + + knames <- names(y_original)[grepl(pattern = "keyby", + x = names(y_original))] + knames <- knames[order(knames)] + + data.table::setnames(y_original, + old = knames, + new = ybynames) + + if (all(names(y_original) %in% ynames)) { + colorderv(y_original, + neworder = ynames) + } + } + }, + add = TRUE + ) + ## Check suffixes ------------- check_suffixes(suffixes) @@ -261,7 +300,6 @@ joyn <- function(x, tx <- mts[1] ty <- mts[2] - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Variables to keep in y --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -284,8 +322,7 @@ joyn <- function(x, # include report variable --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - yvars_w <- c(y_vars_to_keep, ".yreport") # working yvars ZP ------------------------------------- - #yvars_w <- c(newyvars, ".yreport") # working yvars + yvars_w <- c(y_vars_to_keep, ".yreport") # working yvars x <- x |> ftransform(.xreport = 1) y <- y |> @@ -420,7 +457,6 @@ joyn <- function(x, } - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Display results and cleaning --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -430,16 +466,15 @@ joyn <- function(x, .xreport = NULL, .yreport = NULL) - ## Rename by variables ----- - - if (!is.null(fixby$xby)) { - data.table::setnames(x, fixby$tempkey, fixby$xby) - by <- fixby$xby - # not necessary - # setnames(y, fixby$tempkey, fixby$yby) + # Rename by variables ----- + ## in output + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) } - ## convert to characters if chosen ------- if (reporttype == "character") { @@ -501,6 +536,7 @@ joyn <- function(x, } setattr(x, "class", class_x) + x } diff --git a/R/joyn_workhorse.R b/R/joyn_workhorse.R index a570b8ad..433988b6 100644 --- a/R/joyn_workhorse.R +++ b/R/joyn_workhorse.R @@ -7,8 +7,7 @@ #' @param y data object, "right" or "using" #' @param by atomic character vector: key specifying join #' @param match_type atomic character vector of length 1: either "1:1" (default) -#' "1:m", "m:1", or "m:m". If "m:m" then executes `data.table::merge.data.table` -#' in the backend, otherwise uses `collapse::join()` +#' "1:m", "m:1", or "m:m". Relies on `collapse::join()` #' @param suffixes atomic character vector: give suffixes to columns common to both #' @param sort logical: sort the result by the columns in `by` #' `x` and `y` @@ -75,31 +74,19 @@ joyn_workhorse <- function( # not m:m => use collapse::join() dt_result <- tryCatch( expr = { - source_pkg <- if (match_type == "m:m") "data.table::merge" else "collapse::join" - if (match_type == "m:m") { - data.table::merge.data.table( - x = x, - y = y, - by = by, - all = TRUE, - sort = sort, - suffixes = suffixes, - allow.cartesian = TRUE - ) - - } else { - collapse::join( x = x, - y = y, - how = "full", - on = by, - multiple = TRUE, # matches row in x with m in y - validate = "m:m", # no checks performed - suffix = suffixes, # data.table suffixes - keep.col.order = TRUE, - sort = sort, - verbose = 0, - column = NULL) - } + source_pkg <- "collapse::join" + + collapse::join(x = x, + y = y, + how = "full", + on = by, + multiple = TRUE, # matches row in x with m in y + validate = "m:m", # no checks performed + suffix = suffixes, # data.table suffixes + keep.col.order = TRUE, + sort = sort, + verbose = 0, + column = NULL) }, # end of expr section error = function(e) { @@ -126,22 +113,6 @@ joyn_workhorse <- function( ) } - # This is inefficient but it is the only way to return the table when - # there is a warning - - if (match_type == "m:m") { - data.table::merge.data.table( - x = x, - y = y, - by = by, - all = TRUE, - sort = sort, - suffixes = suffixes, - allow.cartesian = TRUE - ) |> - suppressWarnings() - - } else { collapse::join( x = x, y = y, how = "full", @@ -154,7 +125,6 @@ joyn_workhorse <- function( verbose = 0, column = NULL) |> suppressWarnings() - } } diff --git a/R/update_na_vals.R b/R/update_na_vals.R index 580e44a6..108bca39 100644 --- a/R/update_na_vals.R +++ b/R/update_na_vals.R @@ -31,8 +31,8 @@ update_na_values <- function(dt, is_data_table <- inherits(dt, "data.table") # Add util vars #### - dt_1 <- copy(dt) - dt_1 <- dt_1 |> + + dt_1 <- dt |> ftransform(#use_util_reportvar = get(reportvar), # create variable for var.x and var.y is NA # TRUE if NOT NA @@ -57,23 +57,24 @@ update_na_values <- function(dt, } # Replace values #### - if (is_data_table) { - dt_1[get(reportvar) == 4, - (x.var) := mget(y.var)] + gv(dt_1[get(reportvar) == 4], + x.var) <- gv(dt_1[get(reportvar) == 4], + y.var) - dt_1[get(reportvar) == 5, - eval(x.var) := mget(y.var)] + gv(dt_1[get(reportvar) == 5], + x.var) <- gv(dt_1[get(reportvar) == 5], + y.var) } else { - - to_replace <- which(dt_1[[reportvar]] %in% c(4, 5)) - dt_1[to_replace, x.var] <- dt_1[to_replace, y.var] + to_replace <- which(dt_1[[reportvar]] %in% c(4, 5)) + dt_1[to_replace, x.var] <- dt_1[to_replace, y.var] } # Remove util vars #### - get_vars(dt_1, c("varx_na", "vary_na")) <- NULL + get_vars(dt_1, + c("varx_na", "vary_na")) <- NULL # Return dt_1 diff --git a/R/utils.R b/R/utils.R index a1080c15..6a72bee3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -312,7 +312,7 @@ detach_package <- function(pkg_name) { search_item <- paste("package", pkg_name, sep = ":") - if(search_item %in% search()) { + if (search_item %in% search()) { detach(search_item, unload = TRUE, @@ -320,3 +320,37 @@ detach_package <- function(pkg_name) { } } + + + + +#' Function used to correct names in input data frames using `by` argument +#' +#' @param by `by` argument parsed from higher level function +#' @param x left data frame +#' @param y right data frame +#' +#' @return list +#' @keywords internal +correct_names <- function(by, x, y, order = TRUE) { + byexp <- grep(pattern = "==?", + x = by, + value = TRUE) + xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\1", + byexp)) + ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\3", + byexp)) + + if (order) { + xbynames <- xbynames[order(fmatch(xbynames, names(x)))] + ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + } + + out <- list(byexp = byexp, + xbynames = xbynames, + ybynames = ybynames) + out +} + diff --git a/man/correct_names.Rd b/man/correct_names.Rd new file mode 100644 index 00000000..411d8710 --- /dev/null +++ b/man/correct_names.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{correct_names} +\alias{correct_names} +\title{Function used to correct names in input data frames using \code{by} argument} +\usage{ +correct_names(by, x, y, order = TRUE) +} +\arguments{ +\item{by}{\code{by} argument parsed from higher level function} + +\item{x}{left data frame} + +\item{y}{right data frame} +} +\value{ +list +} +\description{ +Function used to correct names in input data frames using \code{by} argument +} +\keyword{internal} diff --git a/man/joyn.Rd b/man/joyn.Rd index 241d5d8a..e370129e 100644 --- a/man/joyn.Rd +++ b/man/joyn.Rd @@ -57,10 +57,7 @@ that matched in both tables and the ones that did not match in x. The ones in y will be discarded. If \emph{"right"} or \emph{"using"}, it keeps the observations that matched in both tables and the ones that did not match in y. The ones in x will be discarded. If \emph{"inner"}, it only keeps the -observations that matched both tables. Note that if, for example, a \code{keep = "left"}, the \code{joyn()} function still executes a full join under the hood -and then filters so that only rows the output table is a left join. This -behaviour, while inefficient, allows all the diagnostics and checks -conducted by \code{joyn}.} +observations that matched both tables. Note that if, for example, a \verb{keep = "left", the }joyn()\verb{function still executes a full join under the hood and then filters so that only rows the output table is a left join. This behaviour, while inefficient, allows all the diagnostics and checks conducted by}joyn`.} \item{y_vars_to_keep}{character: Vector of variable names in \code{y} that will be kept after the merge. If TRUE (the default), it keeps all the brings all diff --git a/man/joyn_workhorse.Rd b/man/joyn_workhorse.Rd index 83f84fdf..6733e184 100644 --- a/man/joyn_workhorse.Rd +++ b/man/joyn_workhorse.Rd @@ -21,8 +21,7 @@ joyn_workhorse( \item{by}{atomic character vector: key specifying join} \item{match_type}{atomic character vector of length 1: either "1:1" (default) -"1:m", "m:1", or "m:m". If "m:m" then executes \code{data.table::merge.data.table} -in the backend, otherwise uses \code{collapse::join()}} +"1:m", "m:1", or "m:m". Relies on \code{collapse::join()}} \item{sort}{logical: sort the result by the columns in \code{by} \code{x} and \code{y}} diff --git a/tests/testthat/test-dplyr-joins.R b/tests/testthat/test-dplyr-joins.R index e3c99634..9d478d9b 100644 --- a/tests/testthat/test-dplyr-joins.R +++ b/tests/testthat/test-dplyr-joins.R @@ -153,7 +153,6 @@ test_that("LEFT JOIN - Conducts left join", { }) - test_that("LEFT JOIN - no id given", { jn1 <- left_join( x2, @@ -167,6 +166,7 @@ test_that("LEFT JOIN - no id given", { expect_equal(jn1, jn2) }) + test_that("LEFT JOIN - copy given", { jn1 <- joyn::left_join( x2, @@ -265,6 +265,7 @@ test_that("LEFT JOIN - incorrectly specified arguments give errors", { }) + test_that("LEFT JOIN - argument `keep` preserves keys in output", { jn <- left_join( x = x1, @@ -312,6 +313,7 @@ test_that("LEFT JOIN - argument `keep` preserves keys in output", { }) + test_that("LEFT JOIN - update values works", { x2a <- x2 x2a$x <- 1:5 @@ -340,6 +342,7 @@ test_that("LEFT JOIN - update values works", { }) + test_that("LEFT JOIN - reportvar works", { jn <- left_join( x1, @@ -375,15 +378,17 @@ test_that("LEFT JOIN - reportvar works", { + test_that("LEFT JOIN - unmatched throws error", { - left_join(x = x4, + left_join(x = x4, y = y4, relationship = "many-to-many", by = "id2", unmatched = "error") |> expect_no_error() + left_join(x = x4, y = y4, relationship = "many-to-many", @@ -391,6 +396,7 @@ test_that("LEFT JOIN - unmatched throws error", { unmatched = "error") |> expect_error() + left_join(x = x1, y = y1, relationship = "many-to-one", @@ -398,32 +404,34 @@ test_that("LEFT JOIN - unmatched throws error", { unmatched = "error") |> expect_error() + left_join(x = x4, y = y4, relationship = "many-to-many", by = c("id2", "x"), unmatched = "error") |> expect_error() - +################################################################################################################################################ left_join(x = x4, y = y4, relationship = "many-to-many", by = c("id2=id"), unmatched = "error") |> - expect_no_error() + expect_warning() left_join(x = x4, y = y4, relationship = "many-to-many", by = c("id1=id2", "id2=id"), unmatched = "error") |> - expect_no_error() + expect_warning() }) + test_that("LEFT JOIN - NA matches", { jn <- left_join( @@ -862,7 +870,8 @@ test_that("FULL JOIN - Conducts full join", { x4, y4, by = c("id1 = id2"), - relationship = "many-to-many" + relationship = "many-to-many", + sort = TRUE ) #dplyr::full_join(x4, y4, by = dplyr::join_by(id1 == id2), relationship = "many-to-many") jn_dplyr <- dplyr::full_join( @@ -871,9 +880,9 @@ test_that("FULL JOIN - Conducts full join", { by = dplyr::join_by(id1 == id2), relationship = "many-to-many" ) - attr(jn_dplyr, "sorted") <- "id1" - attr(jn_joyn, - "sorted") <- "id" + # attr(jn_dplyr, "sorted") <- "id1" + # attr(jn_joyn, + # "sorted") <- "id" expect_equal( jn |> fselect(-get(reportvar)), jn_dplyr, @@ -1671,7 +1680,8 @@ test_that("INNER JOIN - unmatched error", { relationship = "many-to-one", by = c("id1=id2", "id2=id"), unmatched = "error") |> - expect_error() + expect_error() |> + expect_warning() }) @@ -1694,13 +1704,14 @@ test_that("ANTI JOIN - Conducts ANTI join", { jn_joyn <- anti_join( x = x1, y = y1, - by = "id" + by = "id", + sort = TRUE ) jn_dplyr <- dplyr::anti_join( - x1, y1, by = "id" + x1, y1, by = "id", s ) - setorder(jn_dplyr, na.last = F) + attr( jn_dplyr, "sorted" diff --git a/tests/testthat/test-joyn.R b/tests/testthat/test-joyn.R index f58c8652..c28d8e8f 100644 --- a/tests/testthat/test-joyn.R +++ b/tests/testthat/test-joyn.R @@ -43,6 +43,9 @@ x5 = data.table(id = c(1, 2, 5, 6, 3), y = c(11L, 15L, 20L, 13L, 10L), x = c(16:18, NA, NA)) +#------------------------------------------------------------------------------- +# TESTS ------------------------------------------------------------------------ +#------------------------------------------------------------------------------- test_that( @@ -72,6 +75,8 @@ test_that("all types of by argument raise no error", { match_type = "m:m") |> expect_no_error() + + # THIS ONE joyn(x = x4, y = y4, by = c("id1 = id", "id2"), @@ -451,7 +456,7 @@ test_that("match types work", { }) - +########################################################################################### test_that("Update NAs", { # update NAs in x variable form x jn <- joyn(x2, @@ -791,7 +796,58 @@ test_that("anti join warning for update values", { }) +# Test all input data is unchanged + + +test_that("joyn() - input data unchanged", { + + expect_equal(x1, + data.table(id = c(1L, 1L, 2L, 3L, NA_integer_), + t = c(1L, 2L, 1L, 2L, NA_integer_), + x = 11:15)) + + expect_equal(y1, + data.table(id = c(1,2, 4), + y = c(11L, 15L, 16))) + + expect_equal(x2, + data.table(id = c(1, 4, 2, 3, NA), + t = c(1L, 2L, 1L, 2L, NA_integer_), + x = c(16, 12, NA, NA, 15))) + expect_equal(y2, + data.table(id = c(1, 2, 5, 6, 3), + yd = c(1, 2, 5, 6, 3), + y = c(11L, 15L, 20L, 13L, 10L), + x = c(16:20))) + expect_equal(x3, + data.table(id = c("c","b", "d"), + v = 8:10, + foo = c(4,2, 7))) + + expect_equal(y3, + data.table(id = c("c","b", "c", "a"), + y = c(11L, 15L, 18L, 20L))) + + expect_equal(x4, + data.table(id1 = c(1, 1, 2, 3, 3), + id2 = c(1, 1, 2, 3, 4), + t = c(1L, 2L, 1L, 2L, NA_integer_), + x = c(16, 12, NA, NA, 15))) + + expect_equal(y4, + data.table(id = c(1, 2, 5, 6, 3), + id2 = c(1, 1, 2, 3, 4), + y = c(11L, 15L, 20L, 13L, 10L), + x = c(16:20))) + + expect_equal(x5, + data.table(id = c(1, 2, 5, 6, 3), + yd = c(1, 2, 5, 6, 3), + y = c(11L, 15L, 20L, 13L, 10L), + x = c(16:18, NA, NA))) + +}) diff --git a/tests/testthat/test-merge-data.table.R b/tests/testthat/test-merge-data.table.R index c391cfa0..952f758c 100644 --- a/tests/testthat/test-merge-data.table.R +++ b/tests/testthat/test-merge-data.table.R @@ -380,7 +380,7 @@ test_that("FULL JOIN - Conducts full join", { by.y = "id2", all = TRUE ) - + attr(jn_dt, 'sorted') <- NULL expect_equal( jn |> fselect(-get(reportvar)), diff --git a/tests/testthat/test-update_na_vals.R b/tests/testthat/test-update_na_vals.R index ec84bce9..a1f821a2 100644 --- a/tests/testthat/test-update_na_vals.R +++ b/tests/testthat/test-update_na_vals.R @@ -109,7 +109,8 @@ test_that("update_na_vals -update values of one var", { ) # Check not updated values - dt[is.na(x.x) | is.na(x.y) | !.joyn == 3] |> fselect((id:x.y)) |> + dt |> + fsubset(is.na(x.x) | is.na(x.y) | !.joyn == 3) |> fselect((id:x.y)) |> expect_equal(res[!.joyn == 5,] |> fselect((id:x.y))) expect_true(!any(4 %in% res$.joyn))