From 6fc893bbb47126ac55556c289f4a9eebf4f8af7f Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Thu, 7 Mar 2024 16:23:58 -0500 Subject: [PATCH] updates and bug fixes --- R/joyn-merge.R | 12 ++- R/update_na_vals.R | 2 +- man/full_join.Rd | 4 +- man/inner_join.Rd | 4 +- man/joyn.Rd | 4 +- man/left_join.Rd | 4 +- man/merge.Rd | 4 +- man/right_join.Rd | 4 +- tests/testthat/test-joyn.R | 172 ++++++++++++++++++++++++++++--------- 9 files changed, 158 insertions(+), 52 deletions(-) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 4e271fe5..864fc8e8 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -51,7 +51,9 @@ #' `TRUE` #' @param update_values logical: If TRUE, it will update all values of variables #' in x with the actual of variables in y with the same name as the ones in x. -#' **NAs from y won't be used to update actual values in x**. +#' **NAs from y won't be used to update actual values in x**. Yet, by default, +#' NAs in x will be updated with values in y. To avoid this, make sure to set +#' `update_NAs = FALSE` #' @param verbose logical: if FALSE, it won't display any message (programmer's #' option). Default is TRUE. #' @param keep_common_vars logical: If TRUE, it will keep the original variable @@ -157,7 +159,7 @@ joyn <- function(x, "right", "using", "inner"), y_vars_to_keep = TRUE, update_values = FALSE, - update_NAs = FALSE, + update_NAs = update_values, reportvar = getOption("joyn.reportvar"), reporttype = c("character", "numeric"), roll = NULL, @@ -254,6 +256,8 @@ joyn <- function(x, common_vars <- intersect(names(x), names(y)) if (!(is.null(fixby$yby))) { common_vars <- common_vars[!(common_vars %in% fixby$yby)] + common_vars <- common_vars[!(common_vars %in% fixby$tempkey)] + } else { common_vars <- common_vars[!(common_vars %in% fixby$by)] } @@ -386,12 +390,12 @@ joyn <- function(x, # Update x --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ var_use <- NULL - if (isTRUE(update_values) || isTRUE(update_NAs)) { + if (isTRUE(update_NAs) || isTRUE(update_values)) { var_use <- common_vars } #return(list(reportvar)) - if (isTRUE(update_values | update_NAs) & length(var_use) > 0 ) { + if (isTRUE(update_NAs || update_values) & length(var_use) > 0 ) { x <- update_na_values(dt = x, var = var_use, diff --git a/R/update_na_vals.R b/R/update_na_vals.R index b37aabb7..5c971c84 100644 --- a/R/update_na_vals.R +++ b/R/update_na_vals.R @@ -52,7 +52,7 @@ update_na_values <- function(dt, if (rep_values) { dt_1[[reportvar]][ dt_1$varx_na & dt_1$vary_na] <- 5L - # reportvar = 5 >> y is NA => do not update + # reportvar = 6 >> y is NA => do not update dt_1[[reportvar]][ !dt_1$vary_na] <- 6L } diff --git a/man/full_join.Rd b/man/full_join.Rd index d35d123b..033ebf28 100644 --- a/man/full_join.Rd +++ b/man/full_join.Rd @@ -141,7 +141,9 @@ into x, but a report will be generated.} \item{update_values}{logical: If TRUE, it will update all values of variables in x with the actual of variables in y with the same name as the ones in x. -\strong{NAs from y won't be used to update actual values in x}.} +\strong{NAs from y won't be used to update actual values in x}. Yet, by default, +NAs in x will be updated with values in y. To avoid this, make sure to set +\code{update_NAs = FALSE}} \item{update_NAs}{logical: If TRUE, it will update NA values of all variables in x with actual values of variables in y that have the same name as the diff --git a/man/inner_join.Rd b/man/inner_join.Rd index 307af6a3..fe39af12 100644 --- a/man/inner_join.Rd +++ b/man/inner_join.Rd @@ -141,7 +141,9 @@ into x, but a report will be generated.} \item{update_values}{logical: If TRUE, it will update all values of variables in x with the actual of variables in y with the same name as the ones in x. -\strong{NAs from y won't be used to update actual values in x}.} +\strong{NAs from y won't be used to update actual values in x}. Yet, by default, +NAs in x will be updated with values in y. To avoid this, make sure to set +\code{update_NAs = FALSE}} \item{update_NAs}{logical: If TRUE, it will update NA values of all variables in x with actual values of variables in y that have the same name as the diff --git a/man/joyn.Rd b/man/joyn.Rd index 00b887b3..1ebb4956 100644 --- a/man/joyn.Rd +++ b/man/joyn.Rd @@ -68,7 +68,9 @@ into x, but a report will be generated.} \item{update_values}{logical: If TRUE, it will update all values of variables in x with the actual of variables in y with the same name as the ones in x. -\strong{NAs from y won't be used to update actual values in x}.} +\strong{NAs from y won't be used to update actual values in x}. Yet, by default, +NAs in x will be updated with values in y. To avoid this, make sure to set +\code{update_NAs = FALSE}} \item{update_NAs}{logical: If TRUE, it will update NA values of all variables in x with actual values of variables in y that have the same name as the diff --git a/man/left_join.Rd b/man/left_join.Rd index 813b696b..01bd3705 100644 --- a/man/left_join.Rd +++ b/man/left_join.Rd @@ -141,7 +141,9 @@ into x, but a report will be generated.} \item{update_values}{logical: If TRUE, it will update all values of variables in x with the actual of variables in y with the same name as the ones in x. -\strong{NAs from y won't be used to update actual values in x}.} +\strong{NAs from y won't be used to update actual values in x}. Yet, by default, +NAs in x will be updated with values in y. To avoid this, make sure to set +\code{update_NAs = FALSE}} \item{update_NAs}{logical: If TRUE, it will update NA values of all variables in x with actual values of variables in y that have the same name as the diff --git a/man/merge.Rd b/man/merge.Rd index c499d4d5..7f748784 100644 --- a/man/merge.Rd +++ b/man/merge.Rd @@ -86,7 +86,9 @@ ones in x. If FALSE, NA values won't be updated, even if \code{update_values} is \code{TRUE}} \item{\code{update_values}}{logical: If TRUE, it will update all values of variables in x with the actual of variables in y with the same name as the ones in x. -\strong{NAs from y won't be used to update actual values in x}.} +\strong{NAs from y won't be used to update actual values in x}. Yet, by default, +NAs in x will be updated with values in y. To avoid this, make sure to set +\code{update_NAs = FALSE}} \item{\code{verbose}}{logical: if FALSE, it won't display any message (programmer's option). Default is TRUE.} }} diff --git a/man/right_join.Rd b/man/right_join.Rd index 64ec2dca..1c445aeb 100644 --- a/man/right_join.Rd +++ b/man/right_join.Rd @@ -141,7 +141,9 @@ into x, but a report will be generated.} \item{update_values}{logical: If TRUE, it will update all values of variables in x with the actual of variables in y with the same name as the ones in x. -\strong{NAs from y won't be used to update actual values in x}.} +\strong{NAs from y won't be used to update actual values in x}. Yet, by default, +NAs in x will be updated with values in y. To avoid this, make sure to set +\code{update_NAs = FALSE}} \item{update_NAs}{logical: If TRUE, it will update NA values of all variables in x with actual values of variables in y that have the same name as the diff --git a/tests/testthat/test-joyn.R b/tests/testthat/test-joyn.R index d13aa53f..f0162c30 100644 --- a/tests/testthat/test-joyn.R +++ b/tests/testthat/test-joyn.R @@ -38,6 +38,12 @@ y4 = data.table(id = c(1, 2, 5, 6, 3), y = c(11L, 15L, 20L, 13L, 10L), x = c(16:20)) +x5 = data.table(id = c(1, 2, 5, 6, 3), + yd = c(1, 2, 5, 6, 3), + y = c(11L, 15L, 20L, 13L, 10L), + x = c(16:18, NA, NA)) + + test_that( "select `by` vars when non specified", { @@ -58,6 +64,34 @@ test_that( ) +test_that("all types of by argument raise no error", { + + joyn(x = x4, + y = y4, + by = "id1=id2", + match_type = "m:m") |> + expect_no_error() + + joyn(x = x4, + y = y4, + by = c("id1 = id", "id2"), + match_type = "m:1") |> + expect_no_error() + + joyn(x = x4, + y = y4, + by = c("id1 = id2", "id2 = id"), + match_type = "m:1") |> + expect_no_error() + + joyn(x = x4, + y = y4, + by = c("id2", "x"), + match_type = "1:1") |> + expect_no_error() + +}) + test_that("Errors if no common variables", { xf <- copy(x1) xf[, id := NULL] @@ -188,8 +222,8 @@ test_that("FULL- Compare with base::merge", { jn <- joyn( x1, y1, - by = "id", - reportvar = FALSE, + by = "id", + reportvar = FALSE, match_type = "m:1" ) @@ -202,10 +236,10 @@ test_that("FULL- Compare with base::merge", { expect_equal(jn, br, ignore_attr = 'row.names') jn <- joyn(x2, - y2, - by = "id", - reportvar = FALSE, - keep_common_vars = TRUE) + y2, + by= "id", + reportvar = FALSE, + keep_common_vars = TRUE) br <- base::merge(x2, y2, by = "id", all = TRUE) @@ -284,10 +318,10 @@ test_that("RIGHT - Compare with base::merge", { joyn( x2, y2, - by = "id", - reportvar = FALSE, - keep = "right", - match_type = "1:1", + by = "id", + reportvar = FALSE, + keep = "right", + match_type = "1:1", keep_common_vars = TRUE ) @@ -309,9 +343,9 @@ test_that("INNER - Compare with base::merge", { joyn( x1, y1, - by = "id", - reportvar = FALSE, - keep = "inner", + by = "id", + reportvar = FALSE, + keep = "inner", match_type = "m:1" ) br <- base::merge(x1, y1, by = "id") @@ -326,10 +360,10 @@ test_that("INNER - Compare with base::merge", { joyn( x2, y2, - by = "id", - reportvar = FALSE, - keep = "inner", - match_type = "1:1", + by = "id", + reportvar = FALSE, + keep = "inner", + match_type = "1:1", keep_common_vars = TRUE ) br <- base::merge(x2, y2, by = "id") @@ -409,28 +443,62 @@ test_that("match types work", { test_that("Update NAs", { # update NAs in x variable form x jn <- joyn(x2, - y2, - by = "id", - update_NAs = TRUE, + y2, + by = "id", + update_NAs = TRUE, keep_common_vars = TRUE) idx <- x2[is.na(x), "id"] expect_equal(jn[idx, on = "id"][, x.x], y2[idx, on = "id"][, x]) + jn_1 <- joyn(x2, + y2, + by = "id = yd", + update_NAs = TRUE, + keep_common_vars = TRUE) + + + expect_equal(jn_1[idx, on = "id"][, x.x], y2[idx, on = "id"][, x]) + + jn_2 <- joyn(x5, + y4, + by = c("id", "y"), + update_NAs = TRUE, + keep_common_vars = TRUE) + + to_replace <- x5[is.na(x), "id"] + + expect_equal(jn_2[to_replace, on = "id"][, x.x], y4[to_replace, on = "id"][, x]) + + out <- joyn(x5, + y4, + by = c("id = id2", "yd = id"), + update_NAs = FALSE, + keep_common_vars = TRUE) + + to_replace <- out[(is.na(x.x) & !is.na(x.y)) | (is.na(y.x) & !is.na(y.y) ), c("id", "yd")] + + jn_3 <- joyn(x5, + y4, + by = c("id = id2", "yd = id"), + update_NAs = TRUE, + keep_common_vars = TRUE) + + any(jn_3[to_replace, ".joyn"] != "NA updated") |> + expect_equal(FALSE) }) test_that("Update actual values", { - jn <- - joyn(x2, - y2, - by = "id", - update_values = TRUE, - update_NAs = TRUE, - keep_common_vars = TRUE) + jn <-joyn(x = x2, + y = y2, + by = "id", + update_values = TRUE, + update_NAs = TRUE, + keep_common_vars = TRUE) br <- base::merge(x2, y2, by = "id", all = TRUE) @@ -449,6 +517,28 @@ test_that("Update actual values", { fselect(x.x), ignore_attr = 'row.names') + joyn(x2, + y2, + by = "id = yd", + update_values = TRUE, + update_NAs = FALSE, + keep_common_vars = TRUE) |> + expect_no_error() + + joyn(x5, + y4, + by = c("id", "y"), + update_values = TRUE, + keep_common_vars = TRUE) |> + expect_no_error() + + + joyn(x5, + y4, + by = c("id = id2", "yd = id"), + update_values = TRUE, + keep_common_vars = TRUE) |> + expect_no_error() }) @@ -518,10 +608,10 @@ test_that("selection of reportvar", { expect_equal(unique(c(names(x2), names(y2))), names(jn)) - jn <- joyn(x2, - y2, - by = "id", - reportvar = "t") + jn <- joyn(x = x2, + y = y2, + by = "id", + reportvar = "t") allnames <- unique(c(names(x2), names(y2))) @@ -531,6 +621,16 @@ test_that("selection of reportvar", { }) +test_that("reporttype works", { + jn <- joyn(x = x2, + y = y2, + by = "id", + reporttype = "numeric") + + class(jn$.joyn) |> + expect_equal("numeric") + +}) test_that("Keep Y vars works", { @@ -593,16 +693,6 @@ test_that("convert to data.table when dataframe", { }) -# test_that("no matching obs", { -# -# xx2 <- x2[1, x := 23] -# -# dd <- joyn(x2, y2, verbose = F) -# dw <- dd[, unique(`.joyn`)] -# expect_equal(dw, c("y", "x")) -# -# }) - test_that("do not convert to data.table", { xx1 <- as.data.frame(x1) expect_equal(joyn(xx1, y1, match_type = "m:1") |> class(), xx1 |> class())