Skip to content

Commit

Permalink
Merge pull request #61 from randrescastaneda:collapse_mm
Browse files Browse the repository at this point in the history
Collapse mm
  • Loading branch information
randrescastaneda authored May 23, 2024
2 parents d401078 + fc7177f commit d6c6e64
Show file tree
Hide file tree
Showing 14 changed files with 349 additions and 130 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Imports:
data.table,
cli,
utils,
collapse (>= 2.0.9),
collapse (>= 2.0.13),
lifecycle
Depends:
R (>= 2.10)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
# joyn (development version)


* Add `anti_join()` function.

* Add `unmask_joyn()` function to unmask `joyn` functions that mask `dplyr` equivalents.

* Add information about duplicated obs in `by` variable when match type is `1` rathern than `m`.

* improve ineffciencies in deep copies with `m:m` joins

* Replace `m:m` joins from `data.table::merge.data.table` to `collapse::join`. Thanks to @SebKrantz for the suggestion (#58).

* Add information about duplicated obs in `by` variable when match type is `1` rather than `m`.

## breaking changes
Expand Down
145 changes: 115 additions & 30 deletions R/dplyr-joins.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ left_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand Down Expand Up @@ -91,6 +89,13 @@ left_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
correct_names <- correct_names(by = by,
x = x,
y = y)
byexp <- correct_names$byexp
xbynames <- correct_names$xbynames
ybynames <- correct_names$ybynames

if (keep == TRUE) {
jn_type <- "left"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -121,6 +126,20 @@ left_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# Unmatched Keys ---------------------------------------
if (unmatched == "error") {
check_unmatched_keys(x = x,
Expand All @@ -134,6 +153,7 @@ left_join <- function(
get_vars(lj, reportvar) <- NULL
}


# return
lj
}
Expand Down Expand Up @@ -198,8 +218,6 @@ right_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand All @@ -211,6 +229,7 @@ right_join <- function(
choices = c("drop",
"error"))


args_check <- arguments_checks(x = x,
y = y,
by = by,
Expand All @@ -230,6 +249,13 @@ right_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
correct_names <- correct_names(by = by,
x = x,
y = y)
byexp <- correct_names$byexp
xbynames <- correct_names$xbynames
ybynames <- correct_names$ybynames

if (keep == TRUE) {
jn_type <- "right"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -260,6 +286,20 @@ right_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# Unmatched Keys ---------------------------------------
if (unmatched == "error") {
check_unmatched_keys(x = x,
Expand Down Expand Up @@ -340,8 +380,6 @@ full_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand Down Expand Up @@ -372,6 +410,13 @@ full_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
correct_names <- correct_names(by = by,
x = x,
y = y)
byexp <- correct_names$byexp
xbynames <- correct_names$xbynames
ybynames <- correct_names$ybynames

if (keep == TRUE) {
jn_type <- "full"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -403,6 +448,20 @@ full_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# Unmatched Keys----------------------------------------
if (unmatched == "error") {

Expand Down Expand Up @@ -488,8 +547,6 @@ inner_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand Down Expand Up @@ -520,6 +577,13 @@ inner_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
correct_names <- correct_names(by = by,
x = x,
y = y)
byexp <- correct_names$byexp
xbynames <- correct_names$xbynames
ybynames <- correct_names$ybynames

if (keep == TRUE) {
jn_type <- "inner"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -550,6 +614,20 @@ inner_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# Unmatched Keys ---------------------------------------
if (unmatched == "error") {
check_unmatched_keys(x = x,
Expand Down Expand Up @@ -629,8 +707,6 @@ anti_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand Down Expand Up @@ -658,6 +734,13 @@ anti_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
correct_names <- correct_names(by = by,
x = x,
y = y)
byexp <- correct_names$byexp
xbynames <- correct_names$xbynames
ybynames <- correct_names$ybynames

if (keep == TRUE) {
jn_type <- "anti"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -688,6 +771,20 @@ anti_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# # Unmatched Keys ---------------------------------------
if (dropreport == T) {
get_vars(aj, reportvar) <- NULL
Expand Down Expand Up @@ -838,25 +935,23 @@ arguments_checks <- function(x, y, by, copy, keep, suffix, na_matches, multiple,
#' @keywords internal
set_col_names <- function(x, y, by, suffix, jn_type) {

x_1 <- copy(x)
y_1 <- copy(y)

# If joining by different variables
if (length(grep(pattern = "==?", x = by, value = TRUE)) != 0) {
byexp <- grep(pattern = "==?", x = by, value = TRUE)
if (length(byexp) != 0) {

if (jn_type == "right") {
by_x_names <- fix_by_vars(by = by, x_1, y_1)$xby
by_x_names <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\1", byexp))
}

else if (jn_type == "left" | jn_type == "full" | jn_type == "inner") {
by_y_names <- fix_by_vars(by = by, x_1, y_1)$yby
by_y_names <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\3", byexp))
}

}

# If joining by common var
else {
by_y_names <- by_x_names <- fix_by_vars(by = by, x_1, y_1)$by
by_y_names <- by_x_names <- by
}

# Add key vars with suffix to x and y
Expand Down Expand Up @@ -903,24 +998,16 @@ check_unmatched_keys <- function(x, y, out, by, jn_type) {
# Left table --------------------------------------------------------
if (jn_type %in% c("left", "inner", "anti")) {

use_y_input <- process_by_vector(by = by, input = "right")
use_y_out <- process_by_vector(by = by, input = "left")
use_y_input <- process_by_vector(by = by, input = "right") # id2
use_y_out <- process_by_vector(by = by, input = "left") # id1

if (length(grep("==?", by, value = TRUE)) != 0) {

if (any(use_y_out %in% colnames(y))) {

store_msg(
type = "warn",
warn = paste(cli::symbol$warn, "\nWarning:"),
pale = "\nUnmatched = error not active for this joyn -unmatched keys are not detected"
)
cli::cli_warn("`Unmatched = error` not active for this joyn -unmatched keys are not detected")
}

else {
data.table::setnames(y,
new = use_y_out,
old = use_y_input)

if (unmatched_keys(x = y,
by = use_y_out,
Expand Down Expand Up @@ -1052,5 +1139,3 @@ process_by_vector <- function(by, input = c("left", "right")) {





Loading

0 comments on commit d6c6e64

Please sign in to comment.