From 7cbed4ff4aba1f68bcc1a79d93e80cf68f004fd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Th=C3=A9riault?=
 <13123390+rempsyc@users.noreply.github.com>
Date: Sat, 11 May 2024 18:44:27 +0200
Subject: [PATCH] Adds data argument to effectsize.htest/cohens_d (#522)

Adds data argument to effectsize.htest/cohens_d, fixes #245
---
 R/cohens_d.R                     |  27 ++-
 R/effectsize.htest.R             | 215 ++++++++++++++-------
 tests/testthat/test-htest_data.R | 308 +++++++++++++++++++++++++++++++
 3 files changed, 466 insertions(+), 84 deletions(-)
 create mode 100644 tests/testthat/test-htest_data.R

diff --git a/R/cohens_d.R b/R/cohens_d.R
index ec98162b..f11c35bf 100644
--- a/R/cohens_d.R
+++ b/R/cohens_d.R
@@ -195,7 +195,7 @@ glass_delta <- function(x, y = NULL, data = NULL,
 
   if (type != "delta") {
     if (.is_htest_of_type(x, "t-test")) {
-      return(effectsize(x, type = type, verbose = verbose, ...))
+      return(effectsize(x, type = type, verbose = verbose, data = data, ...))
     } else if (.is_BF_of_type(x, c("BFoneSample", "BFindepSample"), "t-squared")) {
       return(effectsize(x, ci = ci, verbose = verbose, ...))
     }
@@ -234,7 +234,7 @@ glass_delta <- function(x, y = NULL, data = NULL,
 
     hn <- 1 / n
     se <- s / sqrt(n)
-    df <- n - 1
+    df1 <- n - 1
 
     pooled_sd <- NULL
   } else {
@@ -252,14 +252,14 @@ glass_delta <- function(x, y = NULL, data = NULL,
         s <- suppressWarnings(sd_pooled(x, y))
         hn <- (1 / n1 + 1 / n2)
         se <- s * sqrt(1 / n1 + 1 / n2)
-        df <- n - 2
+        df1 <- n - 2
       } else {
         s <- sqrt((s1^2 + s2^2) / 2)
         hn <- (2 * (n2 * s1^2 + n1 * s2^2)) / (n1 * n2 * (s1^2 + s2^2))
         se1 <- sqrt(s1^2 / n1)
         se2 <- sqrt(s2^2 / n2)
         se <- sqrt(se1^2 + se2^2)
-        df <- se^4 / (se1^4 / (n1 - 1) + se2^4 / (n2 - 1))
+        df1 <- se^4 / (se1^4 / (n1 - 1) + se2^4 / (n2 - 1))
       }
     } else if (type == "delta") {
       pooled_sd <- NULL
@@ -267,7 +267,7 @@ glass_delta <- function(x, y = NULL, data = NULL,
       s <- s2
       hn <- 1 / n2 + s1^2 / (n1 * s2^2)
       se <- (s2 * sqrt(1 / n2 + s1^2 / (n1 * s2^2)))
-      df <- n2 - 1
+      df1 <- n2 - 1
     }
   }
 
@@ -278,22 +278,21 @@ glass_delta <- function(x, y = NULL, data = NULL,
   if (.test_ci(ci)) {
     # Add cis
     out$CI <- ci
-    ci.level <- .adjust_ci(ci, alternative)
+    ci_level <- .adjust_ci(ci, alternative)
 
-    t <- (d - mu) / se
-    ts <- .get_ncp_t(t, df, ci.level)
+    t1 <- (d - mu) / se
+    ts1 <- .get_ncp_t(t1, df1, ci_level)
 
-    out$CI_low <- ts[1] * sqrt(hn)
-    out$CI_high <- ts[2] * sqrt(hn)
+    out$CI_low <- ts1[1] * sqrt(hn)
+    out$CI_high <- ts1[2] * sqrt(hn)
     ci_method <- list(method = "ncp", distribution = "t")
     out <- .limit_ci(out, alternative, -Inf, Inf)
   } else {
     ci_method <- alternative <- NULL
   }
 
-
   if (adjust) {
-    J <- .J(df)
+    J <- .J(df1)
     col_to_adjust <- intersect(colnames(out), c(types[type], "CI_low", "CI_high"))
     out[, col_to_adjust] <- out[, col_to_adjust] * J
 
@@ -311,6 +310,6 @@ glass_delta <- function(x, y = NULL, data = NULL,
 }
 
 #' @keywords internal
-.J <- function(df) {
-  exp(lgamma(df / 2) - log(sqrt(df / 2)) - lgamma((df - 1) / 2)) # exact method
+.J <- function(df1) {
+  exp(lgamma(df1 / 2) - log(sqrt(df1 / 2)) - lgamma((df1 - 1) / 2)) # exact method
 }
diff --git a/R/effectsize.htest.R b/R/effectsize.htest.R
index 86d7213a..a9b688c0 100644
--- a/R/effectsize.htest.R
+++ b/R/effectsize.htest.R
@@ -30,17 +30,78 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
   }
 }
 
+#' @keywords internal
+.data_from_formula <- function(model_data, model, verbose = TRUE, ...) {
+  if (is.null(model_data) && "data" %in% names(match.call())) {
+    vars <- insight::get_parameters(model)$Parameter
+    vars_split <- unlist(strsplit(vars, " by | and "))
+    data_ellipsis <- eval.parent(match.call()[["data"]])
+    if (!grepl("\\$|\\[", vars) && length(vars_split) > 1) {
+      if (grepl("by|and", vars)) {
+        vars <- sub("by|and", "~", vars, perl = TRUE)
+        vars <- sub("and", "|", vars, fixed = TRUE)
+        if (!grepl("|", vars, fixed = TRUE)) {
+          form <- stats::as.formula(vars)
+          data_out <- .resolve_formula(form, ...)
+          data_out[[2]] <- factor(data_out[[2]])
+        } else if (all(vars_split %in% names(data_ellipsis))) {
+          # We need a special case for the Friedman test
+          # because "In Ops.factor(w, t) : ‘|’ not meaningful for factors"
+          # When used with the | operator within the formula
+          data_out <- stats::model.frame(...)
+          if (all(vars_split %in% names(data_out))) {
+            data_out <- data_out[vars_split]
+          } else {
+            data_out <- NULL
+          }
+        }
+      }
+    } else if (grepl("$", vars, fixed = TRUE)) {
+      # Special case for square bracket subsetting
+      # E.g., x = dat$mpg[dat$am == 1], y = dat$mpg[dat$am == 0]
+      vars_cols <- gsub("(\\b\\w+\\$)", paste0(match.call()[["data"]], "$"), vars)
+      columns <- unlist(strsplit(vars_cols, " and ", fixed = TRUE))
+      x <- eval(parse(text = columns[1]))
+      y <- eval(parse(text = columns[2]))
+      data_out <- list(x, y)
+      # Not necessary to subset/na.omit here because not formula interface
+    } else if (grepl("\\$|\\[", vars)) {
+      # Special case for single-sample square bracket subsetting
+      # E.g., x = mtcars[[col_y]]
+      if (length(vars_split) == 1) {
+        data_out <- data_ellipsis
+        # Not necessary to subset/na.omit here because not formula interface
+      } else {
+        obj <- gsub(".*?\\[([^\\[\\]]+)\\].*", "\\1", vars, perl = TRUE)
+        message("Is object '", obj, "' still available in your workspace?")
+      }
+    } else if (length(vars_split) == 1) {
+      form <- stats::as.formula(paste0(vars, "~1"))
+      data_out <- .resolve_formula(form, ...)
+    } else if (verbose) {
+      message("To use the `data` argument, consider using modifiers outside the formula.")
+    }
+  } else {
+    data_out <- model_data
+  }
+  data_out
+}
+
 #' @keywords internal
 .effectsize_t.test <- function(model, type = NULL, verbose = TRUE, ...) {
   # Get data?
-  data <- insight::get_data(model)
-  approx <- is.null(data)
+  model_data <- insight::get_data(model)
+  data1 <- .data_from_formula(model_data, model, verbose, ...)
 
-  dots <- list(...)
+  approx1 <- is.null(data1)
 
   if (is.null(type) || tolower(type) == "cohens_d") type <- "d"
   if (tolower(type) == "hedges_g") type <- "g"
 
+  cl <- match.call()
+  cl <- cl[-which(names(cl) == "subset")]
+  dots <- list(eval(cl, parent.frame()))
+
   dots$alternative <- model$alternative
   dots$ci <- attr(model$conf.int, "conf.level")
   dots$mu <- model$null.value
@@ -48,10 +109,10 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
   dots$verbose <- verbose
 
   if (!type %in% c("d", "g")) {
-    .fail_if_approx(approx, if (startsWith(type, "rm")) "rm_d" else "cles")
+    .fail_if_approx(approx1, if (startsWith(type, "rm")) "rm_d" else "cles")
   }
 
-  if (approx) {
+  if (approx1) {
     if (verbose) {
       insight::format_warning(
         "Unable to retrieve data from htest object.",
@@ -60,21 +121,28 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
     }
 
     f <- t_to_d
-    args <- list(
+    args1 <- list(
       t = unname(model$statistic),
       df_error = unname(model$parameter)
     )
   } else {
-    if (ncol(data) == 2) {
-      data[[2]] <- factor(data[[2]])
+    if (inherits(data1, "data.frame") && ncol(data1) == 2) {
+      data1[[2]] <- factor(data1[[2]])
     }
-    data <- stats::na.omit(data)
+    data1 <- stats::na.omit(data1)
 
-    args <- list(
-      x = data[[1]],
-      y = if (ncol(data) == 2) data[[2]],
-      pooled_sd = !grepl("Welch", model$method, fixed = TRUE)
-    )
+    if (inherits(data1, "numeric")) {
+      args1 <- list(
+        x = data1,
+        pooled_sd = !grepl("Welch", model$method, fixed = TRUE)
+      )
+    } else {
+      args1 <- list(
+        x = data1[[1]],
+        y = if (length(data1) == 2) data1[[2]],
+        pooled_sd = !grepl("Welch", model$method, fixed = TRUE)
+      )
+    }
 
     if (type %in% c("d", "g")) {
       f <- switch(tolower(type),
@@ -82,12 +150,12 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
         g = hedges_g
       )
     } else if (dots$paired && startsWith(type, "rm")) {
-      args[c("x", "y")] <- split(args$x, args$y)
-      dots$paired <- args$pooled_sd <- NULL
-      args$method <- gsub("^rm\\_", "", type)
+      args1[c("x", "y")] <- split(args1$x, args1$y)
+      dots$paired <- args1$pooled_sd <- NULL
+      args1$method <- gsub("^rm\\_", "", type)
       f <- rm_d
     } else {
-      if (!dots$paired && !args$pooled_sd) {
+      if (!dots$paired && !args1$pooled_sd) {
         insight::format_error("Common language effect size only applicable to Cohen's d with pooled SD.")
       }
 
@@ -102,19 +170,20 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
     }
   }
 
-  out <- do.call(f, c(args, dots))
-  attr(out, "approximate") <- approx
+  out <- do.call(f, c(args1, dots))
+  attr(out, "approximate") <- approx1
   out
 }
 
 #' @keywords internal
 .effectsize_chisq.test_dep <- function(model, type = NULL, verbose = TRUE, ...) {
   # Get data?
-  data <- insight::get_data(model)
-  approx <- is.null(data)
-
+  model_data <- insight::get_data(model)
+  data1 <- .data_from_formula(model_data, model, verbose, ...)
   dots <- list(...)
 
+  approx1 <- is.null(data1)
+
   Obs <- model$observed
   Exp <- model$expected
 
@@ -197,8 +266,8 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
   if (!is.null(model[["conf.int"]])) dots$ci <- attr(model[["conf.int"]], "conf.level")
   if (!is.null(model[["alternative"]])) dots$alternative <- model[["alternative"]]
 
-  data <- insight::get_data(model)
-  .fail_if_approx(is.null(data), type)
+  data1 <- insight::get_data(model)
+  .fail_if_approx(is.null(data1), type)
 
   f <- switch(tolower(type),
     v = ,
@@ -220,23 +289,24 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
     nnt = nnt
   )
 
-  if (is.table(data)) {
-    args <- list(x = data)
+  if (is.table(data1)) {
+    args1 <- list(x = data1)
   } else {
-    args <- list(x = data[[1]], y = data[[2]])
+    args1 <- list(x = data1[[1]], y = data1[[2]])
   }
 
-  do.call(f, c(args, dots))
+  do.call(f, c(args1, dots))
 }
 
 #' @keywords internal
 .effectsize_chisq.test_gof <- function(model, type = NULL, verbose = TRUE, ...) {
   # Get data?
-  data <- insight::get_data(model)
-  approx <- is.null(data)
-
+  model_data <- insight::get_data(model)
+  data1 <- .data_from_formula(model_data, model, verbose, ...)
   dots <- list(...)
 
+  approx1 <- is.null(data1)
+
   Obs <- model$observed
   Exp <- model$expected
   nr <- length(Obs)
@@ -270,12 +340,13 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
 #' @keywords internal
 .effectsize_oneway.test <- function(model, type = NULL, verbose = TRUE, ...) {
   # Get data?
-  data <- insight::get_data(model)
-  approx <- is.null(data)
+  model_data <- insight::get_data(model)
+  data1 <- .data_from_formula(model_data, model, verbose, ...)
 
-  dots <- list(...)
+  approx1 <- is.null(data1)
 
-  if ((approx <- grepl("not assuming", model$method, fixed = TRUE)) && verbose) {
+  approx1 <- grepl("not assuming", model$method, fixed = TRUE)
+  if (approx1 && verbose) {
     insight::format_alert("`var.equal = FALSE` - effect size is an {.b approximation.}")
   }
 
@@ -306,24 +377,24 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
     ...
   )
   colnames(out)[1] <- sub("_partial", "", colnames(out)[1], fixed = TRUE)
-  attr(out, "approximate") <- approx
+  attr(out, "approximate") <- approx1
   out
 }
 
 #' @keywords internal
 .effectsize_mcnemar.test <- function(model, type = NULL, verbose = TRUE, ...) {
   # Get data?
-  data <- insight::get_data(model)
-  approx <- is.null(data)
+  model_data <- insight::get_data(model)
+  data1 <- .data_from_formula(model_data, model, verbose, ...)
 
-  dots <- list(...)
+  approx1 <- is.null(data1)
 
-  .fail_if_approx(approx, "cohens_g")
+  .fail_if_approx(approx1, "cohens_g")
 
-  if (inherits(data, "table")) {
-    out <- cohens_g(data, verbose = verbose, ...)
+  if (inherits(data1, "table")) {
+    out <- cohens_g(data1, verbose = verbose, ...)
   } else {
-    out <- cohens_g(data[[1]], data[[2]], verbose = verbose, ...)
+    out <- cohens_g(data1[[1]], data1[[2]], verbose = verbose, ...)
   }
   out
 }
@@ -331,24 +402,28 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
 #' @keywords internal
 .effectsize_wilcox.test <- function(model, type = NULL, verbose = TRUE, ...) {
   # Get data?
-  data <- insight::get_data(model)
-  approx <- is.null(data)
+  model_data <- insight::get_data(model)
+  data1 <- .data_from_formula(model_data, model, verbose, ...)
 
-  dots <- list(...)
+  approx1 <- is.null(data1)
 
   if (is.null(type) || tolower(type) == "rank_biserial") type <- "rb"
 
+  cl <- match.call()
+  cl <- cl[-which(names(cl) == "subset")]
+  dots <- list(eval(cl, parent.frame()))
+
   dots$alternative <- model$alternative
   dots$ci <- attr(model$conf.int, "conf.level")
   dots$mu <- model$null.value
   dots$paired <- grepl("signed rank", model$method, fixed = TRUE)
 
-  .fail_if_approx(approx, type)
+  .fail_if_approx(approx1, type)
 
-  if (ncol(data) == 2) {
-    data[[2]] <- factor(data[[2]])
+  if (ncol(data1) == 2) {
+    data1[[2]] <- factor(data1[[2]])
   }
-  data <- stats::na.omit(data)
+  data1 <- stats::na.omit(data1)
 
   f <- switch(tolower(type),
     rb = rank_biserial,
@@ -361,9 +436,9 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
     wmw_odds = wmw_odds
   )
 
-  args <- list(
-    x = data[[1]],
-    y = if (ncol(data) == 2) data[[2]],
+  args1 <- list(
+    x = data1[[1]],
+    y = if (ncol(data1) == 2) data1[[2]],
     verbose = verbose
   )
 
@@ -371,35 +446,35 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
     if (dots$paired) {
       insight::format_error("Common language effect size only applicable to 2-sample rank-biserial correlation.")
     }
-    args$parametric <- FALSE
+    args1$parametric <- FALSE
   }
 
-  out <- do.call(f, c(args, dots))
+  out <- do.call(f, c(args1, dots))
   out
 }
 
 #' @keywords internal
 .effectsize_kruskal.test <- function(model, type = NULL, verbose = TRUE, ...) {
   # Get data?
-  data <- insight::get_data(model)
-  approx <- is.null(data)
+  model_data <- insight::get_data(model)
+  data1 <- .data_from_formula(model_data, model, verbose, ...)
 
-  dots <- list(...)
+  approx1 <- is.null(data1)
 
   if (is.null(type)) type <- "epsilon"
 
-  .fail_if_approx(approx, "rank_epsilon_squared")
+  .fail_if_approx(approx1, "rank_epsilon_squared")
 
   f <- switch(type,
     epsilon = rank_epsilon_squared,
     eta = rank_eta_squared
   )
 
-  if (inherits(data, "data.frame")) {
-    out <- f(data[[1]], data[[2]], verbose = verbose, ...)
+  if (inherits(data1, "data.frame")) {
+    out <- f(data1[[1]], data1[[2]], verbose = verbose, ...)
   } else {
     # data frame
-    out <- f(data, verbose = verbose, ...)
+    out <- f(data1, verbose = verbose, ...)
   }
   out
 }
@@ -407,18 +482,18 @@ effectsize.htest <- function(model, type = NULL, verbose = TRUE, ...) {
 #' @keywords internal
 .effectsize_friedman.test <- function(model, type = NULL, verbose = TRUE, ...) {
   # Get data?
-  data <- insight::get_data(model)
-  approx <- is.null(data)
+  model_data <- insight::get_data(model)
+  data1 <- .data_from_formula(model_data, model, verbose, ...)
 
-  dots <- list(...)
+  approx1 <- is.null(data1)
 
-  .fail_if_approx(approx, "kendalls_w")
+  .fail_if_approx(approx1, "kendalls_w")
 
-  if (inherits(data, "table")) {
-    data <- as.data.frame(data)[c("Freq", "Var2", "Var1")]
+  if (inherits(data1, "table")) {
+    data1 <- as.data.frame(data1)[c("Freq", "Var2", "Var1")]
   }
 
-  out <- kendalls_w(data[[1]], data[[2]], data[[3]], verbose = verbose, ...)
+  out <- kendalls_w(data1[[1]], data1[[2]], data1[[3]], verbose = verbose, ...)
   out
 }
 
diff --git a/tests/testthat/test-htest_data.R b/tests/testthat/test-htest_data.R
new file mode 100644
index 00000000..b3996c2b
--- /dev/null
+++ b/tests/testthat/test-htest_data.R
@@ -0,0 +1,308 @@
+test_that("basic examples", {
+  if (getRversion() < "4.1.3") {
+    skip_on_os("linux")
+  }
+
+  # t.test
+  x <- t.test(mpg ~ vs, data = mtcars)
+  expect_warning(effectsize(x), "Unable to retrieve data")
+  expect_no_warning(effectsize(x, data = mtcars))
+
+  # cor.test
+  # no need to specify the data argument
+  x <- cor.test(~ qsec + drat, data = mtcars)
+  expect_warning(effectsize(x), "'htest' method is not")
+
+  # wilcox.test
+  x <- wilcox.test(mpg ~ vs, data = mtcars, exact = FALSE)
+  expect_error(effectsize(x), "Unable to retrieve data")
+  expect_no_warning(effectsize(x, data = mtcars))
+
+  # friedman.test
+  wb <- aggregate(warpbreaks$breaks, by = list(
+    w = warpbreaks$wool, t = warpbreaks$tension
+  ), FUN = mean)
+  x <- friedman.test(x ~ w | t, data = wb)
+  expect_error(effectsize(x), "Unable to retrieve data")
+  expect_no_warning(effectsize(x, data = wb))
+
+  # kruskal.test
+  airquality2 <- airquality
+  airquality2$Month <- as.factor(airquality2$Month)
+  airquality2$Ozone <- ifelse(is.na(airquality2$Ozone), 10, airquality2$Ozone)
+  x <- kruskal.test(Ozone ~ Month, data = airquality2)
+  expect_error(effectsize(x), "Unable to retrieve data")
+  expect_no_warning(effectsize(x, data = airquality2))
+})
+
+test_that("edge cases", {
+  # Example 1
+  tt1 <- t.test(mpg ~ I(am + cyl == 4), data = mtcars)
+  dd1 <- cohens_d(mpg ~ I(am + cyl == 4), data = mtcars, pooled_sd = FALSE)
+
+  expect_warning(effectsize(tt1), "Unable to retrieve data")
+  expect_no_warning(effectsize(tt1, data = mtcars))
+  expect_identical(effectsize(tt1, data = mtcars)[[1]], dd1[[1]])
+
+  # Example 2
+  dat <- mtcars
+  tt2 <- t.test(dat$mpg[dat$am == 1], dat$mpg[dat$am == 0])
+  dd2 <- cohens_d(dat$mpg[dat$am == 1], dat$mpg[dat$am == 0], pooled_sd = FALSE)
+
+  rm("dat")
+  expect_warning(effectsize(tt2), "Unable to retrieve data")
+  expect_no_warning(effectsize(tt2, data = mtcars))
+
+  expect_identical(effectsize(tt2, data = mtcars)[[1]], dd2[[1]])
+
+  # Example 3
+  col_y <- "mpg"
+  tt3 <- t.test(mtcars[[col_y]])
+  dd3 <- cohens_d(mtcars[[col_y]])
+
+  rm("col_y")
+  expect_warning(effectsize(tt3), "Unable to retrieve data")
+  expect_no_warning(effectsize(tt3, data = mtcars))
+  expect_identical(effectsize(tt3, data = mtcars)[[1]], dd3[[1]])
+
+  # Example 4
+  tt4 <- t.test(mpg ~ as.factor(am), data = mtcars)
+
+  expect_warning(effectsize(tt4), "Unable to retrieve data")
+  expect_no_warning(effectsize(tt4, data = mtcars))
+
+  # wilcox.test
+  x <- wilcox.test(mpg ~ as.factor(vs), data = mtcars, exact = FALSE)
+  expect_error(effectsize(x), "Unable to retrieve data")
+  expect_no_warning(effectsize(x, data = mtcars))
+
+  # friedman.test does not allow formula modifiers, skipping
+
+  # kruskal.test
+  airquality2 <- airquality
+  airquality2$Month <- as.factor(airquality2$Month)
+  airquality2$Ozone <- ifelse(is.na(airquality2$Ozone), 10, airquality2$Ozone)
+  x <- kruskal.test(Ozone ~ as.factor(Month), data = airquality2)
+
+  expect_error(effectsize(x), "Unable to retrieve data")
+  expect_no_warning(effectsize(x, data = airquality2))
+
+  # Paired t-test
+  x <- t.test(mpg ~ 1, data = mtcars)
+  expect_no_warning(effectsize(x, data = mtcars))
+
+  x <- t.test(Pair(mpg, hp) ~ 1, data = mtcars)
+  expect_no_warning(effectsize(x, data = mtcars))
+})
+
+test_that("subset and na.action", {
+  if (getRversion() < "4.1.3") {
+    skip_on_os("linux")
+  }
+
+  # t.test
+  some_data <- mtcars
+  some_data$mpg[1] <- NA
+
+  tt <- t.test(mpg ~ am,
+    data = some_data,
+    alternative = "less",
+    mu = 1,
+    var.equal = TRUE,
+    subset = cyl == 4,
+    na.action = na.omit
+  )
+
+  d1 <- effectsize(tt,
+    data = some_data,
+    alternative = "less",
+    mu = 1,
+    var.equal = TRUE,
+    subset = cyl == 4,
+    na.action = na.omit
+  )
+
+  d2 <- cohens_d(mpg ~ am,
+    data = some_data,
+    alternative = "less",
+    mu = 1,
+    pooled_sd = TRUE,
+    subset = cyl == 4,
+    na.action = na.omit
+  )
+
+  expect_equal(d1, d2, ignore_attr = TRUE)
+
+  # Paired t.test with formula
+  sleep2 <- reshape(sleep,
+    direction = "wide",
+    idvar = "ID", timevar = "group"
+  )
+  sleep2$ID <- as.numeric(sleep2$ID)
+  sleep2$extra.2[1] <- NA
+
+  tt_paired <- t.test(
+    Pair(extra.1, extra.2) ~ 1,
+    data = sleep2,
+    alternative = "less",
+    var.equal = TRUE,
+    subset = ID > 3,
+    na.action = na.omit
+  )
+
+  d1_paired <- effectsize(
+    tt_paired,
+    data = sleep2,
+    alternative = "less",
+    var.equal = TRUE,
+    subset = ID > 3,
+    na.action = na.omit
+  )
+
+  d2_paired <- cohens_d(
+    tt_paired,
+    data = sleep2,
+    alternative = "less",
+    paired = TRUE,
+    var.equal = TRUE,
+    subset = ID > 3,
+    na.action = na.omit
+  )
+
+  expect_identical(d1_paired, d2_paired)
+
+  # wilcox.test
+  x <- wilcox.test(
+    mpg ~ vs,
+    data = some_data,
+    alternative = "less",
+    mu = 1,
+    var.equal = TRUE,
+    subset = cyl == 4,
+    na.action = na.omit,
+    exact = FALSE
+  )
+
+  d1 <- effectsize(
+    x,
+    data = some_data,
+    alternative = "less",
+    mu = 1,
+    var.equal = TRUE,
+    subset = cyl == 4,
+    na.action = na.omit
+  )
+
+  d2 <- rank_biserial(
+    mpg ~ vs,
+    data = some_data,
+    alternative = "less",
+    mu = 1,
+    pooled_sd = TRUE,
+    subset = cyl == 4,
+    na.action = na.omit
+  )
+
+  expect_equal(d1, d2, ignore_attr = TRUE)
+
+  # friedman.test
+  wb <- aggregate(warpbreaks$breaks, by = list(
+    w = warpbreaks$wool, t = warpbreaks$tension
+  ), FUN = mean)
+  new_row <- data.frame(w = "B", t = "H", x = 99, stringsAsFactors = FALSE)
+  wb <- rbind(wb, wb[6, ], new_row)
+  wb$x[7] <- NA
+
+  x <- friedman.test(
+    x ~ w | t,
+    data = wb,
+    subset = x < 99,
+    na.action = na.omit
+  )
+
+  d1 <- effectsize(
+    x,
+    data = wb,
+    subset = x < 99,
+    na.action = na.omit
+  )
+
+  d2 <- kendalls_w(
+    x ~ w | t,
+    data = wb,
+    subset = x < 99,
+    na.action = na.omit
+  )
+
+  expect_equal(d1, d2, ignore_attr = FALSE)
+
+  # kruskal.test
+  airquality2 <- airquality
+  airquality2$Month <- as.factor(airquality2$Month)
+  airquality2$Ozone <- ifelse(is.na(airquality2$Ozone), 10, airquality2$Ozone)
+
+  x <- kruskal.test(
+    Ozone ~ Month,
+    data = airquality2,
+    subset = Month != 5,
+    na.action = na.omit
+  )
+
+  set.seed(42)
+  d1 <- effectsize(
+    x,
+    data = airquality2,
+    alternative = "less",
+    subset = Month != 5,
+    na.action = na.omit
+  )
+
+  set.seed(42)
+  d2 <- rank_epsilon_squared(
+    Ozone ~ Month,
+    data = airquality2,
+    alternative = "less",
+    subset = Month != 5,
+    na.action = na.omit
+  )
+
+  expect_equal(d1, d2, ignore_attr = TRUE)
+
+  # subset and na.omit arguments do not apply to square bracket subsetting
+  # using the S3 method instead of the formula interface because no other
+  # dataframe is provided on which to do the subsetting. So no test is
+  # necessary here.
+
+  # paired t-test with formula
+  # Removing this test because paired t-test with formula isn't supported anymore
+  #
+  # before <- c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, NA, 999)
+  # after <- c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, NA, 999)
+  # my_data <- data.frame(
+  #   group = rep(c("before", "after"), each = 10),
+  #   weight = c(before, after),
+  #   stringsAsFactors = FALSE
+  # )
+  #
+  # res <- t.test(weight ~ group,
+  #   data = my_data, paired = TRUE,
+  #   alternative = "less", na.omit = TRUE
+  # )
+  #
+  # d1 <- effectsize(
+  #   res,
+  #   data = my_data,
+  #   subset = weight < 999,
+  #   na.action = na.omit
+  # )
+  #
+  # d2 <- cohens_d(weight ~ group,
+  #   data = my_data,
+  #   paired = TRUE,
+  #   alternative = "less",
+  #   subset = weight < 999,
+  #   na.action = na.omit
+  # )
+  #
+  # expect_equal(d1, d2, ignore_attr = TRUE)
+})