insightsengineering · edelarua · Nov 23, 2024 · Nov 25, 2024 · Nov 25, 2024 · Nov 25, 2024
diff --git a/NAMESPACE b/NAMESPACE
@@ -26,6 +26,7 @@ export(ard_car_anova)
 export(ard_car_vif)
 export(ard_categorical)
 export(ard_categorical_ci)
+export(ard_categorical_max)
 export(ard_continuous)
 export(ard_continuous_ci)
 export(ard_dichotomous)

diff --git a/R/ard_categorical_max.R b/R/ard_categorical_max.R
@@ -0,0 +1,116 @@
+#' ARD to Calculate Categorical Occurrence Rates by Maximum Level Per Unique ID
+#'
+#' Function calculates categorical variable level occurrence rates by maximum level per unique ID.
+#' Each variable in `variables` is evaluated independently and then results for all variables are stacked.
+#' Only the highest-ordered level will be counted for each unique ID.
+#' Unordered, non-numeric variables will be converted to factor and the default level order used for ordering.
+#'
+#' @inheritParams cards::ard_categorical
+#' @inheritParams cards::ard_stack
+#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   The categorical variables for which occurrence rates per unique ID (by maximum level) will be calculated.
+#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   Column(s) identifying each unique ID in `data`. `data` is subset to one row per unique ID
+#'   (the row with the highest-ordered level) before categorical variable level occurrence rates are calculated.
+#' @param denominator (`data.frame`, `integer`)\cr
+#'   Used to define the denominator and enhance the output.
+#'   The argument is optional. If not specified, `data` will be used as `denominator`.
+#'   - the univariate tabulations of the `by` variables are calculated with `denominator` when a data frame is passed,
+#'     e.g. tabulation of the treatment assignment counts that may appear in the header of a table.
+#' @param quiet (scalar `logical`)\cr
+#'   Logical indicating whether to suppress additional messaging. Default is `TRUE`.
+#'
+#' @return an ARD data frame of class 'card'
+#' @name ard_categorical_max
+#'
+#' @examples
+#' # Occurrence Rates by Max Level (Highest Severity) --------------------------
+#' ard_categorical_max(
+#'   cards::ADAE,
+#'   variables = c(AESER, AESEV),
+#'   id = USUBJID,
+#'   by = TRTA,
+#'   denominator = cards::ADSL |> dplyr::rename(TRTA = ARM),
+#'   quiet = FALSE
+#' )
+NULL
+
+#' @rdname ard_categorical_max
+#' @export
+ard_categorical_max <- function(data,
+                                variables,
+                                id,
+                                by = dplyr::group_vars(data),
+                                statistic = everything() ~ c("n", "p", "N"),
+                                denominator = NULL,
+                                fmt_fn = NULL,
+                                stat_label = everything() ~ cards::default_stat_labels(),
+                                quiet = TRUE,
+                                ...) {
+  # Tabulates each variable in `variables` after reducing `data` to a single row
+  # per unique ID (the last row once sorted by ID, `by`, and the variable), then
+  # stacks the per-variable ARDs and sets their context to "categorical_max".
+  # NOTE(review): `...` is accepted but is not forwarded anywhere in this function.
+  set_cli_abort_call()
+
+  # check inputs ---------------------------------------------------------------
+  check_not_missing(data)
+  check_not_missing(variables)
+  check_not_missing(id)
+  # resolve the selectors; `variables`, `id`, and `by` are used as character vectors from here on
+  cards::process_selectors(data, variables = {{ variables }}, id = {{ id }}, by = {{ by }})
+  # grouping is only consulted for the `by` default, so it is dropped once `by` has been resolved
+  data <- dplyr::ungroup(data)
+
+  # denominator must be a data frame or an integer
+  if (!is_empty(denominator) && !is.data.frame(denominator) && !is_integerish(denominator)) {
+    cli::cli_abort(
+      "The {.arg denominator} argument must be a {.cls data.frame} or an {.cls integer}, not {.obj_type_friendly {denominator}}.",
+      call = get_cli_abort_call()
+    )
+  }
+  # when no denominator is supplied, `data` itself is used
+  if (is_empty(denominator)) denominator <- data
+
+  # check the id argument is not empty
+  if (is_empty(id)) {
+    cli::cli_abort("Argument {.arg id} cannot be empty.", call = get_cli_abort_call())
+  }
+
+  # return empty ARD if no variables selected ----------------------------------
+  if (is_empty(variables)) {
+    return(dplyr::tibble() |> cards::as_card())
+  }
+
+  # print default order of character variable levels ---------------------------
+  # the levels are only needed for the message, so nothing is computed when `quiet = TRUE`
+  if (!quiet) {
+    for (v in variables) {
+      if (is.character(data[[v]])) {
+        lvls <- .unique_and_sorted(data[[v]])
+        vec <- cli::cli_vec(
+          lvls,
+          style = list("vec-sep" = " < ", "vec-sep2" = " < ", "vec-last" = " < ", "vec-trunc" = 3)
+        )
+        cli::cli_inform("{.var {v}}: {.val {vec}}")
+      }
+    }
+  }
+
+  # calculate results for each variable ----------------------------------------
+  # one row is kept per ID within the `by` variables that also appear in `denominator`;
+  # this does not depend on the variable being tabulated, so it is computed once
+  id_cols <- c(id, intersect(by, names(denominator)))
+
+  lst_results <- lapply(
+    variables,
+    function(x) {
+      # sort by ID, `by`, then `x`, and keep the last row of each ID group
+      # NOTE(review): `arrange_using_order()` is an unexported {cards} function accessed via `:::`
+      # NOTE(review): if the sort places `NA` last, an ID with a missing `x` keeps its `NA` row — confirm intended
+      data_max <- data |>
+        cards:::arrange_using_order(c(id, by, x)) |>
+        dplyr::slice_tail(n = 1L, by = all_of(id_cols))
+
+      ard_categorical(
+        data = data_max,
+        variables = all_of(x),
+        by = all_of(by),
+        statistic = statistic,
+        denominator = denominator,
+        fmt_fn = fmt_fn,
+        stat_label = stat_label
+      )
+    }
+  )
+
+  # combine results ------------------------------------------------------------
+  result <- lst_results |>
+    dplyr::bind_rows() |>
+    dplyr::mutate(context = "categorical_max") |>
+    cards::tidy_ard_column_order() |>
+    cards::tidy_ard_row_order()
+
+  # return final result --------------------------------------------------------
+  result
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -88,6 +88,7 @@ reference:
       - ard_categorical_ci.data.frame
       - ard_regression
       - ard_regression_basic
+      - ard_categorical_max
 
   - title: "Helpers"
   - contents:

diff --git a/man/ard_categorical_max.Rd b/man/ard_categorical_max.Rd
diff --git a/tests/testthat/_snaps/ard_categorical_max.md b/tests/testthat/_snaps/ard_categorical_max.md
@@ -0,0 +1,168 @@
+# ard_categorical_max() works with default settings
+
+    Code
+      print(res, n = 20, columns = "all")
+    Message
+      {cards} data frame: 27 x 11
+    Output
+         group1 group1_level variable variable_level   context stat_name stat_label  stat fmt_fn warning error
+      1    TRTA      Placebo    AESEV           MILD categori…         n          n    36      0              
+      2    TRTA      Placebo    AESEV           MILD categori…         N          N   301      0              
+      3    TRTA      Placebo    AESEV           MILD categori…         p          %  0.12   <fn>              
+      4    TRTA      Placebo    AESEV       MODERATE categori…         n          n    26      0              
+      5    TRTA      Placebo    AESEV       MODERATE categori…         N          N   301      0              
+      6    TRTA      Placebo    AESEV       MODERATE categori…         p          % 0.086   <fn>              
+      7    TRTA      Placebo    AESEV         SEVERE categori…         n          n     7      0              
+      8    TRTA      Placebo    AESEV         SEVERE categori…         N          N   301      0              
+      9    TRTA      Placebo    AESEV         SEVERE categori…         p          % 0.023   <fn>              
+      10   TRTA    Xanomeli…    AESEV           MILD categori…         n          n    22      0              
+      11   TRTA    Xanomeli…    AESEV           MILD categori…         N          N   455      0              
+      12   TRTA    Xanomeli…    AESEV           MILD categori…         p          % 0.048   <fn>              
+      13   TRTA    Xanomeli…    AESEV       MODERATE categori…         n          n    49      0              
+      14   TRTA    Xanomeli…    AESEV       MODERATE categori…         N          N   455      0              
+      15   TRTA    Xanomeli…    AESEV       MODERATE categori…         p          % 0.108   <fn>              
+      16   TRTA    Xanomeli…    AESEV         SEVERE categori…         n          n     8      0              
+      17   TRTA    Xanomeli…    AESEV         SEVERE categori…         N          N   455      0              
+      18   TRTA    Xanomeli…    AESEV         SEVERE categori…         p          % 0.018   <fn>              
+      19   TRTA    Xanomeli…    AESEV           MILD categori…         n          n    19      0              
+      20   TRTA    Xanomeli…    AESEV           MILD categori…         N          N   435      0              
+    Message
+      i 7 more rows
+      i Use `print(n = ...)` to see more rows
+
+---
+
+    Code
+      print(ard_categorical_max(dplyr::group_by(cards::ADAE, TRTA), variables = AESEV, id = USUBJID, denominator = dplyr::rename(cards::ADSL, TRTA = ARM)), n = 20, columns = "all")
+    Message
+      {cards} data frame: 27 x 11
+    Output
+         group1 group1_level variable variable_level   context stat_name stat_label  stat fmt_fn warning error
+      1    TRTA      Placebo    AESEV           MILD categori…         n          n    36      0              
+      2    TRTA      Placebo    AESEV           MILD categori…         N          N    86      0              
+      3    TRTA      Placebo    AESEV           MILD categori…         p          % 0.419   <fn>              
+      4    TRTA      Placebo    AESEV       MODERATE categori…         n          n    26      0              
+      5    TRTA      Placebo    AESEV       MODERATE categori…         N          N    86      0              
+      6    TRTA      Placebo    AESEV       MODERATE categori…         p          % 0.302   <fn>              
+      7    TRTA      Placebo    AESEV         SEVERE categori…         n          n     7      0              
+      8    TRTA      Placebo    AESEV         SEVERE categori…         N          N    86      0              
+      9    TRTA      Placebo    AESEV         SEVERE categori…         p          % 0.081   <fn>              
+      10   TRTA    Xanomeli…    AESEV           MILD categori…         n          n    22      0              
+      11   TRTA    Xanomeli…    AESEV           MILD categori…         N          N    84      0              
+      12   TRTA    Xanomeli…    AESEV           MILD categori…         p          % 0.262   <fn>              
+      13   TRTA    Xanomeli…    AESEV       MODERATE categori…         n          n    49      0              
+      14   TRTA    Xanomeli…    AESEV       MODERATE categori…         N          N    84      0              
+      15   TRTA    Xanomeli…    AESEV       MODERATE categori…         p          % 0.583   <fn>              
+      16   TRTA    Xanomeli…    AESEV         SEVERE categori…         n          n     8      0              
+      17   TRTA    Xanomeli…    AESEV         SEVERE categori…         N          N    84      0              
+      18   TRTA    Xanomeli…    AESEV         SEVERE categori…         p          % 0.095   <fn>              
+      19   TRTA    Xanomeli…    AESEV           MILD categori…         n          n    19      0              
+      20   TRTA    Xanomeli…    AESEV           MILD categori…         N          N    84      0              
+    Message
+      i 7 more rows
+      i Use `print(n = ...)` to see more rows
+
+# ard_categorical_max(statistic) works
+
+    Code
+      ard_categorical_max(cards::ADAE, variables = AESEV, id = USUBJID, by = TRTA, denominator = dplyr::rename(cards::ADSL, TRTA = ARM), statistic = ~"n")
+    Message
+      {cards} data frame: 9 x 11
+    Output
+        group1 group1_level variable variable_level stat_name stat_label stat
+      1   TRTA      Placebo    AESEV           MILD         n          n   36
+      2   TRTA      Placebo    AESEV       MODERATE         n          n   26
+      3   TRTA      Placebo    AESEV         SEVERE         n          n    7
+      4   TRTA    Xanomeli…    AESEV           MILD         n          n   22
+      5   TRTA    Xanomeli…    AESEV       MODERATE         n          n   49
+      6   TRTA    Xanomeli…    AESEV         SEVERE         n          n    8
+      7   TRTA    Xanomeli…    AESEV           MILD         n          n   19
+      8   TRTA    Xanomeli…    AESEV       MODERATE         n          n   42
+      9   TRTA    Xanomeli…    AESEV         SEVERE         n          n   16
+    Message
+      i 4 more variables: context, fmt_fn, warning, error
+
+# ard_categorical_max(quiet) works
+
+    Code
+      ard_categorical_max(cards::ADAE, variables = AESEV, id = USUBJID, by = TRTA, denominator = dplyr::rename(cards::ADSL, TRTA = ARM), quiet = FALSE)
+    Message
+      `AESEV`: "MILD" < "MODERATE" < "SEVERE"
+      {cards} data frame: 27 x 11
+    Output
+         group1 group1_level variable variable_level stat_name stat_label  stat
+      1    TRTA      Placebo    AESEV           MILD         n          n    36
+      2    TRTA      Placebo    AESEV           MILD         N          N    86
+      3    TRTA      Placebo    AESEV           MILD         p          % 0.419
+      4    TRTA      Placebo    AESEV       MODERATE         n          n    26
+      5    TRTA      Placebo    AESEV       MODERATE         N          N    86
+      6    TRTA      Placebo    AESEV       MODERATE         p          % 0.302
+      7    TRTA      Placebo    AESEV         SEVERE         n          n     7
+      8    TRTA      Placebo    AESEV         SEVERE         N          N    86
+      9    TRTA      Placebo    AESEV         SEVERE         p          % 0.081
+      10   TRTA    Xanomeli…    AESEV           MILD         n          n    22
+    Message
+      i 17 more rows
+      i Use `print(n = ...)` to see more rows
+      i 4 more variables: context, fmt_fn, warning, error
+
+# ard_categorical_max() works with pre-ordered factor variables
+
+    Code
+      print(res, n = 20, columns = "all")
+    Message
+      {cards} data frame: 27 x 11
+    Output
+         group1 group1_level variable variable_level   context stat_name stat_label  stat fmt_fn warning error
+      1    TRTA      Placebo    AESEV           MILD categori…         n          n    36      0              
+      2    TRTA      Placebo    AESEV           MILD categori…         N          N    86      0              
+      3    TRTA      Placebo    AESEV           MILD categori…         p          % 0.419   <fn>              
+      4    TRTA      Placebo    AESEV       MODERATE categori…         n          n    26      0              
+      5    TRTA      Placebo    AESEV       MODERATE categori…         N          N    86      0              
+      6    TRTA      Placebo    AESEV       MODERATE categori…         p          % 0.302   <fn>              
+      7    TRTA      Placebo    AESEV         SEVERE categori…         n          n     7      0              
+      8    TRTA      Placebo    AESEV         SEVERE categori…         N          N    86      0              
+      9    TRTA      Placebo    AESEV         SEVERE categori…         p          % 0.081   <fn>              
+      10   TRTA    Xanomeli…    AESEV           MILD categori…         n          n    22      0              
+      11   TRTA    Xanomeli…    AESEV           MILD categori…         N          N    84      0              
+      12   TRTA    Xanomeli…    AESEV           MILD categori…         p          % 0.262   <fn>              
+      13   TRTA    Xanomeli…    AESEV       MODERATE categori…         n          n    49      0              
+      14   TRTA    Xanomeli…    AESEV       MODERATE categori…         N          N    84      0              
+      15   TRTA    Xanomeli…    AESEV       MODERATE categori…         p          % 0.583   <fn>              
+      16   TRTA    Xanomeli…    AESEV         SEVERE categori…         n          n     8      0              
+      17   TRTA    Xanomeli…    AESEV         SEVERE categori…         N          N    84      0              
+      18   TRTA    Xanomeli…    AESEV         SEVERE categori…         p          % 0.095   <fn>              
+      19   TRTA    Xanomeli…    AESEV           MILD categori…         n          n    19      0              
+      20   TRTA    Xanomeli…    AESEV           MILD categori…         N          N    84      0              
+    Message
+      i 7 more rows
+      i Use `print(n = ...)` to see more rows
+
+# ard_categorical_max() errors with incomplete factor columns
+
+    Code
+      ard_categorical_max(dplyr::mutate(cards::ADAE, AESOC = factor(AESOC, levels = character(
+        0))), variables = AESOC, id = USUBJID, by = TRTA)
+    Condition
+      Error in `ard_categorical_max()`:
+      ! Factors with empty "levels" attribute are not allowed, which was identified in column "AESOC".
+
+---
+
+    Code
+      ard_categorical_max(dplyr::mutate(cards::ADAE, SEX = factor(SEX, levels = c("F",
+        "M", NA), exclude = NULL)), variables = SEX, id = USUBJID, by = TRTA)
+    Condition
+      Error in `ard_categorical_max()`:
+      ! Factors with NA levels are not allowed, which are present in column "SEX".
+
+# ard_categorical_max() works without any variables
+
+    Code
+      ard_categorical_max(data = cards::ADAE, variables = starts_with("xxxx"), id = USUBJID,
+      by = c(TRTA, AESEV))
+    Message
+      {cards} data frame: 0 x 0
+    Output
+      data frame with 0 columns and 0 rows
+