Cope with 1/y (#897)

* Cope with `1/y` * msg * minor * fix * fix * fix * fix test * fix * docs
easystats · Jun 26, 2024 · a19bdc5 · a19bdc5
1 parent 8a12151
commit a19bdc5
Show file tree

Hide file tree

Showing 13 changed files with 191 additions and 46 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: insight
 Title: Easy Access to Model Information for Various Model Objects
-Version: 0.20.1.6
+Version: 0.20.1.7
 Authors@R: 
     c(person(given = "Daniel",
              family = "Lüdecke",

diff --git a/NAMESPACE b/NAMESPACE
@@ -332,6 +332,8 @@ S3method(find_terms,aovlist)
 S3method(find_terms,bfsl)
 S3method(find_terms,default)
 S3method(find_terms,mipo)
+S3method(find_transformation,character)
+S3method(find_transformation,default)
 S3method(find_weights,brmsfit)
 S3method(find_weights,default)
 S3method(find_weights,gls)

diff --git a/NEWS.md b/NEWS.md
@@ -29,6 +29,9 @@
 * `format_alert()` and `format_warning()` get an `immediate` argument, to
   output warnings immediately.
 
+* `find_terms()` and `find_transformation()` now better cope with inverse
+  transformations of the response value, such as `1/y`.
+
 ## Bug fixes
 
 * `null_model()` now correctly handles zero-inflated models from package

diff --git a/R/find_terms.R b/R/find_terms.R
@@ -52,7 +52,11 @@ find_terms <- function(x, ...) {
 
 #' @rdname find_terms
 #' @export
-find_terms.default <- function(x, flatten = FALSE, as_term_labels = FALSE, verbose = TRUE, ...) {
+find_terms.default <- function(x,
+                               flatten = FALSE,
+                               as_term_labels = FALSE,
+                               verbose = TRUE,
+                               ...) {
   f <- find_formula(x, verbose = verbose)
 
   if (is.null(f)) {
@@ -179,12 +183,15 @@ find_terms.mipo <- function(x, flatten = FALSE, ...) {
   f <- lapply(f, function(.x) {
     if (is.list(.x)) {
       .x <- vapply(.x, .formula_to_string, character(1))
-    } else {
-      if (!is.character(.x)) .x <- safe_deparse(.x)
+    } else if (!is.character(.x)) {
+      .x <- safe_deparse(.x)
     }
     .x
   })
 
+  # save original response
+  original_response <- f$response
+
   # protect "-1"
   f$conditional <- gsub("(-1|- 1)(?![^(]*\\))", "#1", f$conditional, perl = TRUE)
 
@@ -198,12 +205,17 @@ find_terms.mipo <- function(x, flatten = FALSE, ...) {
     )), fixed = TRUE)
     # if user has used namespace in formula-functions, these are returned
     # as empty elements. remove those here
-    if (any(nchar(f_parts) == 0)) {
-      f_parts <- f_parts[-which(nchar(f_parts) == 0)]
+    if (!all(nzchar(f_parts, keepNA = TRUE))) {
+      f_parts <- f_parts[-which(!nzchar(f_parts, keepNA = TRUE))]
     }
     text_remove_backticks(unique(f_parts))
   })
 
+  # exceptions where we want to preserve the response value come here
+  # - lm(1 / Sepal.Length ~ Species, data = iris)
+  if (!is.null(original_response) && !is_empty_object(original_response) && startsWith(original_response, "1/")) { # nolint
+    f$response <- original_response
+  }
 
   # remove "1" and "0" from variables in random effects
 

diff --git a/R/find_transformation.R b/R/find_transformation.R
@@ -5,9 +5,12 @@
 #'   or exp-transforming, was applied to the response variable (dependent
 #'   variable) in a regression formula. Currently, following patterns are
 #'   detected: `log`, `log1p`, `log2`, `log10`, `exp`, `expm1`, `sqrt`,
-#'   `log(x+<number>)`, `log-log` and `power` (to 2nd power, like `I(x^2)`).
+#'   `log(x+<number>)`, `log-log`, `power` (to 2nd power, like `I(x^2)`), and
+#'   `inverse` (like `1/y`).
+#'
+#' @param x A regression model or a character string of the response value.
+#' @param ... Currently not used.
 #'
-#' @param x A regression model.
 #' @return A string, with the name of the function of the applied transformation.
 #'   Returns `"identity"` for no transformation, and e.g. `"log(x+3)"` when
 #'   a specific values was added to the response variables before
@@ -25,32 +28,48 @@
 #' # log+2
 #' model <- lm(log(Sepal.Length + 2) ~ Species, data = iris)
 #' find_transformation(model)
+#'
+#' # inverse, response provided as character string
+#' find_transformation("1 / y")
+#' @export
+find_transformation <- function(x, ...) {
+  UseMethod("find_transformation")
+}
+
+
 #' @export
-find_transformation <- function(x) {
+find_transformation.default <- function(x, ...) {
   # validation check
   if (is.null(x) || is.data.frame(x) || !is_model(x)) {
     return(NULL)
   }
-
   rv <- find_terms(x)[["response"]]
+  find_transformation(rv)
+}
+
+
+#' @export
+find_transformation.character <- function(x, ...) {
   transform_fun <- "identity"
 
+  # remove whitespaces
+  x <- gsub(" ", "", x, fixed = TRUE)
 
   # log-transformation
 
-  if (any(grepl("log\\((.*)\\)", rv))) {
+  if (any(grepl("log\\((.*)\\)", x))) {
     # do we have log-log models?
-    if (grepl("log\\(log\\((.*)\\)\\)", rv)) {
+    if (grepl("log\\(log\\((.*)\\)\\)", x)) {
       transform_fun <- "log-log"
     } else {
       # 1. try: log(x + number)
       plus_minus <- .safe(
-        eval(parse(text = gsub("log\\(([^,\\+)]*)(.*)\\)", "\\2", rv)))
+        eval(parse(text = gsub("log\\(([^,\\+)]*)(.*)\\)", "\\2", x)))
       )
       # 2. try: log(number + x)
       if (is.null(plus_minus)) {
         plus_minus <- .safe(
-          eval(parse(text = gsub("log\\(([^,\\+)]*)(.*)\\)", "\\1", rv)))
+          eval(parse(text = gsub("log\\(([^,\\+)]*)(.*)\\)", "\\1", x)))
         )
       }
       if (is.null(plus_minus)) {
@@ -64,40 +83,40 @@ find_transformation <- function(x) {
 
   # log1p-transformation
 
-  if (any(grepl("log1p\\((.*)\\)", rv))) {
+  if (any(grepl("log1p\\((.*)\\)", x))) {
     transform_fun <- "log1p"
   }
 
 
   # expm1-transformation
 
-  if (any(grepl("expm1\\((.*)\\)", rv))) {
+  if (any(grepl("expm1\\((.*)\\)", x))) {
     transform_fun <- "expm1"
   }
 
 
   # log2/log10-transformation
 
-  if (any(grepl("log2\\((.*)\\)", rv))) {
+  if (any(grepl("log2\\((.*)\\)", x))) {
     transform_fun <- "log2"
   }
 
-  if (any(grepl("log10\\((.*)\\)", rv))) {
+  if (any(grepl("log10\\((.*)\\)", x))) {
     transform_fun <- "log10"
   }
 
 
   # exp-transformation
 
-  if (any(grepl("exp\\((.*)\\)", rv))) {
+  if (any(grepl("exp\\((.*)\\)", x))) {
     transform_fun <- "exp"
   }
 
 
   # sqrt-transformation
 
-  if (any(grepl("sqrt\\((.*)\\)", rv))) {
-    plus_minus <- eval(parse(text = gsub("sqrt\\(([^,\\+)]*)(.*)\\)", "\\2", rv)))
+  if (any(grepl("sqrt\\((.*)\\)", x))) {
+    plus_minus <- eval(parse(text = gsub("sqrt\\(([^,\\+)]*)(.*)\\)", "\\2", x)))
     if (is.null(plus_minus)) {
       transform_fun <- "sqrt"
     } else {
@@ -106,16 +125,23 @@ find_transformation <- function(x) {
   }
 
 
+  # inverse-transformation
+
+  if (any(startsWith(x, "1/"))) {
+    transform_fun <- "inverse"
+  }
+
+
   # (unknown) I-transformation
 
-  if (any(grepl("I\\((.*)\\)", rv))) {
+  if (any(grepl("I\\((.*)\\)", x))) {
     transform_fun <- NULL
   }
 
 
   # power-transformation
 
-  if (any(grepl("(.*)(\\^|\\*\\*)\\s?-?(\\d+|[()])", rv))) {
+  if (any(grepl("(.*)(\\^|\\*\\*)\\s?-?(\\d+|[()])", x))) {
     transform_fun <- "power"
   }
 

diff --git a/R/get_transformation.R b/R/get_transformation.R
@@ -9,6 +9,8 @@
 #' transformation.
 #'
 #' @param x A regression model.
+#' @param verbose Logical, if `TRUE`, prints a warning if the transformation
+#' could not be determined.
 #'
 #' @return
 #'
@@ -36,7 +38,7 @@
 #' get_transformation(model)$inverse(0.3)
 #' exp(0.3)
 #' @export
-get_transformation <- function(x) {
+get_transformation <- function(x, verbose = TRUE) {
   transform_fun <- find_transformation(x)
 
   # unknown
@@ -58,6 +60,8 @@ get_transformation <- function(x) {
     out <- list(transformation = exp, inverse = log)
   } else if (transform_fun == "sqrt") {
     out <- list(transformation = sqrt, inverse = function(x) x^2)
+  } else if (transform_fun == "inverse") {
+    out <- list(transformation = function(x) 1 / x, inverse = function(x) x^-1)
   } else if (transform_fun == "power") {
     ## TODO: detect power - can we turn this into a function?
     # power <- .safe(gsub("\\(|\\)", "", gsub("(.*)(\\^|\\*\\*)\\s*(\\d+|[()])", "\\3", find_terms(x)[["response"]])))
@@ -72,6 +76,13 @@ get_transformation <- function(x) {
       transformation = function(x) log(log(x)),
       inverse = function(x) exp(exp(x))
     )
+  } else {
+    if (verbose) {
+      insight::format_alert(
+        paste0("The transformation and inverse-transformation functions for `", transform_fun, "` could not be determined.") # nolint
+      )
+    }
+    out <- NULL
   }
 
   out

diff --git a/R/get_variances.R b/R/get_variances.R
@@ -107,19 +107,50 @@
 #' `VarCorr()`. This measure is only available for mixed models with random
 #' intercepts and slopes.
 #'
-#' @note This function supports models of class `merMod` (including models
-#' from **blme**), `clmm`, `cpglmm`, `glmmadmb`, `glmmTMB`, `MixMod`, `lme`,
-#' `mixed`, `rlmerMod`, `stanreg`, `brmsfit` or `wbm`. Support for objects of
-#' class `MixMod` (**GLMMadaptive**), `lme` (**nlme**) or `brmsfit` (**brms**) is
+#' @section Supported models and model families:
+#' This function supports models of class `merMod` (including models from
+#' **blme**), `clmm`, `cpglmm`, `glmmadmb`, `glmmTMB`, `MixMod`, `lme`, `mixed`,
+#' `rlmerMod`, `stanreg`, `brmsfit` or `wbm`. Support for objects of class
+#' `MixMod` (**GLMMadaptive**), `lme` (**nlme**) or `brmsfit` (**brms**) is
 #' not fully implemented or tested, and therefore may not work for all models
 #' of the aforementioned classes.
 #'
+#' The results are validated against the solutions provided by _Nakagawa et al. (2017)_,
+#' in particular examples shown in the Supplement 2 of the paper. Other model
+#' families are validated against results from the **MuMIn** package. This means
+#' that the returned variance components should be accurate and reliable for
+#' following mixed models or model families:
+#'
+#' - Bernoulli (logistic) regression
+#' - Binomial regression (with other than binary outcomes)
+#' - Poisson and Quasi-Poisson regression
+#' - Negative binomial regression (including nbinom1 and nbinom2 families)
+#' - Gaussian regression (linear models)
+#' - Gamma regression
+#' - Tweedie regression
+#' - Beta regression
+#' - Ordered beta regression
+#'
+#' Following model families are not yet validated, but should work:
+#'
+#' - Zero-inflated and hurdle models
+#' - Beta-binomial regression
+#' - Compound Poisson regression
+#' - Generalized Poisson regression
+#' - Log-normal regression
+#'
 #' Extracting variance components for models with zero-inflation part is not
 #' straightforward, because it is not definitely clear how the distribution-specific
 #' variance should be calculated. Therefore, it is recommended to carefully
 #' inspect the results, and probably validate against other models, e.g. Bayesian
 #' models (although results may be only roughly comparable).
 #'
+#' Log-normal regressions (e.g. `lognormal()` family in **glmmTMB** or `gaussian("log")`)
+#' often have a very low fixed effects variance (if they were calculated as
+#' suggested by _Nakagawa et al. 2017_). This results in very low ICC or
+#' r-squared values, which may not be meaningful (see [`performance::icc()`] or
+#' [`performance::r2_nakagawa()`]).
+#'
 #' @references
 #'  - Johnson, P. C. D. (2014). Extension of Nakagawa & Schielzeth’s R2 GLMM to
 #'  random slopes models. Methods in Ecology and Evolution, 5(9), 944–946.

diff --git a/man/find_transformation.Rd b/man/find_transformation.Rd
diff --git a/man/get_transformation.Rd b/man/get_transformation.Rd