Create workhorse for summary.mipo(), bind it to pool.table(), free wo…

…rkhorse from R-typed objects, document and test
amices · Aug 2, 2023 · 6ed5a14 · 6ed5a14
1 parent 9454afa
commit 6ed5a14
Show file tree

Hide file tree

Showing 5 changed files with 153 additions and 42 deletions.
diff --git a/R/mipo.R b/R/mipo.R
@@ -9,7 +9,7 @@
 #' @inheritParams broom::lm_tidiers
 #' @param z Data frame with a tidied version of a coefficient matrix
 #' @param conf.int Logical indicating whether to include
-#' a confidence interval. The default is \code{FALSE}.
+#' a confidence interval.
 #' @param conf.level Confidence level of the interval, used only if
 #' \code{conf.int = TRUE}. Number between 0 and 1.
 #' @param exponentiate Flag indicating whether to exponentiate the
@@ -64,22 +64,31 @@ summary.mipo <- function(object, type = c("tests", "all"),
                          conf.int = FALSE, conf.level = .95,
                          exponentiate = FALSE, ...) {
   type <- match.arg(type)
-  m <- object$m
   x <- object$pooled
+  z <- summary_mipo.workhorse(x = x, type = type,
+                              conf.int = conf.int, conf.level = conf.level,
+                              exponentiate = exponentiate, ...)
+  class(z) <- c("mipo.summary", "data.frame")
+  z
+}
+
+summary_mipo.workhorse <- function(x, type,
+                                   conf.int, conf.level,
+                                   exponentiate, ...) {
+  m <- x$m[1L]
   std.error <- sqrt(x$t)
   statistic <- x$estimate / std.error
   p.value <- 2 * (pt(abs(statistic), pmax(x$df, 0.001), lower.tail = FALSE))
 
   z <- data.frame(x,
-    std.error = std.error,
-    statistic = statistic,
-    p.value = p.value
-  )
-  z <- process_mipo(z, object,
-    conf.int = conf.int,
-    conf.level = conf.level,
-    exponentiate = exponentiate
+                  std.error = std.error,
+                  statistic = statistic,
+                  p.value = p.value
   )
+  z <- process_mipo(z, x,
+                    conf.int = conf.int,
+                    conf.level = conf.level,
+                    exponentiate = exponentiate)
 
   parnames <- names(z)[1L:(pmatch("m", names(z)) - 1L)]
   if (type == "tests") {
@@ -88,8 +97,7 @@ summary.mipo <- function(object, type = c("tests", "all"),
     z <- z[, keep]
   }
 
-  class(z) <- c("mipo.summary", "data.frame")
-  z
+  data.frame(z)
 }
 
 #' @rdname mipo
@@ -120,8 +128,7 @@ process_mipo <- function(z, x, conf.int = FALSE, conf.level = .95,
 
   CI <- NULL
   if (conf.int) {
-    # avoid "Waiting for profiling to be done..." message
-    CI <- suppressMessages(confint(x, level = conf.level))
+    CI <- confidence(x, level = conf.level)
   }
   z$estimate <- trans(z$estimate)
 
@@ -150,9 +157,8 @@ vcov.mipo <- function(object, ...) {
   so
 }
 
-confint.mipo <- function(object, parm, level = 0.95, ...) {
-  pooled <- object$pooled
-  cf <- getqbar(object)
+confidence <- function(pooled, parm, level = 0.95, ...) {
+  cf <- pooled$estimate
   df <- pooled$df
   se <- sqrt(pooled$t)
   pnames <- names(df) <- names(se) <- names(cf) <- row.names(pooled)
@@ -166,12 +172,14 @@ confint.mipo <- function(object, parm, level = 0.95, ...) {
   fac <- qt(a, df)
   pct <- fmt.perc(a, 3)
   ci <- array(NA,
-    dim = c(length(parm), 2L),
-    dimnames = list(parm, pct)
+              dim = c(length(parm), 2L),
+              dimnames = list(parm, pct)
   )
   ci[, 1] <- cf[parm] + qt(a[1], df[parm]) * se[parm]
   ci[, 2] <- cf[parm] + qt(a[2], df[parm]) * se[parm]
-  ci
+  ci <- data.frame(ci)
+  names(ci) <- c("conf.low", "conf.high")
+  return(ci)
 }
 
 unrowname <- function(x) {

diff --git a/R/pool.table.R b/R/pool.table.R
@@ -17,6 +17,17 @@
 #' rule for the total variance \code{t}. The custom rule can use the
 #' other calculated pooling statistics. The default \code{t} calculation
 #' has the form \code{".data$ubar + (1 + 1 / .data$m) * .data$b"}.
+#' @param type A string, either \code{"minimal"}, \code{"tests"} or \code{"all"}.
+#' Use minimal to mimick the output of \code{summary(pool(fit))}. The default
+#' is \code{"all"}.
+#' @param conf.int Logical indicating whether to include
+#' a confidence interval.
+#' @param conf.level Confidence level of the interval, used only if
+#' \code{conf.int = TRUE}. Number between 0 and 1.
+#' @param exponentiate Flag indicating whether to exponentiate the
+#' coefficient estimates and confidence intervals (typical for
+#' logistic regression).
+#' @param \dots Arguments passed down
 #' @details
 #' The input data \code{w} is a \code{data.frame} with columns named:
 #'
@@ -37,29 +48,73 @@
 #' The value \code{dfcom = Inf} is acceptable for large samples
 #' (n > 1000) and relatively concise parametric models.
 #'
-#' @return \code{pool.table()} return a \code{data.frame} with aggregated
+#' @return
+#'
+#' \code{pool.table()} returns a \code{data.frame} with aggregated
 #' estimates, standard errors, confidence intervals and statistical tests.
 #'
+#' The meaning of the columns is as follows:
+#'
+#' \tabular{ll}{
+#' \code{term}      \tab Parameter name\cr
+#' \code{m}         \tab Number of multiple imputations\cr
+#' \code{estimate}  \tab Pooled complete data estimate\cr
+#' \code{std.error} \tab Standard error of \code{estimate}\cr
+#' \code{statistic} \tab t-statistic = \code{estimate} / \code{std.error}\cr
+#' \code{df}        \tab Degrees of freedom for \code{statistic}\cr
+#' \code{p.value}   \tab One-sided P-value under null hypothesis\cr
+#' \code{conf.low}  \tab Lower bound of c.i. (default 95 pct)\cr
+#' \code{conf.high} \tab Upper bound of c.i. (default 95 pct)\cr
+#' \code{riv}       \tab Relative increase in variance\cr
+#' \code{fmi}       \tab Fraction of missing information\cr
+#' \code{ubar}      \tab Within-imputation variance of \code{estimate}\cr
+#' \code{b}         \tab Between-imputation variance of \code{estimate}\cr
+#' \code{t}         \tab Total variance, of \code{estimate}\cr
+#' \code{dfcom}     \tab Residual degrees of freedom in complete data\cr
+#' }
+#'
 #' @examples
 #' # conventional mice workflow
 #' imp <- mice(nhanes2, m = 2, maxit = 2, seed = 1, print = FALSE)
 #' fit <- with(imp, lm(chl ~ age + bmi + hyp))
-#' est <- pool(fit)
-#' est$pooled
+#' pld1 <- pool(fit)
+#' pld1$pooled
 #'
 #' # using pool.table() on tidy table
 #' tbl <- summary(fit)[, c("term", "estimate", "std.error", "df.residual")]
 #' tbl
-#' pooled <- pool.table(tbl)
-#' pooled
+#' pld2 <- pool.table(tbl, type = "minimal")
+#' pld2
+#'
+#' identical(pld1$pooled, pld2)
 #'
-#' identical(est$pooled, pooled)
+#' # conventional workflow: all numerical output
+#' all1 <- summary(pld1, type = "all", conf.int = TRUE)
+#' all1
 #'
+#' # pool.table workflow: all numerical output
+#' all2 <- pool.table(tbl)
+#' all2
 #'
+#' identical(data.frame(all1), all2)
 #' @export
-pool.table <- function(w, dfcom = Inf, custom.t = NULL,
-                       rule = c("rubin1987", "reiter2003")) {
+pool.table <- function(w,
+                       type = c("all", "minimal", "tests"),
+                       conf.int = TRUE,
+                       conf.level = 0.95,
+                       exponentiate = FALSE,
+                       dfcom = Inf,
+                       custom.t = NULL,
+                       rule = c("rubin1987", "reiter2003"),
+                       ...) {
+  type <- match.arg(type)
   pooled <- pool.vector(w, dfcom = dfcom, custom.t = custom.t, rule = rule)
-  pooled
+  if (type %in% c("all", "tests"))
+  pooled <- summary_mipo.workhorse(x = pooled,
+                                   type = type,
+                                   conf.int = conf.int,
+                                   conf.level = conf.level,
+                                   exponentiate = exponentiate,
+                                   ...)
+  return(pooled)
 }
-
diff --git a/R/summary.R b/R/summary.R
@@ -27,14 +27,13 @@ summary.mira <- function(object,
   }
   # nobs is needed for pool.r.squared
   # not supplied by broom <= 0.5.6
+  model <- getfit(object, 1L)
   if (!"nobs" %in% colnames(v)) {
-    v$nobs <- tryCatch(length(stats::residuals(getfit(object)[[1]])),
-      error = function(e) NULL
-    )
+    v$nobs <- tryCatch(length(stats::residuals(model)),
+      error = function(e) {NULL})
   }
   # get df.residuals
   if (!"df.residuals" %in% colnames(v)) {
-    model <- getfit(object, 1L)
     v$df.residual <- get.dfcom(model)
   }
 

diff --git a/man/mipo.Rd b/man/mipo.Rd
diff --git a/man/pool.table.Rd b/man/pool.table.Rd