Skip to content

Commit

Permalink
Create workhorse for summary.mipo(), bind it to pool.table(), free wo…
Browse files Browse the repository at this point in the history
…rkhorse from R-typed objects, document and test
  • Loading branch information
stefvanbuuren committed Aug 2, 2023
1 parent 9454afa commit 6ed5a14
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 42 deletions.
48 changes: 28 additions & 20 deletions R/mipo.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#' @inheritParams broom::lm_tidiers
#' @param z Data frame with a tidied version of a coefficient matrix
#' @param conf.int Logical indicating whether to include
#' a confidence interval. The default is \code{FALSE}.
#' a confidence interval.
#' @param conf.level Confidence level of the interval, used only if
#' \code{conf.int = TRUE}. Number between 0 and 1.
#' @param exponentiate Flag indicating whether to exponentiate the
Expand Down Expand Up @@ -64,22 +64,31 @@ summary.mipo <- function(object, type = c("tests", "all"),
conf.int = FALSE, conf.level = .95,
exponentiate = FALSE, ...) {
type <- match.arg(type)
m <- object$m
x <- object$pooled
z <- summary_mipo.workhorse(x = x, type = type,
conf.int = conf.int, conf.level = conf.level,
exponentiate = exponentiate, ...)
class(z) <- c("mipo.summary", "data.frame")
z
}

summary_mipo.workhorse <- function(x, type,
conf.int, conf.level,
exponentiate, ...) {
m <- x$m[1L]
std.error <- sqrt(x$t)
statistic <- x$estimate / std.error
p.value <- 2 * (pt(abs(statistic), pmax(x$df, 0.001), lower.tail = FALSE))

z <- data.frame(x,
std.error = std.error,
statistic = statistic,
p.value = p.value
)
z <- process_mipo(z, object,
conf.int = conf.int,
conf.level = conf.level,
exponentiate = exponentiate
std.error = std.error,
statistic = statistic,
p.value = p.value
)
z <- process_mipo(z, x,
conf.int = conf.int,
conf.level = conf.level,
exponentiate = exponentiate)

parnames <- names(z)[1L:(pmatch("m", names(z)) - 1L)]
if (type == "tests") {
Expand All @@ -88,8 +97,7 @@ summary.mipo <- function(object, type = c("tests", "all"),
z <- z[, keep]
}

class(z) <- c("mipo.summary", "data.frame")
z
data.frame(z)
}

#' @rdname mipo
Expand Down Expand Up @@ -120,8 +128,7 @@ process_mipo <- function(z, x, conf.int = FALSE, conf.level = .95,

CI <- NULL
if (conf.int) {
# avoid "Waiting for profiling to be done..." message
CI <- suppressMessages(confint(x, level = conf.level))
CI <- confidence(x, level = conf.level)
}
z$estimate <- trans(z$estimate)

Expand Down Expand Up @@ -150,9 +157,8 @@ vcov.mipo <- function(object, ...) {
so
}

confint.mipo <- function(object, parm, level = 0.95, ...) {
pooled <- object$pooled
cf <- getqbar(object)
confidence <- function(pooled, parm, level = 0.95, ...) {
cf <- pooled$estimate
df <- pooled$df
se <- sqrt(pooled$t)
pnames <- names(df) <- names(se) <- names(cf) <- row.names(pooled)
Expand All @@ -166,12 +172,14 @@ confint.mipo <- function(object, parm, level = 0.95, ...) {
fac <- qt(a, df)
pct <- fmt.perc(a, 3)
ci <- array(NA,
dim = c(length(parm), 2L),
dimnames = list(parm, pct)
dim = c(length(parm), 2L),
dimnames = list(parm, pct)
)
ci[, 1] <- cf[parm] + qt(a[1], df[parm]) * se[parm]
ci[, 2] <- cf[parm] + qt(a[2], df[parm]) * se[parm]
ci
ci <- data.frame(ci)
names(ci) <- c("conf.low", "conf.high")
return(ci)
}

unrowname <- function(x) {
Expand Down
75 changes: 65 additions & 10 deletions R/pool.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@
#' rule for the total variance \code{t}. The custom rule can use the
#' other calculated pooling statistics. The default \code{t} calculation
#' has the form \code{".data$ubar + (1 + 1 / .data$m) * .data$b"}.
#' @param type A string, either \code{"minimal"}, \code{"tests"} or \code{"all"}.
#' Use minimal to mimick the output of \code{summary(pool(fit))}. The default
#' is \code{"all"}.
#' @param conf.int Logical indicating whether to include
#' a confidence interval.
#' @param conf.level Confidence level of the interval, used only if
#' \code{conf.int = TRUE}. Number between 0 and 1.
#' @param exponentiate Flag indicating whether to exponentiate the
#' coefficient estimates and confidence intervals (typical for
#' logistic regression).
#' @param \dots Arguments passed down
#' @details
#' The input data \code{w} is a \code{data.frame} with columns named:
#'
Expand All @@ -37,29 +48,73 @@
#' The value \code{dfcom = Inf} is acceptable for large samples
#' (n > 1000) and relatively concise parametric models.
#'
#' @return \code{pool.table()} return a \code{data.frame} with aggregated
#' @return
#'
#' \code{pool.table()} returns a \code{data.frame} with aggregated
#' estimates, standard errors, confidence intervals and statistical tests.
#'
#' The meaning of the columns is as follows:
#'
#' \tabular{ll}{
#' \code{term} \tab Parameter name\cr
#' \code{m} \tab Number of multiple imputations\cr
#' \code{estimate} \tab Pooled complete data estimate\cr
#' \code{std.error} \tab Standard error of \code{estimate}\cr
#' \code{statistic} \tab t-statistic = \code{estimate} / \code{std.error}\cr
#' \code{df} \tab Degrees of freedom for \code{statistic}\cr
#' \code{p.value} \tab One-sided P-value under null hypothesis\cr
#' \code{conf.low} \tab Lower bound of c.i. (default 95 pct)\cr
#' \code{conf.high} \tab Upper bound of c.i. (default 95 pct)\cr
#' \code{riv} \tab Relative increase in variance\cr
#' \code{fmi} \tab Fraction of missing information\cr
#' \code{ubar} \tab Within-imputation variance of \code{estimate}\cr
#' \code{b} \tab Between-imputation variance of \code{estimate}\cr
#' \code{t} \tab Total variance, of \code{estimate}\cr
#' \code{dfcom} \tab Residual degrees of freedom in complete data\cr
#' }
#'
#' @examples
#' # conventional mice workflow
#' imp <- mice(nhanes2, m = 2, maxit = 2, seed = 1, print = FALSE)
#' fit <- with(imp, lm(chl ~ age + bmi + hyp))
#' est <- pool(fit)
#' est$pooled
#' pld1 <- pool(fit)
#' pld1$pooled
#'
#' # using pool.table() on tidy table
#' tbl <- summary(fit)[, c("term", "estimate", "std.error", "df.residual")]
#' tbl
#' pooled <- pool.table(tbl)
#' pooled
#' pld2 <- pool.table(tbl, type = "minimal")
#' pld2
#'
#' identical(pld1$pooled, pld2)
#'
#' identical(est$pooled, pooled)
#' # conventional workflow: all numerical output
#' all1 <- summary(pld1, type = "all", conf.int = TRUE)
#' all1
#'
#' # pool.table workflow: all numerical output
#' all2 <- pool.table(tbl)
#' all2
#'
#' identical(data.frame(all1), all2)
#' @export
pool.table <- function(w, dfcom = Inf, custom.t = NULL,
rule = c("rubin1987", "reiter2003")) {
pool.table <- function(w,
type = c("all", "minimal", "tests"),
conf.int = TRUE,
conf.level = 0.95,
exponentiate = FALSE,
dfcom = Inf,
custom.t = NULL,
rule = c("rubin1987", "reiter2003"),
...) {
type <- match.arg(type)
pooled <- pool.vector(w, dfcom = dfcom, custom.t = custom.t, rule = rule)
pooled
if (type %in% c("all", "tests"))
pooled <- summary_mipo.workhorse(x = pooled,
type = type,
conf.int = conf.int,
conf.level = conf.level,
exponentiate = exponentiate,
...)
return(pooled)
}

7 changes: 3 additions & 4 deletions R/summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,13 @@ summary.mira <- function(object,
}
# nobs is needed for pool.r.squared
# not supplied by broom <= 0.5.6
model <- getfit(object, 1L)
if (!"nobs" %in% colnames(v)) {
v$nobs <- tryCatch(length(stats::residuals(getfit(object)[[1]])),
error = function(e) NULL
)
v$nobs <- tryCatch(length(stats::residuals(model)),
error = function(e) {NULL})
}
# get df.residuals
if (!"df.residuals" %in% colnames(v)) {
model <- getfit(object, 1L)
v$df.residual <- get.dfcom(model)
}

Expand Down
2 changes: 1 addition & 1 deletion man/mipo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 56 additions & 7 deletions man/pool.table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6ed5a14

Please sign in to comment.