From 0c1507c64ddaa7a7cdf442ae9d736d4825dedf0e Mon Sep 17 00:00:00 2001 From: Achim Zeileis Date: Tue, 10 Sep 2024 17:25:18 +0200 Subject: [PATCH] Enable distribution vectors in tibbles (#108) * Enabled the inclusion of vectors as columns in data objects, which needs vctrs and tibble as Suggests dependencies * add internal keyword to vec_proxy/vec_restore documentation --- DESCRIPTION | 6 ++-- NAMESPACE | 2 ++ NEWS.md | 2 ++ R/tibble-vctrs.R | 61 ++++++++++++++++++++++++++++++++++ man/vec_proxy.distribution.Rd | 62 +++++++++++++++++++++++++++++++++++ 5 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 R/tibble-vctrs.R create mode 100644 man/vec_proxy.distribution.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 4abcd14..91ee73d 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,11 +36,13 @@ Suggests: PoissonBinomial, revdbayes (>= 1.3.5), rmarkdown, - testthat (>= 3.0.0) + testthat (>= 3.0.0), + tibble, + vctrs VignetteBuilder: knitr Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Config/testthat/edition: 3 Config/testthat/parallel: true diff --git a/NAMESPACE b/NAMESPACE index 0c784d6..dfc3d8a 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -431,6 +431,8 @@ S3method(variance,ZINegativeBinomial) S3method(variance,ZIPoisson) S3method(variance,ZTNegativeBinomial) S3method(variance,ZTPoisson) +S3method(vctrs::vec_proxy,distribution) +S3method(vctrs::vec_restore,distribution) export(Bernoulli) export(Beta) export(Binomial) diff --git a/NEWS.md b/NEWS.md index f60871d..53c0cc7 100755 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,8 @@ using `match()` for distribution objects. - Added a `duplicated()` method which relies on the corresponding method for the `data.frame` of parameters in a distribution. +- Enabled the inclusion of `distribution` vectors as columns in `tibble` data objects, see + `?vec_proxy.distribution` for further details and a practical example. 
- Fixed errors in notation of cumulative distribution function in the documentation of `HurdlePoisson()` and `HurdleNegativeBinomial()` (by @dkwhu in #94 and #96). - The `prodist()` method for `glm` objects can now also handle `family` specifications from diff --git a/R/tibble-vctrs.R b/R/tibble-vctrs.R new file mode 100644 index 0000000..65efc48 --- /dev/null +++ b/R/tibble-vctrs.R @@ -0,0 +1,61 @@ +#' Methods for including distributions as vctrs in tibbles +#' +#' Methods for \code{\link[vctrs]{vec_proxy}} and \code{\link[vctrs]{vec_restore}} +#' from \pkg{vctrs} in order to include \code{distribution} objects in +#' \code{\link[tibble]{tibble}} objects. +#' +#' @details The methods for \code{\link[vctrs]{vec_proxy}} and +#' \code{\link[vctrs]{vec_restore}} from \pkg{vctrs} are needed so that +#' \code{distribution} objects can be included as a vector column in +#' (and extracted from) \code{\link[tibble]{tibble}} data frames. +#' \code{vec_proxy} simply adds the class \code{data.frame} which is the +#' actual underlying data structure used by \code{distribution} objects. +#' This way the number of rows etc. can be correctly determined. Conversely, +#' \code{vec_restore} strips off the additional \code{data.frame} class and +#' restores the original \code{distribution} classes. Users typically do not +#' need to call \code{vec_proxy} and \code{vec_restore} directly. +#' +#' @param x,to Objects inheriting from \code{distribution}. +#' @param ... Currently not used. +#' +#' @return The `vec_proxy` method returns a `distribution` object which +#' additionally inherits from `data.frame` while the `vec_restore` method +#' restores the original `distribution` classes. 
+#' +#' @examples +#' \dontshow{ if(!requireNamespace("tibble")) { +#' if(interactive() || is.na(Sys.getenv("_R_CHECK_PACKAGE_NAME_", NA))) { +#' stop("not all packages required for the example are installed") +#' } else q() } +#' } +#' ## Poisson GLM for FIFA 2018 goals data +#' data("FIFA2018", package = "distributions3") +#' m <- glm(goals ~ difference, data = FIFA2018, family = poisson) +#' +#' ## Predict fitted Poisson distributions for teams with ability differences +#' ## of -1, 0, 1 (out-of-sample) using the new data as a data.frame +#' nd <- data.frame(difference = -1:1) +#' nd$dist <- prodist(m, newdata = nd) +#' nd +#' +#' ## Do the same using the new data as a tibble +#' library("tibble") +#' nt <- tibble(difference = -1:1) +#' nt$dist <- prodist(m, newdata = nt) +#' nt +#' +#' @keywords internal + +#' @rdname vec_proxy.distribution +#' @exportS3Method vctrs::vec_proxy distribution +vec_proxy.distribution <- function(x, ...) { + class(x) <- c(class(x), "data.frame") + return(x) +} + +#' @rdname vec_proxy.distribution +#' @exportS3Method vctrs::vec_restore distribution +vec_restore.distribution <- function(x, to, ...) { + class(x) <- class(to) + return(x) +} diff --git a/man/vec_proxy.distribution.Rd b/man/vec_proxy.distribution.Rd new file mode 100644 index 0000000..cb3b71a --- /dev/null +++ b/man/vec_proxy.distribution.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tibble-vctrs.R +\name{vec_proxy.distribution} +\alias{vec_proxy.distribution} +\alias{vec_restore.distribution} +\title{Methods for including distributions as vctrs in tibbles} +\usage{ +\method{vec_proxy}{distribution}(x, ...) + +\method{vec_restore}{distribution}(x, to, ...) 
+} +\arguments{ +\item{x, to}{Objects inheriting from \code{distribution}.} + +\item{...}{Currently not used.} +} +\value{ +The \code{vec_proxy} method returns a \code{distribution} object which +additionally inherits from \code{data.frame} while the \code{vec_restore} method +restores the original \code{distribution} classes. +} +\description{ +Methods for \code{\link[vctrs]{vec_proxy}} and \code{\link[vctrs]{vec_restore}} +from \pkg{vctrs} in order to include \code{distribution} objects in +\code{\link[tibble]{tibble}} objects. +} +\details{ +The methods for \code{\link[vctrs]{vec_proxy}} and +\code{\link[vctrs]{vec_restore}} from \pkg{vctrs} are needed so that +\code{distribution} objects can be included as a vector column in +(and extracted from) \code{\link[tibble]{tibble}} data frames. +\code{vec_proxy} simply adds the class \code{data.frame} which is the +actual underlying data structure used by \code{distribution} objects. +This way the number of rows etc. can be correctly determined. Conversely, +\code{vec_restore} strips off the additional \code{data.frame} class and +restores the original \code{distribution} classes. Users typically do not +need to call \code{vec_proxy} and \code{vec_restore} directly. +} +\examples{ +\dontshow{ if(!requireNamespace("tibble")) { + if(interactive() || is.na(Sys.getenv("_R_CHECK_PACKAGE_NAME_", NA))) { + stop("not all packages required for the example are installed") + } else q() } +} +## Poisson GLM for FIFA 2018 goals data +data("FIFA2018", package = "distributions3") +m <- glm(goals ~ difference, data = FIFA2018, family = poisson) + +## Predict fitted Poisson distributions for teams with ability differences +## of -1, 0, 1 (out-of-sample) using the new data as a data.frame +nd <- data.frame(difference = -1:1) +nd$dist <- prodist(m, newdata = nd) +nd + +## Do the same using the new data as a tibble +library("tibble") +nt <- tibble(difference = -1:1) +nt$dist <- prodist(m, newdata = nt) +nt + +} +\keyword{internal}