Vectorize distributions (#71)

* Vectorize object construction of distributions: * Implement methods for distribution objects `{c,print,summary,as.data.frame,...}.distribution`. * Rewrite construction of families using `data.frame` instead of `list`. * Adapt all distributions including their methods except `Categorical` and `Multinomial`. * Adapt `base` and `ggplot2` plotting functions to handle vectorized distribution objects. * Rewritten `Erlang` distribution building now on `stats::{d,g,q,r}gamma()`. * Add tests for new `{methods}.distribution` and add tests for all vectorized distributions. * Add tests for `ggplot2` and (new) `Erlang`. * Pass addtional arguments "..." to `{d,p,q}` functions. * Adapt manuals (no proper desription of vectorized distiribution objects yet). * Modify tests (!) for checking print output for few distributions: * `print.distriubtion()` is currently printing the class name (could be modified). * This effects ChiSquare, FisherF, LogNormal, NegativeBinomial, StudentsT. * Also add missing ggplot2 dependency in `test-plot.R`. * Move ggplot2 plotting functions to `plot.R` and rename testfile to `test-plot.R` * Run formatting and do styling * Small fix in helper function to vectorize distributions: * Fix in `apply_dpqr()` due to error with `drop = FALSE`. * Modify handling of, e.g., two probabilites and two distributions. * Add more tests to check vectorization. * Adapt manuals to document vectorization in `random()`, `pdf()`, `log_pdf()`, `cdf()`, `quantile()`. * Update "apply_dpqr" (again) * Modify how to handle `drop` argument (consistent to `drop()`) * Increase performance. * Extend tests. * Solely for `Normal()` so far. * Update all other distriubtion to accomodate new changes: * New apply_dpqr with `drop` argument now matching `drop()`. * Modification of tests (!) to pass comparison due to new naming convention [test-{Erlang,Frechet,GeneralisedExtremeValue,GeneralisedPareto,ReversedWeibull,methods}.R
alexpghayes · Feb 10, 2022 · ca68f01 · ca68f01
1 parent 368a56b
commit ca68f01
Show file tree

Hide file tree

Showing 204 changed files with 6,130 additions and 2,005 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method("[",distribution)
+S3method("names<-",distribution)
+S3method(as.data.frame,distribution)
+S3method(as.list,distribution)
+S3method(as.matrix,distribution)
+S3method(c,distribution)
 S3method(cdf,Bernoulli)
 S3method(cdf,Beta)
 S3method(cdf,Binomial)
@@ -26,6 +32,8 @@ S3method(cdf,StudentsT)
 S3method(cdf,Tukey)
 S3method(cdf,Uniform)
 S3method(cdf,Weibull)
+S3method(dim,distribution)
+S3method(dimnames,distribution)
 S3method(fit_mle,Bernoulli)
 S3method(fit_mle,Binomial)
 S3method(fit_mle,Exponential)
@@ -34,6 +42,7 @@ S3method(fit_mle,Geometric)
 S3method(fit_mle,LogNormal)
 S3method(fit_mle,Normal)
 S3method(fit_mle,Poisson)
+S3method(format,distribution)
 S3method(kurtosis,Bernoulli)
 S3method(kurtosis,Beta)
 S3method(kurtosis,Binomial)
@@ -56,6 +65,7 @@ S3method(kurtosis,Poisson)
 S3method(kurtosis,StudentsT)
 S3method(kurtosis,Uniform)
 S3method(kurtosis,Weibull)
+S3method(length,distribution)
 S3method(log_pdf,Bernoulli)
 S3method(log_pdf,Beta)
 S3method(log_pdf,Binomial)
@@ -106,6 +116,7 @@ S3method(mean,RevWeibull)
 S3method(mean,StudentsT)
 S3method(mean,Uniform)
 S3method(mean,Weibull)
+S3method(names,distribution)
 S3method(pdf,Bernoulli)
 S3method(pdf,Beta)
 S3method(pdf,Binomial)
@@ -133,33 +144,9 @@ S3method(pdf,StudentsT)
 S3method(pdf,Uniform)
 S3method(pdf,Weibull)
 S3method(plot,distribution)
-S3method(print,Bernoulli)
-S3method(print,Beta)
-S3method(print,Binomial)
 S3method(print,Categorical)
-S3method(print,Cauchy)
-S3method(print,ChiSquare)
-S3method(print,Erlang)
-S3method(print,Exponential)
-S3method(print,FisherF)
-S3method(print,Frechet)
-S3method(print,GEV)
-S3method(print,GP)
-S3method(print,Gamma)
-S3method(print,Geometric)
-S3method(print,Gumbel)
-S3method(print,HyperGeometric)
-S3method(print,LogNormal)
-S3method(print,Logistic)
 S3method(print,Multinomial)
-S3method(print,NegativeBinomial)
-S3method(print,Normal)
-S3method(print,Poisson)
-S3method(print,RevWeibull)
-S3method(print,StudentsT)
-S3method(print,Tukey)
-S3method(print,Uniform)
-S3method(print,Weibull)
+S3method(print,distribution)
 S3method(quantile,Bernoulli)
 S3method(quantile,Beta)
 S3method(quantile,Binomial)
@@ -242,6 +229,7 @@ S3method(suff_stat,Geometric)
 S3method(suff_stat,LogNormal)
 S3method(suff_stat,Normal)
 S3method(suff_stat,Poisson)
+S3method(summary,distribution)
 S3method(support,Bernoulli)
 S3method(support,Beta)
 S3method(support,Binomial)

diff --git a/R/Bernoulli.R b/R/Bernoulli.R
@@ -86,39 +86,38 @@
 #'
 #' cdf(X, quantile(X, 0.7))
 #' quantile(X, cdf(X, 0.7))
-#'
 Bernoulli <- function(p = 0.5) {
-  d <- list(p = p)
+  d <- data.frame(p = p)
   class(d) <- c("Bernoulli", "distribution")
   d
 }
 
-#' @export
-print.Bernoulli <- function(x, ...) {
-  cat(glue("Bernoulli distribution (p = {x$p})"), "\n")
-}
-
 #' @export
 mean.Bernoulli <- function(x, ...) {
   ellipsis::check_dots_used()
-  x$p
+  setNames(x$p, names(x))
 }
 
 #' @export
-variance.Bernoulli <- function(x, ...) x$p * (1 - x$p)
+variance.Bernoulli <- function(x, ...) {
+  rval <- x$p * (1 - x$p)
+  setNames(rval, names(x))
+}
 
 #' @export
 skewness.Bernoulli <- function(x, ...) {
   p <- x$p
   q <- 1 - x$p
-  (1 - (2 * p)) / sqrt(p * q)
+  rval <- (1 - (2 * p)) / sqrt(p * q)
+  setNames(rval, names(x))
 }
 
 #' @export
 kurtosis.Bernoulli <- function(x, ...) {
   p <- x$p
   q <- 1 - x$p
-  (1 - (6 * p * q)) / (p * q)
+  rval <- (1 - (6 * p * q)) / (p * q)
+  setNames(rval, names(x))
 }
 
 #' Draw a random sample from a Bernoulli distribution
@@ -127,14 +126,22 @@ kurtosis.Bernoulli <- function(x, ...) {
 #'
 #' @param x A `Bernoulli` object created by a call to [Bernoulli()].
 #' @param n The number of samples to draw. Defaults to `1L`.
+#' @param drop logical. Should the result be simplified to a vector if possible?
 #' @param ... Unused. Unevaluated arguments will generate a warning to
 #'   catch mispellings or other possible errors.
 #'
-#' @return An integer vector of zeros and ones of length `n`.
+#' @return In case of a single distribution object or `n = 1`, either a numeric
+#'   vector of length `n` (if `drop = TRUE`, default) or a `matrix` with `n` columns
+#'   (if `drop = FALSE`).
 #' @export
 #'
-random.Bernoulli <- function(x, n = 1L, ...) {
-  rbinom(n = n, size = 1, prob = x$p)
+random.Bernoulli <- function(x, n = 1L, drop = TRUE, ...) {
+  n <- make_positive_integer(n)
+  if (n == 0L) {
+    return(numeric(0L))
+  }
+  FUN <- function(at, d) rbinom(n = length(d), size = 1, prob = x$p)
+  apply_dpqr(d = x, FUN = FUN, at = matrix(1, ncol = n), type = "random", drop = drop)
 }
 
 #' Evaluate the probability mass function of a Bernoulli distribution
@@ -144,21 +151,28 @@ random.Bernoulli <- function(x, n = 1L, ...) {
 #' @param d A `Bernoulli` object created by a call to [Bernoulli()].
 #' @param x A vector of elements whose probabilities you would like to
 #'   determine given the distribution `d`.
-#' @param ... Unused. Unevaluated arguments will generate a warning to
-#'   catch mispellings or other possible errors.
-#'
-#' @return A vector of probabilities, one for each element of `x`.
+#' @param drop logical. Should the result be simplified to a vector if possible?
+#' @param ... Arguments to be passed to \code{\link[stats]{dbinom}}.
+#'   Unevaluated arguments will generate a warning to catch mispellings or other
+#'   possible errors.
+#'
+#' @return In case of a single distribution object, either a numeric
+#'   vector of length `probs` (if `drop = TRUE`, default) or a `matrix` with
+#'   `length(x)` columns (if `drop = FALSE`). In case of a vectorized distribution
+#'   object, a matrix with `length(x)` columns containing all possible combinations.
 #' @export
 #'
-pdf.Bernoulli <- function(d, x, ...) {
-  dbinom(x = x, size = 1, prob = d$p)
+pdf.Bernoulli <- function(d, x, drop = TRUE, ...) {
+  FUN <- function(at, d) dbinom(x = at, size = 1, prob = d$p, ...)
+  apply_dpqr(d = d, FUN = FUN, at = x, type = "density", drop = drop)
 }
 
 #' @rdname pdf.Bernoulli
 #' @export
 #'
-log_pdf.Bernoulli <- function(d, x, ...) {
-  dbinom(x = x, size = 1, prob = d$p, log = TRUE)
+log_pdf.Bernoulli <- function(d, x, drop = TRUE, ...) {
+  FUN <- function(at, d) dbinom(x = at, size = 1, prob = d$p, log = TRUE)
+  apply_dpqr(d = d, FUN = FUN, at = x, type = "logLik", drop = drop)
 }
 
 #' Evaluate the cumulative distribution function of a Bernoulli distribution
@@ -168,14 +182,20 @@ log_pdf.Bernoulli <- function(d, x, ...) {
 #' @param d A `Bernoulli` object created by a call to [Bernoulli()].
 #' @param x A vector of elements whose cumulative probabilities you would
 #'   like to determine given the distribution `d`.
-#' @param ... Unused. Unevaluated arguments will generate a warning to
-#'   catch mispellings or other possible errors.
-#'
-#' @return A vector of probabilities, one for each element of `x`.
+#' @param drop logical. Should the result be simplified to a vector if possible?
+#' @param ... Arguments to be passed to \code{\link[stats]{pbinom}}.
+#'   Unevaluated arguments will generate a warning to catch mispellings or other
+#'   possible errors.
+#'
+#' @return In case of a single distribution object, either a numeric
+#'   vector of length `probs` (if `drop = TRUE`, default) or a `matrix` with
+#'   `length(x)` columns (if `drop = FALSE`). In case of a vectorized distribution
+#'   object, a matrix with `length(x)` columns containing all possible combinations.
 #' @export
 #'
-cdf.Bernoulli <- function(d, x, ...) {
-  pbinom(q = x, size = 1, prob = d$p)
+cdf.Bernoulli <- function(d, x, drop = TRUE, ...) {
+  FUN <- function(at, d) pbinom(q = at, size = 1, prob = d$p, ...)
+  apply_dpqr(d = d, FUN = FUN, at = x, type = "probability", drop = drop)
 }
 
 #' Determine quantiles of a Bernoulli distribution
@@ -186,15 +206,23 @@ cdf.Bernoulli <- function(d, x, ...) {
 #' @inheritParams random.Bernoulli
 #'
 #' @param probs A vector of probabilities.
-#' @param ... Unused. Unevaluated arguments will generate a warning to
-#'   catch mispellings or other possible errors.
-#'
-#' @return A vector of quantiles, one for each element of `probs`.
+#' @param drop logical. Should the result be simplified to a vector if possible?
+#' @param ... Arguments to be passed to \code{\link[stats]{qbinom}}.
+#'   Unevaluated arguments will generate a warning to catch mispellings or other
+#'   possible errors.
+#'
+#' @return In case of a single distribution object, either a numeric
+#'   vector of length `probs` (if `drop = TRUE`, default) or a `matrix` with
+#'   `length(probs)` columns (if `drop = FALSE`). In case of a vectorized
+#'   distribution object, a matrix with `length(probs)` columns containing all
+#'   possible combinations.
 #' @export
 #'
-quantile.Bernoulli <- function(x, probs, ...) {
+quantile.Bernoulli <- function(x, probs, drop = TRUE, ...) {
   ellipsis::check_dots_used()
-  qbinom(p = probs, size = 1, prob = x$p)
+
+  FUN <- function(at, d) qbinom(at, size = 1, prob = x$p, ...)
+  apply_dpqr(d = x, FUN = FUN, at = probs, type = "quantile", drop = drop)
 }
 
 #' Fit a Bernoulli distribution to data
@@ -230,12 +258,17 @@ suff_stat.Bernoulli <- function(d, x, ...) {
 #' Return the support of the Bernoulli distribution
 #'
 #' @param d An `Bernoulli` object created by a call to [Bernoulli()].
+#' @param drop logical. Should the result be simplified to a vector if possible?
 #'
 #' @return A vector of length 2 with the minimum and maximum value of the support.
 #'
 #' @export
-support.Bernoulli <- function(d){
-  return(c(0, 1))
-}
+support.Bernoulli <- function(d, drop = TRUE) {
+  stopifnot("d must be a supported distribution object" = is_distribution(d))
+  stopifnot(is.logical(drop))
 
+  min <- rep(0, length(d))
+  max <- rep(1, length(d))
 
+  make_support(min, max, d, drop = drop)
+}