Skip to content

Commit

Permalink
Vectorize distributions (#71)
Browse files Browse the repository at this point in the history
* Vectorize object construction of distributions:

* Implement methods for distribution objects `{c,print,summary,as.data.frame,...}.distribution`.
* Rewrite construction of families using `data.frame` instead of `list`.
* Adapt all distributions including their methods except `Categorical` and `Multinomial`.
* Adapt `base` and `ggplot2` plotting functions to handle vectorized distribution objects.
* Rewritten `Erlang` distribution building now on `stats::{d,g,q,r}gamma()`.
* Add tests for new `{methods}.distribution` and add tests for all vectorized distributions.
* Add tests for `ggplot2` and (new) `Erlang`.
* Pass addtional arguments "..." to `{d,p,q}` functions.
* Adapt manuals (no proper desription of vectorized distiribution objects yet).

* Modify tests (!) for checking print output for few distributions:

* `print.distriubtion()` is currently printing the class name (could be modified).
* This effects ChiSquare, FisherF, LogNormal, NegativeBinomial, StudentsT.
* Also add missing ggplot2 dependency in `test-plot.R`.

* Move ggplot2 plotting functions to `plot.R` and rename testfile to `test-plot.R`

* Run formatting and do styling

* Small fix in helper function to vectorize distributions:

* Fix in `apply_dpqr()` due to error with `drop = FALSE`.
* Modify handling of, e.g., two probabilites and two distributions.
* Add more tests to check vectorization.
* Adapt manuals to document vectorization in `random()`, `pdf()`, `log_pdf()`, `cdf()`, `quantile()`.

* Update "apply_dpqr" (again)

* Modify how to handle `drop` argument (consistent to `drop()`)
* Increase performance.
* Extend tests.
* Solely for `Normal()` so far.

* Update all other distriubtion to accomodate new changes:

* New apply_dpqr with `drop` argument now matching `drop()`.
* Modification of tests (!) to pass comparison due to new naming convention
  [test-{Erlang,Frechet,GeneralisedExtremeValue,GeneralisedPareto,ReversedWeibull,methods}.R
  • Loading branch information
mnlang authored Feb 10, 2022
1 parent 368a56b commit ca68f01
Show file tree
Hide file tree
Showing 204 changed files with 6,130 additions and 2,005 deletions.
38 changes: 13 additions & 25 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Generated by roxygen2: do not edit by hand

S3method("[",distribution)
S3method("names<-",distribution)
S3method(as.data.frame,distribution)
S3method(as.list,distribution)
S3method(as.matrix,distribution)
S3method(c,distribution)
S3method(cdf,Bernoulli)
S3method(cdf,Beta)
S3method(cdf,Binomial)
Expand All @@ -26,6 +32,8 @@ S3method(cdf,StudentsT)
S3method(cdf,Tukey)
S3method(cdf,Uniform)
S3method(cdf,Weibull)
S3method(dim,distribution)
S3method(dimnames,distribution)
S3method(fit_mle,Bernoulli)
S3method(fit_mle,Binomial)
S3method(fit_mle,Exponential)
Expand All @@ -34,6 +42,7 @@ S3method(fit_mle,Geometric)
S3method(fit_mle,LogNormal)
S3method(fit_mle,Normal)
S3method(fit_mle,Poisson)
S3method(format,distribution)
S3method(kurtosis,Bernoulli)
S3method(kurtosis,Beta)
S3method(kurtosis,Binomial)
Expand All @@ -56,6 +65,7 @@ S3method(kurtosis,Poisson)
S3method(kurtosis,StudentsT)
S3method(kurtosis,Uniform)
S3method(kurtosis,Weibull)
S3method(length,distribution)
S3method(log_pdf,Bernoulli)
S3method(log_pdf,Beta)
S3method(log_pdf,Binomial)
Expand Down Expand Up @@ -106,6 +116,7 @@ S3method(mean,RevWeibull)
S3method(mean,StudentsT)
S3method(mean,Uniform)
S3method(mean,Weibull)
S3method(names,distribution)
S3method(pdf,Bernoulli)
S3method(pdf,Beta)
S3method(pdf,Binomial)
Expand Down Expand Up @@ -133,33 +144,9 @@ S3method(pdf,StudentsT)
S3method(pdf,Uniform)
S3method(pdf,Weibull)
S3method(plot,distribution)
S3method(print,Bernoulli)
S3method(print,Beta)
S3method(print,Binomial)
S3method(print,Categorical)
S3method(print,Cauchy)
S3method(print,ChiSquare)
S3method(print,Erlang)
S3method(print,Exponential)
S3method(print,FisherF)
S3method(print,Frechet)
S3method(print,GEV)
S3method(print,GP)
S3method(print,Gamma)
S3method(print,Geometric)
S3method(print,Gumbel)
S3method(print,HyperGeometric)
S3method(print,LogNormal)
S3method(print,Logistic)
S3method(print,Multinomial)
S3method(print,NegativeBinomial)
S3method(print,Normal)
S3method(print,Poisson)
S3method(print,RevWeibull)
S3method(print,StudentsT)
S3method(print,Tukey)
S3method(print,Uniform)
S3method(print,Weibull)
S3method(print,distribution)
S3method(quantile,Bernoulli)
S3method(quantile,Beta)
S3method(quantile,Binomial)
Expand Down Expand Up @@ -242,6 +229,7 @@ S3method(suff_stat,Geometric)
S3method(suff_stat,LogNormal)
S3method(suff_stat,Normal)
S3method(suff_stat,Poisson)
S3method(summary,distribution)
S3method(support,Bernoulli)
S3method(support,Beta)
S3method(support,Binomial)
Expand Down
107 changes: 70 additions & 37 deletions R/Bernoulli.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,39 +86,38 @@
#'
#' cdf(X, quantile(X, 0.7))
#' quantile(X, cdf(X, 0.7))
#'
Bernoulli <- function(p = 0.5) {
d <- list(p = p)
d <- data.frame(p = p)
class(d) <- c("Bernoulli", "distribution")
d
}

#' @export
print.Bernoulli <- function(x, ...) {
cat(glue("Bernoulli distribution (p = {x$p})"), "\n")
}

#' @export
mean.Bernoulli <- function(x, ...) {
ellipsis::check_dots_used()
x$p
setNames(x$p, names(x))
}

#' @export
variance.Bernoulli <- function(x, ...) x$p * (1 - x$p)
variance.Bernoulli <- function(x, ...) {
rval <- x$p * (1 - x$p)
setNames(rval, names(x))
}

#' @export
skewness.Bernoulli <- function(x, ...) {
p <- x$p
q <- 1 - x$p
(1 - (2 * p)) / sqrt(p * q)
rval <- (1 - (2 * p)) / sqrt(p * q)
setNames(rval, names(x))
}

#' @export
kurtosis.Bernoulli <- function(x, ...) {
p <- x$p
q <- 1 - x$p
(1 - (6 * p * q)) / (p * q)
rval <- (1 - (6 * p * q)) / (p * q)
setNames(rval, names(x))
}

#' Draw a random sample from a Bernoulli distribution
Expand All @@ -127,14 +126,22 @@ kurtosis.Bernoulli <- function(x, ...) {
#'
#' @param x A `Bernoulli` object created by a call to [Bernoulli()].
#' @param n The number of samples to draw. Defaults to `1L`.
#' @param drop logical. Should the result be simplified to a vector if possible?
#' @param ... Unused. Unevaluated arguments will generate a warning to
#' catch mispellings or other possible errors.
#'
#' @return An integer vector of zeros and ones of length `n`.
#' @return In case of a single distribution object or `n = 1`, either a numeric
#' vector of length `n` (if `drop = TRUE`, default) or a `matrix` with `n` columns
#' (if `drop = FALSE`).
#' @export
#'
random.Bernoulli <- function(x, n = 1L, ...) {
rbinom(n = n, size = 1, prob = x$p)
random.Bernoulli <- function(x, n = 1L, drop = TRUE, ...) {
n <- make_positive_integer(n)
if (n == 0L) {
return(numeric(0L))
}
FUN <- function(at, d) rbinom(n = length(d), size = 1, prob = x$p)
apply_dpqr(d = x, FUN = FUN, at = matrix(1, ncol = n), type = "random", drop = drop)
}

#' Evaluate the probability mass function of a Bernoulli distribution
Expand All @@ -144,21 +151,28 @@ random.Bernoulli <- function(x, n = 1L, ...) {
#' @param d A `Bernoulli` object created by a call to [Bernoulli()].
#' @param x A vector of elements whose probabilities you would like to
#' determine given the distribution `d`.
#' @param ... Unused. Unevaluated arguments will generate a warning to
#' catch mispellings or other possible errors.
#'
#' @return A vector of probabilities, one for each element of `x`.
#' @param drop logical. Should the result be simplified to a vector if possible?
#' @param ... Arguments to be passed to \code{\link[stats]{dbinom}}.
#' Unevaluated arguments will generate a warning to catch mispellings or other
#' possible errors.
#'
#' @return In case of a single distribution object, either a numeric
#' vector of length `probs` (if `drop = TRUE`, default) or a `matrix` with
#' `length(x)` columns (if `drop = FALSE`). In case of a vectorized distribution
#' object, a matrix with `length(x)` columns containing all possible combinations.
#' @export
#'
pdf.Bernoulli <- function(d, x, ...) {
dbinom(x = x, size = 1, prob = d$p)
pdf.Bernoulli <- function(d, x, drop = TRUE, ...) {
FUN <- function(at, d) dbinom(x = at, size = 1, prob = d$p, ...)
apply_dpqr(d = d, FUN = FUN, at = x, type = "density", drop = drop)
}

#' @rdname pdf.Bernoulli
#' @export
#'
log_pdf.Bernoulli <- function(d, x, ...) {
dbinom(x = x, size = 1, prob = d$p, log = TRUE)
log_pdf.Bernoulli <- function(d, x, drop = TRUE, ...) {
FUN <- function(at, d) dbinom(x = at, size = 1, prob = d$p, log = TRUE)
apply_dpqr(d = d, FUN = FUN, at = x, type = "logLik", drop = drop)
}

#' Evaluate the cumulative distribution function of a Bernoulli distribution
Expand All @@ -168,14 +182,20 @@ log_pdf.Bernoulli <- function(d, x, ...) {
#' @param d A `Bernoulli` object created by a call to [Bernoulli()].
#' @param x A vector of elements whose cumulative probabilities you would
#' like to determine given the distribution `d`.
#' @param ... Unused. Unevaluated arguments will generate a warning to
#' catch mispellings or other possible errors.
#'
#' @return A vector of probabilities, one for each element of `x`.
#' @param drop logical. Should the result be simplified to a vector if possible?
#' @param ... Arguments to be passed to \code{\link[stats]{pbinom}}.
#' Unevaluated arguments will generate a warning to catch mispellings or other
#' possible errors.
#'
#' @return In case of a single distribution object, either a numeric
#' vector of length `probs` (if `drop = TRUE`, default) or a `matrix` with
#' `length(x)` columns (if `drop = FALSE`). In case of a vectorized distribution
#' object, a matrix with `length(x)` columns containing all possible combinations.
#' @export
#'
cdf.Bernoulli <- function(d, x, ...) {
pbinom(q = x, size = 1, prob = d$p)
cdf.Bernoulli <- function(d, x, drop = TRUE, ...) {
FUN <- function(at, d) pbinom(q = at, size = 1, prob = d$p, ...)
apply_dpqr(d = d, FUN = FUN, at = x, type = "probability", drop = drop)
}

#' Determine quantiles of a Bernoulli distribution
Expand All @@ -186,15 +206,23 @@ cdf.Bernoulli <- function(d, x, ...) {
#' @inheritParams random.Bernoulli
#'
#' @param probs A vector of probabilities.
#' @param ... Unused. Unevaluated arguments will generate a warning to
#' catch mispellings or other possible errors.
#'
#' @return A vector of quantiles, one for each element of `probs`.
#' @param drop logical. Should the result be simplified to a vector if possible?
#' @param ... Arguments to be passed to \code{\link[stats]{qbinom}}.
#' Unevaluated arguments will generate a warning to catch mispellings or other
#' possible errors.
#'
#' @return In case of a single distribution object, either a numeric
#' vector of length `probs` (if `drop = TRUE`, default) or a `matrix` with
#' `length(probs)` columns (if `drop = FALSE`). In case of a vectorized
#' distribution object, a matrix with `length(probs)` columns containing all
#' possible combinations.
#' @export
#'
quantile.Bernoulli <- function(x, probs, ...) {
quantile.Bernoulli <- function(x, probs, drop = TRUE, ...) {
ellipsis::check_dots_used()
qbinom(p = probs, size = 1, prob = x$p)

FUN <- function(at, d) qbinom(at, size = 1, prob = x$p, ...)
apply_dpqr(d = x, FUN = FUN, at = probs, type = "quantile", drop = drop)
}

#' Fit a Bernoulli distribution to data
Expand Down Expand Up @@ -230,12 +258,17 @@ suff_stat.Bernoulli <- function(d, x, ...) {
#' Return the support of the Bernoulli distribution
#'
#' @param d An `Bernoulli` object created by a call to [Bernoulli()].
#' @param drop logical. Should the result be simplified to a vector if possible?
#'
#' @return A vector of length 2 with the minimum and maximum value of the support.
#'
#' @export
support.Bernoulli <- function(d){
return(c(0, 1))
}
support.Bernoulli <- function(d, drop = TRUE) {
stopifnot("d must be a supported distribution object" = is_distribution(d))
stopifnot(is.logical(drop))

min <- rep(0, length(d))
max <- rep(1, length(d))

make_support(min, max, d, drop = drop)
}
Loading

0 comments on commit ca68f01

Please sign in to comment.