
Commit

Merge branch 'master' into compathelper/new_version/2023-09-16-00-05-54-816-00041392826
devmotion authored Oct 6, 2024
2 parents 424fd87 + a1010e4 commit cf45ea3
Showing 39 changed files with 466 additions and 183 deletions.
27 changes: 9 additions & 18 deletions .github/workflows/CI.yml
@@ -2,12 +2,12 @@ name: CI

on:
pull_request:
branches:
- master
push:
branches:
- master
tags: '*'
workflow_dispatch:
merge_group:

concurrency:
# Skip intermediate builds: always.
@@ -17,38 +17,28 @@ concurrency:

jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ github.event_name }}
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.version == 'nightly' }}
strategy:
fail-fast: false
matrix:
version:
- '1.3'
- '1'
- 'nightly'
- pre
os:
- ubuntu-latest
- macos-latest
- windows-latest
arch:
- x64
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v4
env:
cache-name: cache-artifacts
with:
path: ~/.julia/artifacts
key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
restore-keys: |
${{ runner.os }}-test-${{ env.cache-name }}-
${{ runner.os }}-test-
${{ runner.os }}-
# ARM64 on macos-latest is neither supported by older Julia versions nor setup-julia
arch: ${{ matrix.os == 'macos-latest' && matrix.version != '1.3' && 'aarch64' || 'x64' }}
show-versioninfo: true
- uses: julia-actions/cache@v2
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- run: |
Expand All @@ -70,6 +60,7 @@ jobs:
- uses: julia-actions/setup-julia@v2
with:
version: '1'
show-versioninfo: true
- run: |
julia --project=docs -e '
using Pkg
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "Distributions"
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
authors = ["JuliaStats"]
version = "0.25.108"
version = "0.25.112"

[deps]
AliasTables = "66dad0bd-aa9a-41b7-9441-69ab47430ed8"
2 changes: 2 additions & 0 deletions docs/src/fit.md
@@ -27,6 +27,8 @@ The function `fit_mle` is for maximum likelihood estimation.
### Synopsis

```@docs
fit(D, x)
fit(D, x, w)
fit_mle(D, x)
fit_mle(D, x, w)
```
1 change: 0 additions & 1 deletion src/Distributions.jl
@@ -129,7 +129,6 @@ export
MatrixBeta,
MatrixFDist,
MatrixNormal,
MatrixReshaped,
MatrixTDist,
MixtureModel,
Multinomial,
14 changes: 14 additions & 0 deletions src/convolution.jl
@@ -12,6 +12,7 @@ and one of
* [`NegativeBinomial`](@ref)
* [`Geometric`](@ref)
* [`Poisson`](@ref)
* [`DiscreteNonParametric`](@ref)
* [`Normal`](@ref)
* [`Cauchy`](@ref)
* [`Chisq`](@ref)
@@ -47,6 +48,19 @@ end
convolve(d1::Poisson, d2::Poisson) = Poisson(d1.λ + d2.λ)


function convolve(d1::DiscreteNonParametric, d2::DiscreteNonParametric)
support_conv = collect(Set(s1 + s2 for s1 in support(d1), s2 in support(d2)))
sort!(support_conv) #for fast index finding below
probs1 = probs(d1)
probs2 = probs(d2)
p_conv = zeros(Base.promote_eltype(probs1, probs2), length(support_conv))
for (s1, p1) in zip(support(d1), probs(d1)), (s2, p2) in zip(support(d2), probs(d2))
idx = searchsortedfirst(support_conv, s1+s2)
p_conv[idx] += p1*p2
end
DiscreteNonParametric(support_conv, p_conv,check_args=false)
end

# continuous univariate
convolve(d1::Normal, d2::Normal) = Normal(d1.μ + d2.μ, hypot(d1.σ, d2.σ))
convolve(d1::Cauchy, d2::Cauchy) = Cauchy(d1.μ + d2.μ, d1.σ + d2.σ)
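
For context, a minimal usage sketch of the new `DiscreteNonParametric` convolution (the coin-flip values below are illustrative, not taken from the diff):

```julia
using Distributions

# Two independent fair coin flips, each 0 or 1 with probability 1/2.
coin = DiscreteNonParametric([0, 1], [0.5, 0.5])

# Their sum has support {0, 1, 2} with probabilities [0.25, 0.5, 0.25].
s = convolve(coin, coin)
support(s)  # [0, 1, 2]
probs(s)    # [0.25, 0.5, 0.25]
```
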
16 changes: 13 additions & 3 deletions src/deprecates.jl
@@ -53,10 +53,20 @@ end
@deprecate expectation(distr::Union{UnivariateDistribution,MultivariateDistribution}, g::Function; kwargs...) expectation(g, distr; kwargs...) false

# Deprecate `MatrixReshaped`
# This is very similar to `Base.@deprecate_binding MatrixReshaped{...} ReshapedDistribution{...}`
# However, `Base.@deprecate_binding` does not support type parameters
export MatrixReshaped
const MatrixReshaped{S<:ValueSupport,D<:MultivariateDistribution{S}} = ReshapedDistribution{2,S,D}
@deprecate MatrixReshaped(
d::MultivariateDistribution, n::Integer, p::Integer=n
) reshape(d, (n, p))
Base.deprecate(@__MODULE__, :MatrixReshaped)
# This is very similar to `Base.@deprecate MatrixReshaped(...) reshape(...)`
# We use another (unexported!) alias here to not throw a deprecation warning/error
# Unexported aliases do not affect the type printing
# In Julia >= 1.6, instead of a new alias we could have defined a method for (ReshapedDistribution{2,S,D} where {S<:ValueSupport,D<:MultivariateDistribution{S}})
const _MatrixReshaped{S<:ValueSupport,D<:MultivariateDistribution{S}} = ReshapedDistribution{2,S,D}
function _MatrixReshaped(d::MultivariateDistribution, n::Integer, p::Integer=n)
Base.depwarn("`MatrixReshaped(d, n, p)` is deprecated, use `reshape(d, (n, p))` instead.", :MatrixReshaped)
return reshape(d, (n, p))
end

for D in (:InverseWishart, :LKJ, :MatrixBeta, :MatrixFDist, :Wishart)
@eval @deprecate dim(d::$D) size(d, 1)
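
As a rough sketch of the intended migration (the `MvNormal` example is an assumption, not part of the diff), the deprecated constructor and its replacement compare as follows:

```julia
using Distributions, LinearAlgebra

d = MvNormal(zeros(4), I)

# Deprecated: MatrixReshaped(d, 2, 2) now emits a deprecation warning.
# Replacement recommended by the deprecation message:
X = reshape(d, (2, 2))  # a ReshapedDistribution over 2×2 matrices
rand(X)                 # draws a 2×2 matrix
```
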
13 changes: 13 additions & 0 deletions src/genericfit.jl
@@ -30,5 +30,18 @@ fit_mle(dt::Type{D}, x::AbstractArray, w::AbstractArray) where {D<:UnivariateDis
fit_mle(dt::Type{D}, x::AbstractMatrix) where {D<:MultivariateDistribution} = fit_mle(D, suffstats(D, x))
fit_mle(dt::Type{D}, x::AbstractMatrix, w::AbstractArray) where {D<:MultivariateDistribution} = fit_mle(D, suffstats(D, x, w))

"""
fit(D, args...)
Fit a distribution of type `D` to `args`.
The fit function will choose a reasonable way to fit the distribution, which,
in most cases, is maximum likelihood estimation. Note that this algorithm may
change; for a function that will behave consistently across versions, see
`fit_mle`.
By default, the fallback is [`fit_mle(D, args...)`](@ref); developers can change this default
for a specific distribution type `D <: Distribution` by defining a `fit(::Type{D}, args...)` method.
"""
fit(dt::Type{D}, x) where {D<:Distribution} = fit_mle(D, x)
fit(dt::Type{D}, args...) where {D<:Distribution} = fit_mle(D, args...)
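
A short sketch of the documented fallback (the data and distribution choice are illustrative):

```julia
using Distributions

x = rand(Normal(2.0, 3.0), 10_000)

# With no specialized `fit` method defined, `fit` falls back to `fit_mle`,
# so both calls return the same maximum likelihood estimate here:
fit(Normal, x)
fit_mle(Normal, x)

# Distributions that define their own `fit` (e.g. `Beta`, changed below)
# may use a different estimator than `fit_mle`.
```
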
2 changes: 1 addition & 1 deletion src/samplers/gamma.jl
@@ -225,6 +225,6 @@ end

function rand(rng::AbstractRNG, s::GammaIPSampler)
x = rand(rng, s.s)
e = randexp(rng)
e = randexp(rng, typeof(x))
x*exp(s.nia*e)
end
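
A small sketch of why the element type is passed to `randexp` (the `Float32` scenario is assumed for illustration): drawing the exponential in the same floating-point type as `x` keeps reduced-precision samplers from silently promoting to `Float64`.

```julia
using Random

x = rand(Float32)                             # stand-in for the draw from s.s
e = randexp(Random.default_rng(), typeof(x))  # Float32 instead of Float64
typeof(x * exp(-0.5f0 * e))                   # Float32 — no promotion
```
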
6 changes: 4 additions & 2 deletions src/truncate.jl
@@ -166,7 +166,8 @@ function logpdf(d::Truncated, x::Real)
end

function cdf(d::Truncated, x::Real)
result = (cdf(d.untruncated, x) - d.lcdf) / d.tp
result = clamp((cdf(d.untruncated, x) - d.lcdf) / d.tp, 0, 1)
# Special cases for values outside of the support to avoid e.g. NaN issues with `Binomial`
return if d.lower !== nothing && x < d.lower
zero(result)
elseif d.upper !== nothing && x >= d.upper
@@ -188,7 +189,8 @@ function logcdf(d::Truncated, x::Real)
end

function ccdf(d::Truncated, x::Real)
result = (d.ucdf - cdf(d.untruncated, x)) / d.tp
result = clamp((d.ucdf - cdf(d.untruncated, x)) / d.tp, 0, 1)
# Special cases for values outside of the support to avoid e.g. NaN issues with `Binomial`
return if d.lower !== nothing && x <= d.lower
one(result)
elseif d.upper !== nothing && x > d.upper
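
A hedged sketch of the boundary behaviour the clamp and the special cases are guarding (the truncated `Binomial` parameters are illustrative):

```julia
using Distributions

d = truncated(Binomial(10, 0.3), 2, 8)

# Queries outside or at the truncation bounds hit the explicit branches and
# the clamped ratio, so they return exact 0/1 instead of NaN-prone values:
cdf(d, 1)    # 0.0  (below the lower bound)
cdf(d, 8)    # 1.0  (at or above the upper bound)
ccdf(d, 9)   # 0.0  (above the upper bound)
```
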
11 changes: 2 additions & 9 deletions src/univariate/continuous/beta.jl
@@ -208,12 +208,8 @@ function rand(rng::AbstractRNG, d::Beta{T}) where T
end
end

#### Fit model
"""
fit_mle(::Type{<:Beta}, x::AbstractArray{T})

Maximum Likelihood Estimate of `Beta` Distribution via Newton's Method
"""

function fit_mle(::Type{<:Beta}, x::AbstractArray{T};
maxiter::Int=1000, tol::Float64=1e-14) where T<:Real

@@ -240,11 +236,8 @@ function fit_mle(::Type{<:Beta}, x::AbstractArray{T};
return Beta(θ[1], θ[2])
end

"""
fit(::Type{<:Beta}, x::AbstractArray{T})

fit a `Beta` distribution
"""

function fit(::Type{<:Beta}, x::AbstractArray{T}) where T<:Real
x_bar = mean(x)
v_bar = varm(x, x_bar)
7 changes: 6 additions & 1 deletion src/univariate/continuous/exponential.jl
@@ -105,8 +105,13 @@ cf(d::Exponential, t::Real) = 1/(1 - t * im * scale(d))


#### Sampling
rand(rng::AbstractRNG, d::Exponential) = xval(d, randexp(rng))
rand(rng::AbstractRNG, d::Exponential{T}) where {T} = xval(d, randexp(rng, float(T)))

function rand!(rng::AbstractRNG, d::Exponential, A::AbstractArray{<:Real})
randexp!(rng, A)
map!(Base.Fix1(xval, d), A, A)
return A
end

#### Fit model

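
The same `randexp!`/`randn!` + `map!(Base.Fix1(xval, d), A, A)` pattern recurs for several distributions below; a minimal usage sketch (array size and RNG choice are illustrative):

```julia
using Distributions, Random

d = Exponential(2.0)
A = Vector{Float64}(undef, 4)

# Fills A in place: standard exponential draws are written into A and then
# mapped through the internal helper x -> xval(d, x), avoiding an
# intermediate allocation.
rand!(Xoshiro(1), d, A)
```
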
13 changes: 11 additions & 2 deletions src/univariate/continuous/gumbel.jl
@@ -85,7 +85,7 @@ entropy(d::Gumbel) = log(d.θ) + 1 + MathConstants.γ
#### Evaluation

zval(d::Gumbel, x::Real) = (x - d.μ) / d.θ
xval(d::Gumbel, z::Real) = x * d.θ + d.μ
xval(d::Gumbel, z::Real) = z * d.θ + d.μ

function pdf(d::Gumbel, x::Real)
z = zval(d, x)
@@ -98,8 +98,17 @@ function logpdf(d::Gumbel, x::Real)
end

cdf(d::Gumbel, x::Real) = exp(-exp(-zval(d, x)))
ccdf(d::Gumbel, x::Real) = -expm1(-exp(-zval(d, x)))
logcdf(d::Gumbel, x::Real) = -exp(-zval(d, x))
logccdf(d::Gumbel, x::Real) = log1mexp(-exp(-zval(d, x)))

quantile(d::Gumbel, p::Real) = d.μ - d.θ * log(-log(p))
quantile(d::Gumbel, p::Real) = xval(d, -log(-log(p)))
cquantile(d::Gumbel, p::Real) = xval(d, -log(-log1p(-p)))
invlogcdf(d::Gumbel, lp::Real) = xval(d, -log(-lp))
invlogccdf(d::Gumbel, lp::Real) = xval(d, -log(-log1mexp(lp)))

gradlogpdf(d::Gumbel, x::Real) = expm1(-zval(d, x)) / d.θ

mgf(d::Gumbel, t::Real) = gamma(1 - d.θ * t) * exp(d.μ * t)
cgf(d::Gumbel, t::Real) = loggamma(1 - d.θ * t) + d.μ * t
cf(d::Gumbel, t::Real) = gamma(1 - im * d.θ * t) * cis(d.μ * t)
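
A quick sanity sketch of the corrected `xval` and the new quantile functions (parameter values are illustrative): with `xval(d, z) = z * d.θ + d.μ`, `quantile` is again the inverse of `cdf`.

```julia
using Distributions

d = Gumbel(1.0, 2.0)
p = 0.3
q = quantile(d, p)   # xval(d, -log(-log(p)))
cdf(d, q) ≈ p        # true: exp(-exp(-zval(d, q))) recovers p
```
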
10 changes: 8 additions & 2 deletions src/univariate/continuous/inversegaussian.jl
@@ -99,7 +99,10 @@ function cdf(d::InverseGaussian, x::Real)
y = max(x, 0)
u = sqrt(λ / y)
v = y / μ
z = normcdf(u * (v - 1)) + exp(2λ / μ) * normcdf(-u * (v + 1))
# 2λ/μ and normlogcdf(-u*(v+1)) are similar magnitude, opp. sign
# truncating to [0, 1] as an additional precaution
# Ref https://github.com/JuliaStats/Distributions.jl/issues/1873
z = clamp(normcdf(u * (v - 1)) + exp(2λ / μ + normlogcdf(-u * (v + 1))), 0, 1)

# otherwise `NaN` is returned for `+Inf`
return isinf(x) && x > 0 ? one(z) : z
@@ -110,7 +113,10 @@ function ccdf(d::InverseGaussian, x::Real)
y = max(x, 0)
u = sqrt(λ / y)
v = y / μ
z = normccdf(u * (v - 1)) - exp(2λ / μ) * normcdf(-u * (v + 1))
# 2λ/μ and normlogcdf(-u*(v+1)) are similar magnitude, opp. sign
# truncating to [0, 1] as an additional precaution
# Ref https://github.com/JuliaStats/Distributions.jl/issues/1873
z = clamp(normccdf(u * (v - 1)) - exp(2λ / μ + normlogcdf(-u * (v + 1))), 0, 1)

# otherwise `NaN` is returned for `+Inf`
return isinf(x) && x > 0 ? zero(z) : z
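
To make the numerical motivation concrete (parameter values assumed for illustration): with a large shape λ, `exp(2λ/μ)` overflows to `Inf` while `normcdf(-u*(v+1))` underflows to `0`, giving `Inf * 0 = NaN`; summing the logarithms first keeps the term finite, and the clamp keeps round-off inside `[0, 1]`.

```julia
using Distributions

d = InverseGaussian(1.0, 1e6)   # μ = 1, λ = 1e6

# Previously NaN (Inf * 0); with the log-space combination it is finite:
cdf(d, 1.0)    # ≈ 0.5, and guaranteed to lie in [0, 1] by the clamp
```
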
4 changes: 2 additions & 2 deletions src/univariate/continuous/ksdist.jl
@@ -28,7 +28,7 @@ function cdf(d::KSDist,x::Float64)
return 0.0
elseif b <= 1
# accuracy could be improved
return exp(lfact(n)+n*(log(2*b-1)-log(n)))
return exp(logfactorial(n)+n*(log(2*b-1)-log(n)))
elseif x >= 1
return 1.0
elseif b >= n-1
@@ -56,7 +56,7 @@ function ccdf(d::KSDist,x::Float64)
if b <= 0.5
return 1.0
elseif b <= 1
return 1-exp(lfact(n)+n*(log(2*b-1)-log(n)))
return 1-exp(logfactorial(n)+n*(log(2*b-1)-log(n)))
elseif x >= 1
return 0.0
elseif b >= n-1
9 changes: 8 additions & 1 deletion src/univariate/continuous/logitnormal.jl
@@ -157,7 +157,14 @@ end

#### Sampling

rand(rng::AbstractRNG, d::LogitNormal) = logistic(randn(rng) * d.σ + d.μ)
xval(d::LogitNormal, z::Real) = logistic(muladd(d.σ, z, d.μ))

rand(rng::AbstractRNG, d::LogitNormal) = xval(d, randn(rng))
function rand!(rng::AbstractRNG, d::LogitNormal, A::AbstractArray{<:Real})
randn!(rng, A)
map!(Base.Fix1(xval, d), A, A)
return A
end

## Fitting

9 changes: 8 additions & 1 deletion src/univariate/continuous/lognormal.jl
@@ -156,7 +156,14 @@ end

#### Sampling

rand(rng::AbstractRNG, d::LogNormal) = exp(randn(rng) * d.σ + d.μ)
xval(d::LogNormal, z::Real) = exp(muladd(d.σ, z, d.μ))

rand(rng::AbstractRNG, d::LogNormal) = xval(d, randn(rng))
function rand!(rng::AbstractRNG, d::LogNormal, A::AbstractArray{<:Real})
randn!(rng, A)
map!(Base.Fix1(xval, d), A, A)
return A
end

## Fitting

9 changes: 7 additions & 2 deletions src/univariate/continuous/normal.jl
@@ -114,9 +114,14 @@ Base.:*(c::Real, d::Normal) = Normal(c * d.μ, abs(c) * d.σ)

#### Sampling

rand(rng::AbstractRNG, d::Normal{T}) where {T} = d.μ + d.σ * randn(rng, float(T))
xval(d::Normal, z::Real) = muladd(d.σ, z, d.μ)

rand!(rng::AbstractRNG, d::Normal, A::AbstractArray{<:Real}) = A .= muladd.(d.σ, randn!(rng, A), d.μ)
rand(rng::AbstractRNG, d::Normal{T}) where {T} = xval(d, randn(rng, float(T)))
function rand!(rng::AbstractRNG, d::Normal, A::AbstractArray{<:Real})
randn!(rng, A)
map!(Base.Fix1(xval, d), A, A)
return A
end

#### Fitting

8 changes: 7 additions & 1 deletion src/univariate/continuous/normalcanon.jl
@@ -87,7 +87,13 @@ invlogccdf(d::NormalCanon, lp::Real) = xval(d, norminvlogccdf(lp))

#### Sampling

rand(rng::AbstractRNG, cf::NormalCanon) = cf.μ + randn(rng) / sqrt(cf.λ)
rand(rng::AbstractRNG, cf::NormalCanon) = xval(cf, randn(rng))

function rand!(rng::AbstractRNG, cf::NormalCanon, A::AbstractArray{<:Real})
randn!(rng, A)
map!(Base.Fix1(xval, cf), A, A)
return A
end

#### Affine transformations

9 changes: 8 additions & 1 deletion src/univariate/continuous/pareto.jl
@@ -110,7 +110,14 @@ quantile(d::Pareto, p::Real) = cquantile(d, 1 - p)

#### Sampling

rand(rng::AbstractRNG, d::Pareto) = d.θ * exp(randexp(rng) / d.α)
xval(d::Pareto, z::Real) = d.θ * exp(z / d.α)

rand(rng::AbstractRNG, d::Pareto) = xval(d, randexp(rng))
function rand!(rng::AbstractRNG, d::Pareto, A::AbstractArray{<:Real})
randexp!(rng, A)
map!(Base.Fix1(xval, d), A, A)
return A
end

## Fitting

2 changes: 1 addition & 1 deletion src/univariate/continuous/pgeneralizedgaussian.jl
@@ -141,7 +141,7 @@ function rand(rng::AbstractRNG, d::PGeneralizedGaussian)
inv_p = inv(d.p)
g = Gamma(inv_p, 1)
z = d.α * rand(rng, g)^inv_p
if rand(rng) < 0.5
if rand(rng, Bool)
return d.μ - z
else
return d.μ + z