From b9fcdf5e6f6a4bfcd8b1ca55d79166eb6aa54647 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Fri, 1 Sep 2017 22:05:28 -0400 Subject: [PATCH] First take on cleaning up EmpiricalUnivariateDistribution --- src/Distributions.jl | 1 - src/empirical.jl | 76 ---------------------------- src/univariate/discrete/empirical.jl | 30 +++++++++++ src/univariates.jl | 1 + test/empirical.jl | 28 ++++++++++ test/runtests.jl | 3 +- 6 files changed, 61 insertions(+), 78 deletions(-) delete mode 100644 src/empirical.jl create mode 100644 src/univariate/discrete/empirical.jl create mode 100644 test/empirical.jl diff --git a/src/Distributions.jl b/src/Distributions.jl index fb014bd72..1d381adcc 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -268,7 +268,6 @@ include("genericfit.jl") # specific samplers and distributions include("univariates.jl") -include("empirical.jl") include("edgeworth.jl") include("multivariates.jl") include("matrixvariates.jl") diff --git a/src/empirical.jl b/src/empirical.jl deleted file mode 100644 index eeefd2179..000000000 --- a/src/empirical.jl +++ /dev/null @@ -1,76 +0,0 @@ -############################################################################## -# -# REFERENCES: "Statistical Distributions" -# -############################################################################## - -struct EmpiricalUnivariateDistribution <: ContinuousUnivariateDistribution - values::Vector{Float64} - support::Vector{Float64} - cdf::Function - entropy::Float64 - kurtosis::Float64 - mean::Float64 - median::Float64 - modes::Vector{Float64} - skewness::Float64 - var::Float64 -end - -@distr_support EmpiricalUnivariateDistribution d.values[1] d.values[end] - -function EmpiricalUnivariateDistribution(x::Vector) - sx = sort(x) - EmpiricalUnivariateDistribution(sx, - unique(sx), - ecdf(x), - NaN, - NaN, - mean(x), - median(x), - Float64[], - NaN, - var(x)) -end - -entropy(d::EmpiricalUnivariateDistribution) = d.entropy - -kurtosis(d::EmpiricalUnivariateDistribution) 
= d.kurtosis - -mean(d::EmpiricalUnivariateDistribution) = d.mean - -median(d::EmpiricalUnivariateDistribution) = d.median - -modes(d::EmpiricalUnivariateDistribution) = Float64[] - -skewness(d::EmpiricalUnivariateDistribution) = NaN - -var(d::EmpiricalUnivariateDistribution) = d.var - - -### Evaluation - -cdf(d::EmpiricalUnivariateDistribution, x::Float64) = d.cdf(x) - -function pdf(d::EmpiricalUnivariateDistribution, x::Float64) - ## TODO: Create lookup table for discrete case - 1.0 / length(d.values) -end - -function quantile(d::EmpiricalUnivariateDistribution, p::Float64) - n = length(d.values) - index = floor(Int,p * n) + 1 - index > n ? d.values[n] : d.values[index] -end - -function rand(d::EmpiricalUnivariateDistribution) - d.values[rand(1:length(d.values))] -end - - -### fit model - -function fit_mle(::Type{EmpiricalUnivariateDistribution}, - x::Vector{T}) where T <: Real - EmpiricalUnivariateDistribution(x) -end diff --git a/src/univariate/discrete/empirical.jl b/src/univariate/discrete/empirical.jl new file mode 100644 index 000000000..575fa7802 --- /dev/null +++ b/src/univariate/discrete/empirical.jl @@ -0,0 +1,30 @@ +struct EmpiricalUnivariateDistribution <: DiscreteUnivariateDistribution + values::Vector{Float64} + cdf::Function +end + +@distr_support EmpiricalUnivariateDistribution d.values[1] d.values[end] + +EmpiricalUnivariateDistribution(x::Vector) = EmpiricalUnivariateDistribution(sort(x), ecdf(x)) + +for f in (:entropy, :mean, :var, :skewness, :kurtosis) + @eval ($f)(d::EmpiricalUnivariateDistribution) = ($f)(d.values) +end + +function median(d::EmpiricalUnivariateDistribution) + v = d.values + n = length(v) + return (v[(n + 1) >> 1] + v[(n + 2) >> 1]) / 2 +end + +### Evaluation + +cdf(d::EmpiricalUnivariateDistribution, x::Real) = d.cdf(x) + +pdf(d::EmpiricalUnivariateDistribution, x::Real) = mean(t -> t == x, d.values) + +quantile(d::EmpiricalUnivariateDistribution, p::Real) = quantile(d.values, p) + +function 
rand(d::EmpiricalUnivariateDistribution) + d.values[rand(1:length(d.values))] +end diff --git a/src/univariates.jl b/src/univariates.jl index 50788be78..65f69e3ca 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -610,6 +610,7 @@ const discrete_distributions = [ "binomial", "categorical", "discreteuniform", + "empirical", "geometric", "hypergeometric", "negativebinomial", diff --git a/test/empirical.jl b/test/empirical.jl new file mode 100644 index 000000000..7a3face06 --- /dev/null +++ b/test/empirical.jl @@ -0,0 +1,28 @@ +using Distributions, Base.Test + +@testset "EmpiricalUnivariateDistribution" begin + n = 100 + r = MersenneTwister(123) + @testset "$d data" for (d, x) in (("discrete" , rand( r, 1:10, n)), + ("continuous", randn(r, n))) + X = EmpiricalUnivariateDistribution(x) + + @testset "test function: $f" for f in (mean, var, std, skewness, kurtosis, median, entropy) + @test f(X) ≈ f(x) + end + + ecdfx = StatsBase.ecdf(x) + @testset "cdf" for t in linspace(-10, 10, 100) + @test cdf(X, t) == ecdfx(t) + @test cdf(X, t) == mean(x -> x <= t, x) + end + + @testset "quantile" for q in linspace(0, 1, 100) + @test quantile(X, q) == quantile(x, q) + end + + @testset "pdf" begin + @test sum(t -> pdf(X, t), unique(x)) ≈ 1 + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index ebf9e87a9..fa6295359 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,7 +27,8 @@ tests = [ "gradlogpdf", "truncate", "noncentralt", - "locationscale"] + "locationscale", + "empirical"] print_with_color(:blue, "Running tests:\n")