From e053fd926e9cd92a586f7da800e9be972e0e4002 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Mon, 26 Apr 2021 12:23:53 +0200 Subject: [PATCH 01/21] Use regular documentation, not mkdocs --- docs/make.jl | 17 ++++++++++++----- docs/{mkdocs.yml => mkdocs} | 0 2 files changed, 12 insertions(+), 5 deletions(-) rename docs/{mkdocs.yml => mkdocs} (100%) diff --git a/docs/make.jl b/docs/make.jl index 0a695fa9..a5cb549d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -4,9 +4,16 @@ CI = get(ENV, "CI", nothing) == "true" || get(ENV, "GITHUB_TOKEN", nothing) !== CI && Pkg.activate(@__DIR__) CI && Pkg.instantiate() CI && (ENV["GKSwstype"] = "100") + +using Plots using Documenter -using DocumenterTools -using DocumenterMarkdown +using DocumenterTools: Themes + +# %% Theme stuff? + +# %% Build docs +cd(@__DIR__) +ENV["JULIA_DEBUG"] = "Documenter" using UncertainData using Distributions @@ -119,14 +126,14 @@ ENV["JULIA_DEBUG"] = "Documenter" makedocs( modules = [UncertainData], - sitename = "UncertainData.jl documentation", - format = format = Documenter.HTML( + format = Documenter.HTML( prettyurls = CI, ), + sitename = "UncertainData.jl", + authors = "Kristian Agasøster Haaga", pages = PAGES ) - if CI deploydocs( repo = "github.com/kahaaga/UncertainData.jl.git", diff --git a/docs/mkdocs.yml b/docs/mkdocs similarity index 100% rename from docs/mkdocs.yml rename to docs/mkdocs From b12fcb0807afd9750c08cf7e0a63e852dd0d8588 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Mon, 26 Apr 2021 16:34:56 +0200 Subject: [PATCH 02/21] Update documentation --- CHANGELOG.md | 12 ++- Project.toml | 7 +- docs/make.jl | 11 ++- .../sequential/strictly_decreasing.md | 9 +-- .../sequential/strictly_increasing.md | 8 +- docs/src/uncertain_datasets/datasets.md | 44 +++++++++++ .../{merging.md => combining_and_merging.md} | 31 ++++---- .../convenience_constructors.md | 9 +++ .../uncertainvalues_Measurements.md | 13 +--- 
.../uncertainvalues_certainvalue.md | 9 +-- .../uncertainvalues_examples.md | 13 ++-- .../uncertainvalues_fitted.md | 9 +-- .../uncertain_values/uncertainvalues_kde.md | 31 ++++---- .../uncertainvalues_overview.md | 16 ++-- .../uncertainvalues_populations.md | 10 +-- ...ncertainvalues_theoreticaldistributions.md | 76 ++++++------------- .../ordered_sequence_algorithms.jl | 11 ++- src/uncertain_values/UncertainValue.jl | 20 ++++- 18 files changed, 180 insertions(+), 159 deletions(-) create mode 100644 docs/src/uncertain_datasets/datasets.md rename docs/src/uncertain_values/{merging.md => combining_and_merging.md} (95%) create mode 100644 docs/src/uncertain_values/convenience_constructors.md diff --git a/CHANGELOG.md b/CHANGELOG.md index ce3e635c..98e9432f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,16 @@ # UncertainData changelog -## v.0.14 +## v.0.14.1 + +### Features + +- Implement sequential resampling with chunks. + +### Misc + +- Make some methods more generic (non-breaking). 
+ +## v.0.14.0 ### Breaking changes diff --git a/Project.toml b/Project.toml index ac22a231..4b747777 100644 --- a/Project.toml +++ b/Project.toml @@ -2,19 +2,22 @@ name = "UncertainData" uuid = "dcd9ba68-c27b-5cea-ae21-829cd07325bf" authors = ["Kristian Agasøster Haaga "] repo = "https://github.com/kahaaga/UncertainData.jl.git" -version = "0.14.0" - +version = "0.14.1" [deps] Bootstrap = "e28b5b4c-05e8-5b66-bc03-6f0c0a0a06e0" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433" +DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4" HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" IntervalArithmetic = "d1acc4aa-44c8-5952-acd4-ba5d80a2a253" KernelDensity = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" diff --git a/docs/make.jl b/docs/make.jl index a5cb549d..6f7e8c4c 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -25,14 +25,17 @@ using Interpolations PAGES = [ "index.md", "Uncertain values" => [ - "uncertain_values/uncertainvalues_overview.md", + "uncertain_values/convenience_constructors.md", + #"uncertain_values/uncertainvalues_overview.md", + "Types of uncertain values" => [ "uncertain_values/uncertainvalues_theoreticaldistributions.md", - "uncertain_values/uncertainvalues_kde.md", "uncertain_values/uncertainvalues_fitted.md", - "uncertain_values/uncertainvalues_certainvalue.md", + "uncertain_values/uncertainvalues_kde.md", "uncertain_values/uncertainvalues_populations.md", + "uncertain_values/uncertainvalues_certainvalue.md", 
"uncertain_values/uncertainvalues_Measurements.md", - "uncertain_values/merging.md", + ], + "uncertain_values/combining_and_merging.md", "uncertain_values/uncertainvalues_examples.md", ], "Uncertain datasets" => [ diff --git a/docs/src/resampling/sequential/strictly_decreasing.md b/docs/src/resampling/sequential/strictly_decreasing.md index beb97d12..6dfa2c38 100644 --- a/docs/src/resampling/sequential/strictly_decreasing.md +++ b/docs/src/resampling/sequential/strictly_decreasing.md @@ -6,15 +6,12 @@ The default constructor for a strictly decreasing sequential sampling constraint ## Documentation ```@docs -resample(udata::AbstractUncertainValueDataset, - constraint::Union{SamplingConstraint, Vector{SamplingConstraint}}, - sequential_constraint::StrictlyDecreasing{OrderedSamplingAlgorithm}; - quantiles = [0.0001, 0.9999]) +resample(udata::AbstractUncertainValueDataset, sequential_constraint::StrictlyDecreasing{OrderedSamplingAlgorithm}, + constraint::Union{SamplingConstraint, Vector{SamplingConstraint}}) ``` ```@docs -resample(udata::DT, sequential_constraint::StrictlyDecreasing{T}; - quantiles = [0.0001, 0.9999]) where {DT <: AbstractUncertainValueDataset, T <: StartToEnd} +resample(udata::DT, sequential_constraint::StrictlyDecreasing{T}) where {DT <: AbstractUncertainValueDataset, T <: StartToEnd} ``` ## Compatible ordering algorithms diff --git a/docs/src/resampling/sequential/strictly_increasing.md b/docs/src/resampling/sequential/strictly_increasing.md index dc98d20f..ffb3db52 100644 --- a/docs/src/resampling/sequential/strictly_increasing.md +++ b/docs/src/resampling/sequential/strictly_increasing.md @@ -13,15 +13,13 @@ The default constructor for a strictly increasing sequential sampling constraint ```@docs resample(udata::AbstractUncertainValueDataset, - constraint::Union{SamplingConstraint, Vector{SamplingConstraint}}, - sequential_constraint::StrictlyIncreasing{OrderedSamplingAlgorithm}; - quantiles = [0.0001, 0.9999]) + 
sequential_constraint::StrictlyIncreasing{OrderedSamplingAlgorithm}, + constraint::Union{SamplingConstraint, Vector{SamplingConstraint}}) ``` ```@docs -resample(udata::DT, sequential_constraint::StrictlyIncreasing{T}; - quantiles = [0.0001, 0.9999]) where {DT <: AbstractUncertainValueDataset, T <: StartToEnd} +resample(udata::DT, sequential_constraint::StrictlyIncreasing{T}) where {DT <: AbstractUncertainValueDataset, T <: StartToEnd} ``` ## Examples diff --git a/docs/src/uncertain_datasets/datasets.md b/docs/src/uncertain_datasets/datasets.md new file mode 100644 index 00000000..58ded4aa --- /dev/null +++ b/docs/src/uncertain_datasets/datasets.md @@ -0,0 +1,44 @@ +# Datasets of uncertain values + +## Uncertain value datasets + + +`UncertainValueDataset`s is an uncertain dataset type that has no explicit index +associated with its uncertain values. This type may come with some extra functionality +that the generic [UncertainDataset](uncertain_dataset.md) type does not support. + +Use this type when you want to be explicit about the values representing data values, +as opposed to [indices](uncertain_index_dataset.md). + +```@docs +UncertainValueDataset +``` + +### Example + +An `UncertainValueDataset` can be comprised of uncertain values of different types. + +```julia +o1 = UncertainValue(Normal, 0, 0.5) +o2 = UncertainValue(Normal, 2.0, 0.1) +o3 = UncertainValue(Uniform, 0, 4) +o4 = UncertainValue(Uniform, rand(100)) +o5 = UncertainValue(Beta, 4, 5) +o6 = UncertainValue(Gamma, 4, 5) +o7 = UncertainValue(Frechet, 1, 2) +o8 = UncertainValue(BetaPrime, 1, 2) +o9 = UncertainValue(BetaBinomial, 10, 3, 2) +o10 = UncertainValue(Binomial, 10, 0.3) + +uvals = [o1, o2, o3, o4, o5, o6, o7, o8, o9, o10] +d = UncertainValueDataset(uvals) +``` + +The built-in plot recipes makes it a breeze to plot the dataset. Here, we'll plot the +20th to 80th percentile range error bars. 
+ +```julia +plot(d, [0.2, 0.8]) +``` + +![](uncertain_value_dataset_example.svg) diff --git a/docs/src/uncertain_values/merging.md b/docs/src/uncertain_values/combining_and_merging.md similarity index 95% rename from docs/src/uncertain_values/merging.md rename to docs/src/uncertain_values/combining_and_merging.md index aab721ed..c0785734 100644 --- a/docs/src/uncertain_values/merging.md +++ b/docs/src/uncertain_values/combining_and_merging.md @@ -1,8 +1,8 @@ +# Combining and merging + Because all uncertainties are handled using a resampling approach, it is trivial to [`combine`](@ref) or merge uncertain values of different types into a single uncertain value. -# Nomenclature - Depending on your data, you may want to choose of one the following ways of representing multiple uncertain values as one: @@ -16,7 +16,7 @@ representing multiple uncertain values as one: a kernel density estimate to the overall distribution. This approach introduces approximations *beyond* what is present in the uncertain values when you define them. -# [Combining uncertain values: the population approach](@id uncertainvalue_combine) +## [Combining: the population approach](@id uncertainvalue_combine) **Combining** uncertain values is done by representing them as a weighted population of uncertain values, which is illustrated in the following example: @@ -65,13 +65,13 @@ plot(d1, d2, layout = (2, 1), xlabel = "Value", ylabel = "Density", link = :x, x This makes it possible treat an ensemble of uncertain values as a single uncertain value. -With equal weights, this introduces no bias beyond what is present in the data, +With equal weights, combining uncertain values introduces no bias beyond what is present in the data, because resampling is done from the full supports of each of the furnishing values. Additional information on relative sampling probabilities, however, be it informed by expert opinion or quantative estimates, is easily incorporated by adjusting the sampling weights. 
-# [Merging uncertain values: the kernel density estimation (KDE) approach](@id uncertainvalue_merge) +## [Merging: KDE approach](@id uncertainvalue_merge) **Merging** multiple uncertain values could be done by fitting a model distribution to the values. Using any specific theoretical distribution as a model for the combined @@ -83,7 +83,7 @@ This has the advantage that you only have to deal with a single estimate to the distribution, but introduces bias because the distribution is *estimated* and the shape of the distribution depends on the parameters of the KDE procedure. -## Without weights +### Without weights When no weights are provided, the combined value is computed by resampling each of the `N` uncertain values `n/N` times, @@ -121,18 +121,17 @@ plot(p, pcombined, layout = (2, 1), link = :x, ylabel = "Density") ![](figs/combine_example_noweights.png) -## With weights +### With weights `Weights`, `ProbabilityWeights` and `AnalyticWeights` are functionally the same. Either may be used depending on whether the weights are assigned subjectively or quantitatively. With `FrequencyWeights`, it is possible to control the exact number of draws from each uncertain value that goes into the draw pool before performing KDE. 
-### ProbabilityWeights +#### `ProbabilityWeights` ```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::ProbabilityWeights; - n = 1000*length(uvals)) +combine(uvals::Vector{AbstractUncertainValue}, weights::ProbabilityWeights) ``` For example: @@ -166,11 +165,10 @@ plot(p, pcombined, layout = (2, 1), size = (800, 600), ![](figs/combine_example_pweights.png) -### AnalyticWeights +#### `AnalyticWeights` ```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::AnalyticWeights; - n = 1000*length(uvals)) +combine(uvals::Vector{AbstractUncertainValue}, weights::AnalyticWeights) ``` For example: @@ -200,7 +198,7 @@ plot(p, pcombined, layout = (2, 1), size = (800, 600), ![](figs/combine_example_aweights.png) -### Generic Weights +### Generic `Weights` ```@docs combine(uvals::Vector{AbstractUncertainValue}, weights::Weights; @@ -237,15 +235,14 @@ plot(p, pcombined, layout = (2, 1), size = (800, 600), ![](figs/combine_example_generic_weights.png) -### FrequencyWeights +### `FrequencyWeights` Using `FrequencyWeights`, one may specify the number of times each of the uncertain values should be sampled to form the pooled resampled draws on which the final kernel density estimate is performed. 
```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::FrequencyWeights; - n = 1000*length(uvals)) +combine(uvals::Vector{AbstractUncertainValue}, weights::FrequencyWeights) ``` For example: diff --git a/docs/src/uncertain_values/convenience_constructors.md b/docs/src/uncertain_values/convenience_constructors.md new file mode 100644 index 00000000..ebf74f81 --- /dev/null +++ b/docs/src/uncertain_values/convenience_constructors.md @@ -0,0 +1,9 @@ +# Convenience constructors + +```@docs +UncertainValue(d::Distributions.Distribution) +UncertainValue(d::Type{D}, empiricaldata::Vector{T}) where {D<:Distribution, T} +UncertainValue(::AbstractVector{<:Real}) +UncertainValue(::Vector, ::Vector) +UncertainValue(::Real) +``` \ No newline at end of file diff --git a/docs/src/uncertain_values/uncertainvalues_Measurements.md b/docs/src/uncertain_values/uncertainvalues_Measurements.md index 72875fba..32c05e16 100644 --- a/docs/src/uncertain_values/uncertainvalues_Measurements.md +++ b/docs/src/uncertain_values/uncertainvalues_Measurements.md @@ -1,13 +1,8 @@ -`Measurement` instances from [Measurements.jl](https://github.com/JuliaPhysics/Measurements.jl)[^1] are -treated as normal distributions with known means. *Note: once you convert a measurement, you lose the -functionality provided by Measurements.jl, such as exact error propagation*. +# Compatibility with Measurements.jl -# Generic constructor - -If `x = measurement(2.2, 0.21)` is a measurement, then `UncertainValue(x)` will return an -`UncertainScalarNormallyDistributed` instance. - -# References +```@docs +UncertainValue(m::Measurement{T}) where T +``` [^1]: M. Giordano, 2016, "Uncertainty propagation with functionally correlated quantities", arXiv:1610.08716 (Bibcode: 2016arXiv161008716G). 
\ No newline at end of file diff --git a/docs/src/uncertain_values/uncertainvalues_certainvalue.md b/docs/src/uncertain_values/uncertainvalues_certainvalue.md index 40c28117..f5b3482a 100644 --- a/docs/src/uncertain_values/uncertainvalues_certainvalue.md +++ b/docs/src/uncertain_values/uncertainvalues_certainvalue.md @@ -1,15 +1,10 @@ +# Certain values + The `CertainValue` allows representation of values with no uncertainty. It behaves just as a scalar, but can be mixed with uncertain values when performing [mathematical operations](../mathematics/elementary_operations.md) and [resampling](../resampling/resampling_overview.md). -# Generic constructor - -```@docs -UncertainValue(::Real) -``` - -# Type documentation ```@docs CertainValue diff --git a/docs/src/uncertain_values/uncertainvalues_examples.md b/docs/src/uncertain_values/uncertainvalues_examples.md index cb15f3db..a4d78990 100644 --- a/docs/src/uncertain_values/uncertainvalues_examples.md +++ b/docs/src/uncertain_values/uncertainvalues_examples.md @@ -1,3 +1,4 @@ +# Examples First, load the necessary packages: @@ -5,9 +6,9 @@ First, load the necessary packages: using UncertainData, Distributions, KernelDensity, Plots ``` -# Example 1: Uncertain values defined by theoretical distributions +## Theoretical distributions -## A uniformly distributed uncertain value +### A uniformly distributed uncertain value Consider the following contrived example. We've measure a data value with a poor instrument that tells us that the value lies between `-2` and `3`. However, we but that we know nothing @@ -30,7 +31,7 @@ bar(u, label = "", xlabel = "value", ylabel = "probability density") ![](figs/uncertainvalue_theoretical_uniform.svg) -## A normally distributed uncertain value +### A normally distributed uncertain value A situation commonly encountered is to want to use someone else's data from a publication. Usually, these values are reported as the mean or median, with some associated uncertainty. 
@@ -49,12 +50,12 @@ bar(u, label = "", xlabel = "value", ylabel = "probability density") ![](figs/uncertainvalue_theoretical_normal.svg) -## Other distributions +### Other distributions You may define uncertain values following any of the [supported distributions](uncertainvalues_theoreticaldistributions.md). -# Example 2: Uncertain values defined by kernel density estimated distributions +## Kernel density estimated distributions One may also be given a a distribution of numbers that's not quite normally distributed. How to represent this uncertainty? Easy: we use a kernel density estimate to the distribution. @@ -88,7 +89,7 @@ plot(u, xlabel = "Value", ylabel = "Probability density") ![](figs/uncertainvalue_kde_bimodal.svg) -# Example 3: Uncertain values defined by theoretical distributions fitted to empirical data +## Theoretical distributions fitted to empirical data One may also be given a dataset whose histogram looks a lot like a theoretical distribution. We may then select a theoretical distribution and fit its diff --git a/docs/src/uncertain_values/uncertainvalues_fitted.md b/docs/src/uncertain_values/uncertainvalues_fitted.md index 70749cee..3357b2cf 100644 --- a/docs/src/uncertain_values/uncertainvalues_fitted.md +++ b/docs/src/uncertain_values/uncertainvalues_fitted.md @@ -5,14 +5,6 @@ may choose to represent the data by fitting a theoretical distribution to the values. This will only work well if the histogram closely resembles a theoretical distribution. -## Generic constructor - -```@docs -UncertainValue(d::Type{D}, empiricaldata::Vector{T}) where {D<:Distribution, T} -``` - -## Type documentation - ```@docs UncertainScalarTheoreticalFit ``` @@ -62,6 +54,7 @@ of your data! ### Beware: fitting distributions may lead to nonsensical results! + In a less contrived example, we may try to fit a beta distribution to a sample generated from a gamma distribution. 
diff --git a/docs/src/uncertain_values/uncertainvalues_kde.md b/docs/src/uncertain_values/uncertainvalues_kde.md index a6de554b..368fad5a 100644 --- a/docs/src/uncertain_values/uncertainvalues_kde.md +++ b/docs/src/uncertain_values/uncertainvalues_kde.md @@ -1,24 +1,18 @@ -# [Kernel density estimated distributions](@id uncertain_value_kde) +# [Kernel density estimates (KDE)](@id uncertain_value_kde) When your data have an empirical distribution that doesn't follow any obvious theoretical distribution, the data may be represented by a kernel density -estimate. - -# Generic constructor - -```@docs -UncertainValue(::AbstractVector{<:Real}) -``` - -# Type documentation +estimate to the underlying distribution. ```@docs UncertainScalarKDE ``` -# Examples +## Examples + +### Implicit KDE constructor: -``` julia tab="Implicit KDE constructor" +``` julia using Distributions, UncertainData # Create a normal distribution @@ -31,7 +25,10 @@ some_sample = rand(d, 1000) uv = UncertainValue(v::Vector) ``` -``` julia tab="Explicit KDE constructor" +### Explicit KDE constructor: + + +```julia using Distributions, UncertainData, KernelDensity # Create a normal distribution @@ -46,7 +43,9 @@ some_sample = rand(d, 1000) uv = UncertainValue(UnivariateKDE, v::Vector) ``` -``` julia tab="Changing the kernel" +### Changing the kernel + +```julia using Distributions, UncertainData, KernelDensity # Create a normal distribution @@ -62,7 +61,9 @@ some_sample = rand(d, 1000) uv = UncertainValue(UnivariateKDE, v::Vector; kernel = Normal) ``` -``` julia tab="Adjusting number of points" +### Adjusting number of points + +```julia using Distributions, UncertainData, KernelDensity # Create a normal distribution diff --git a/docs/src/uncertain_values/uncertainvalues_overview.md b/docs/src/uncertain_values/uncertainvalues_overview.md index 85b25936..2d90975b 100644 --- a/docs/src/uncertain_values/uncertainvalues_overview.md +++ b/docs/src/uncertain_values/uncertainvalues_overview.md @@ -1,6 +1,6 @@ 
-# [Uncertain value types](@id uncertain_value_types) +# [Types of uncertain values](@id uncertain_value_types) -The core concept of `UncertainData` is to replace an uncertain data value with a +The core concept of `UncertainData.jl` is to replace an uncertain data value with a probability distribution describing the point's uncertainty. The following types of uncertain values are currently implemented: @@ -66,7 +66,7 @@ p = UncertainValue(vals, weights) If your data has a histogram closely resembling some theoretical distribution, the uncertain value may be represented by fitting such a distribution to the data. -``` julia tab="Example 1: fitting a normal distribution" +```julia using Distributions, UncertainData # Generate some random data from a normal distribution, so that we get a @@ -78,7 +78,7 @@ some_sample = rand(Normal(), 1000) uv = UncertainValue(Normal, some_sample) ``` -``` julia tab="Example 2: fitting a gamma distribution" +```julia using Distributions, UncertainData # Generate some random data from a gamma distribution, so that we get a @@ -98,19 +98,19 @@ parameters. For example, a data value may be given as normal distribution with a given mean `μ = 2.2` and standard deviation `σ = 0.3`. 
-``` julia tab="Example 1: theoretical normal distribution" +```julia # Uncertain value represented by a theoretical normal distribution with # known parameters μ = 2.2 and σ = 0.3 uv = UncertainValue(Normal, 2.2, 0.3) ``` -``` julia tab="Example 2: theoretical gamma distribution" +```julia # Uncertain value represented by a theoretical gamma distribution with # known parameters α = 2.1 and θ = 3.1 uv = UncertainValue(Gamma, 2.1, 3.1) ``` -``` julia tab="Example 3: theoretical binomial distribution" +```julia # Uncertain value represented by a theoretical binomial distribution with # known parameters p = 32 and p = 0.13 uv = UncertainValue(Binomial, 32, 0.13) @@ -120,6 +120,6 @@ uv = UncertainValue(Binomial, 32, 0.13) Scalars with no uncertainty can also be represented. -```julia +```julia c1, c2 = UncertainValue(2), UncertainValue(2.2) ``` diff --git a/docs/src/uncertain_values/uncertainvalues_populations.md b/docs/src/uncertain_values/uncertainvalues_populations.md index 783491d4..95e86df9 100644 --- a/docs/src/uncertain_values/uncertainvalues_populations.md +++ b/docs/src/uncertain_values/uncertainvalues_populations.md @@ -1,18 +1,10 @@ -# [Weighted populations](@id uncertain_value_population) +# [Populations](@id uncertain_value_population) The `UncertainScalarPopulation` type allows representation of an uncertain scalar represented by a population of values who will be sampled according to a vector of explicitly provided probabilities. Think of it as an explicit kernel density estimate. 
-# Generic constructor - -```@docs -UncertainValue(::Vector, ::Vector) -``` - -# Type documentation - ```@docs UncertainScalarPopulation ``` diff --git a/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md b/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md index 4053a566..fc6cf625 100644 --- a/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md +++ b/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md @@ -5,107 +5,75 @@ which are reported as following a specific distribution. For example, an author report the mean and standard deviation of a value stated to follow a normal distribution. `UncertainData` makes it easy to represent such values! -# Generic constructors +Supported distributions are currently: -## From instances of distributions - -```@docs -UncertainValue(d::Distributions.Distribution) -``` +- `Uniform` +- `Normal` +- `Gamma` +- `Beta` +- `BetaPrime` +- `Frechet` +- `Binomial` +- `BetaBinomial` -## Defined from scratch +## Constructors -Uncertain values represented by theoretical distributions may be constructed +Uncertain values represented by theoretical distributions may also be constructed using the two-parameter or three-parameter constructors -`UncertainValue(d::Type{D}, a<:Number, b<:Number)` or -`UncertainValue(d::Type{D}, a<:Number, b<:Number, c<:Number)` (see below). +`UncertainValue(d::Type{D}, a<:Number, b<:Number) where D <: Distribution` or +`UncertainValue(d::Type{D}, a<:Number, b<:Number, c<:Number) where D <: Distribution` (see below). Parameters are provided to the constructor in the same order as for constructing the equivalent distributions in `Distributions.jl`. -### Two-parameter distributions - ```@docs UncertainValue(distribution::Type{D}, a::T1, b::T2; kwargs...) where {T1<:Number, T2 <: Number, D<:Distribution} -``` - -### Three-parameter distributions - -```@docs UncertainValue(distribution::Type{D}, a::T1, b::T2, c::T3; kwargs...) 
where {T1<:Number, T2<:Number, T3<:Number, D<:Distribution} ``` -# Type documentation - -```@docs -UncertainScalarBetaBinomialDistributed -UncertainScalarBetaDistributed -UncertainScalarBetaPrimeDistributed -UncertainScalarBinomialDistributed -UncertainScalarFrechetDistributed -UncertainScalarGammaDistributed -UncertainScalarNormallyDistributed -UncertainScalarUniformlyDistributed -``` - -# List of supported distributions - -Supported distributions are: - -- `Uniform` -- `Normal` -- `Gamma` -- `Beta` -- `BetaPrime` -- `Frechet` -- `Binomial` -- `BetaBinomial` - -More distributions will be added in the future!. - -# Examples +## Examples -``` julia tab="Uniform" +```julia # Uncertain value generated by a uniform distribution on [-5.0, 5.1]. uv = UncertainValue(Uniform, -5.0, 5.1) ``` -``` julia tab="Normal" +```julia # Uncertain value generated by a normal distribution with parameters μ = -2 and # σ = 0.5. uv = UncertainValue(Normal, -2, 0.5) ``` -``` julia tab="Gamma" +```julia # Uncertain value generated by a gamma distribution with parameters α = 2.2 # and θ = 3. uv = UncertainValue(Gamma, 2.2, 3) ``` -``` julia tab="Beta" +```julia # Uncertain value generated by a beta distribution with parameters α = 1.5 # and β = 3.5 uv = UncertainValue(Beta, 1.5, 3.5) ``` -``` julia tab="BetaPrime" +```julia # Uncertain value generated by a beta prime distribution with parameters α = 1.7 # and β = 3.2 uv = UncertainValue(Beta, 1.7, 3.2) ``` -``` julia tab="Fréchet" +```julia # Uncertain value generated by a Fréchet distribution with parameters α = 2.1 # and θ = 4 uv = UncertainValue(Beta, 2.1, 4) ``` -``` julia tab="Binomial" +```julia # Uncertain value generated by binomial distribution with n = 28 trials and # probability p = 0.2 of success in individual trials. uv = UncertainValue(Binomial, 28, 0.2) ``` -``` julia tab="BetaBinomial" +```julia # Creates an uncertain value generated by a beta-binomial distribution with # n = 28 trials, and parameters α = 1.5 and β = 3.5. 
uv = UncertainValue(BetaBinomial, 28, 3.3, 4.4) diff --git a/src/sampling_constraints/ordered_sequences/ordered_sequence_algorithms.jl b/src/sampling_constraints/ordered_sequences/ordered_sequence_algorithms.jl index 8fbdd5dd..fae0de9f 100644 --- a/src/sampling_constraints/ordered_sequences/ordered_sequence_algorithms.jl +++ b/src/sampling_constraints/ordered_sequences/ordered_sequence_algorithms.jl @@ -6,7 +6,7 @@ An abstract type for ordered sampling algorithms. abstract type OrderedSamplingAlgorithm end """ - StartToEnd + StartToEnd <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that values should be treated consecutively from start to finish of the dataset. @@ -14,7 +14,7 @@ treated consecutively from start to finish of the dataset. struct StartToEnd <: OrderedSamplingAlgorithm end """ - EndToStart + EndToStart <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that the values should be treated consecutively from the end to the start of the dataset. @@ -22,18 +22,17 @@ treated consecutively from the end to the start of the dataset. struct EndToStart <: OrderedSamplingAlgorithm end """ -RandPtOutwards + RandPtOutwards <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that the values should be divided into two groups, separating the values at some midpoint of the dataset. The two groups of values are then treated separately. """ struct RandPtOutwards <: OrderedSamplingAlgorithm - midpoint_idx::Int end """ - ChuncksForwards + ChuncksForwards <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that the values should be divided into multiple (`n_chunks`) groups. The groups of values @@ -45,7 +44,7 @@ struct ChunksForwards <: OrderedSamplingAlgorithm end """ - ChuncksBackwards + ChuncksBackwards <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that the values should be divided into multiple (`n_chunks`) groups. 
The groups of values diff --git a/src/uncertain_values/UncertainValue.jl b/src/uncertain_values/UncertainValue.jl index b19e8684..1413e464 100644 --- a/src/uncertain_values/UncertainValue.jl +++ b/src/uncertain_values/UncertainValue.jl @@ -14,6 +14,15 @@ UncertainValue(x::T) where T <: Real = CertainValue(x) UncertainValue(uval::AbstractUncertainValue) = uval # From Measurements.jl +""" + UncertainValue(m::Measurement) → UncertainScalarNormallyDistributed + +Convert a `Measurement` instance to an uncertain value compatible with UncertainData.jl. + +`Measurement` instances from [Measurements.jl](https://github.com/JuliaPhysics/Measurements.jl)[^1] are +treated as normal distributions with known means. Once the conversion is done, the +functionality provided by Measurements.jl, such as exact error propagation, is lost. +""" UncertainValue(m::Measurement{T}) where T = UncertainValue(Normal, m.val, m.err) """ @@ -21,6 +30,14 @@ UncertainValue(m::Measurement{T}) where T = UncertainValue(Normal, m.val, m.err) From a numeric vector, construct an `UncertainPopulation` whose members are scalar values. + +## Examples + +```julia +x = measurement(2.2, 0.21) +UncertainValue(x) +``` + """ function UncertainValue(values::Vector{<:Number}, probs::Vector{<:Number}) UncertainScalarPopulation(float.(values), probs) @@ -113,8 +130,7 @@ UncertainValue(x::Vector{Array{<:Real, 0}}) = UncertainValue([el[] for el in x]) """ - UncertainValue(empiricaldata::AbstractVector{T}, - d::Type{D}) where {D <: Distribution} + UncertainValue(d::Type{D}, empiricaldata::Vector{T}) where {D<:Distribution, T} # Constructor for empirical distributions. 
From ae6cb593ead8cdf6aa1851ff4650f34066c3f925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Tue, 27 Apr 2021 10:39:42 +0200 Subject: [PATCH 03/21] Add Plots to documentation dependencies --- docs/Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/Project.toml b/docs/Project.toml index 243ed875..e1deb38a 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -7,3 +7,5 @@ Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" KernelDensity = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" + From 0c5a657635bc058ea42e020af3fa323cb706402e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Wed, 28 Apr 2021 14:38:57 +0200 Subject: [PATCH 04/21] See changelog for updates --- CHANGELOG.md | 20 +- Project.toml | 2 +- docs/Project.toml | 2 +- docs/make.jl | 10 +- docs/src/mathematics/elementary_operations.md | 4 +- ...orial_transforming_data_to_regular_grid.md | 2 +- .../convenience_constructors.md | 9 - .../defining_uncertain_values.md | 7 + docs/src/uncertain_values/types.md | 129 +++++ .../uncertainvalues_Measurements.md | 9 +- .../uncertainvalues_certainvalue.md | 4 +- .../uncertainvalues_fitted.md | 69 --- .../uncertain_values/uncertainvalues_kde.md | 129 +---- .../uncertainvalues_overview.md | 2 +- ...ncertainvalues_theoreticaldistributions.md | 81 +-- .../uncertainvalues/add_uncertainvalues.jl | 16 +- .../uncertainvalues/divide_uncertainvalues.jl | 16 +- ...perations_uncertainvalues_special_cases.jl | 16 +- .../exponentiation_uncertainvalues.jl | 16 +- .../multiply_uncertainvalues.jl | 16 +- .../subtract_uncertainvalues.jl | 16 +- .../trig_functions_uncertainvalues.jl | 16 +- src/plot_recipes/recipes_certainvalues.jl | 8 +- .../binning/bin_BinnedResampling.jl | 6 +- .../binning/bin_BinnedWeightedResampling.jl | 6 +- 
.../resampling_schemes_binned.jl | 4 +- .../resampling_schemes_constrained.jl | 2 +- .../resampling_schemes_sequential.jl | 4 +- .../resample_certainvalues.jl | 14 +- .../constrain_certain_value.jl | 14 +- .../constraint_definitions.jl | 2 +- .../ordered_sequences/ordered_sequences.jl | 4 +- .../truncation/truncate_CertainValue.jl | 18 +- .../UncertainIndexDataset.jl | 2 +- .../UncertainValueDataset.jl | 2 +- src/uncertain_values/CertainScalar.jl | 88 ++++ src/uncertain_values/CertainValue.jl | 88 ---- .../UncertainScalarPopulation.jl | 116 +++-- src/uncertain_values/UncertainScalarsKDE.jl | 59 ++- .../UncertainScalarsTheoretical.jl | 101 +++- .../UncertainScalarsTheoreticalFitted.jl | 88 +++- src/uncertain_values/UncertainValue.jl | 475 +++++++++--------- src/uncertain_values/UncertainValues.jl | 2 +- src/uncertain_values/convert.jl | 4 +- .../operations/comparisons.jl | 10 +- .../test_elementary_maths_uncertainvalues.jl | 4 +- .../test_resampling_sequential_decreasing.jl | 2 +- .../test_resampling_sequential_increasing.jl | 2 +- .../test_resampling_with_schemes.jl | 2 +- .../test_resampling_certain_value.jl | 2 +- test/runtests.jl | 2 +- .../test_constrain_certainvalue.jl | 4 +- .../test_constrain_with_schemes.jl | 2 +- .../test_uncertain_index_value_dataset.jl | 2 +- ...st_ConstrainedUncertainScalarPopulation.jl | 2 +- .../test_UncertainScalarPopulation.jl | 2 +- test/uncertain_values/test_certain_values.jl | 4 +- 57 files changed, 923 insertions(+), 815 deletions(-) delete mode 100644 docs/src/uncertain_values/convenience_constructors.md create mode 100644 docs/src/uncertain_values/defining_uncertain_values.md create mode 100644 docs/src/uncertain_values/types.md create mode 100644 src/uncertain_values/CertainScalar.jl delete mode 100644 src/uncertain_values/CertainValue.jl diff --git a/CHANGELOG.md b/CHANGELOG.md index 98e9432f..bddac130 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,22 @@ # UncertainData changelog -## v.0.14.1 +## v0.15.0 + +### 
Breaking changes + +- `CertainValue` is renamed to `CertainScalar`. +- Some abstract types are no longer exported. + +### Features + +- More flexible inputs to `UncertainValue` constructor. + +### Documentation + +- Shortened and improved documentation. +- Use regular Documenter.jl style, not mkdocs. + +## v0.14.1 ### Features @@ -10,7 +26,7 @@ - Make some methods more generic (non-breaking). -## v.0.14.0 +## v0.14.0 ### Breaking changes diff --git a/Project.toml b/Project.toml index 4b747777..a89283b5 100644 --- a/Project.toml +++ b/Project.toml @@ -9,7 +9,6 @@ Bootstrap = "e28b5b4c-05e8-5b66-bc03-6f0c0a0a06e0" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433" DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4" HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" @@ -24,6 +23,7 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" [compat] diff --git a/docs/Project.toml b/docs/Project.toml index e1deb38a..719cc50e 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,11 +1,11 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" -DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" KernelDensity = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Measurements = 
"eff96d63-e80a-5855-80a2-b1b0885c5ab7" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" diff --git a/docs/make.jl b/docs/make.jl index 6f7e8c4c..5934b9c1 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -25,16 +25,8 @@ using Interpolations PAGES = [ "index.md", "Uncertain values" => [ - "uncertain_values/convenience_constructors.md", #"uncertain_values/uncertainvalues_overview.md", - "Types of uncertain values" => [ - "uncertain_values/uncertainvalues_theoreticaldistributions.md", - "uncertain_values/uncertainvalues_fitted.md", - "uncertain_values/uncertainvalues_kde.md", - "uncertain_values/uncertainvalues_populations.md", - "uncertain_values/uncertainvalues_certainvalue.md", - "uncertain_values/uncertainvalues_Measurements.md", - ], + "uncertain_values/types.md", "uncertain_values/combining_and_merging.md", "uncertain_values/uncertainvalues_examples.md", ], diff --git a/docs/src/mathematics/elementary_operations.md b/docs/src/mathematics/elementary_operations.md index 94de2a6d..641a55a6 100644 --- a/docs/src/mathematics/elementary_operations.md +++ b/docs/src/mathematics/elementary_operations.md @@ -92,6 +92,6 @@ Base.:/(a::AbstractUncertainValue, b::Real, n::Int) ## Special cases -### `CertainValue`s +### `CertainScalar`s -Performing elementary operations with `CertainValue`s behaves as for scalars. +Performing elementary operations with `CertainScalar`s behaves as for scalars. diff --git a/docs/src/tutorials/tutorial_transforming_data_to_regular_grid.md b/docs/src/tutorials/tutorial_transforming_data_to_regular_grid.md index 6a34acd5..5f0a4fa3 100644 --- a/docs/src/tutorials/tutorial_transforming_data_to_regular_grid.md +++ b/docs/src/tutorials/tutorial_transforming_data_to_regular_grid.md @@ -110,7 +110,7 @@ X_binned = resample(X, resampling) ``` `X_binned` is still a `UncertainIndexValueDataset`, but the indices have been reduced -to `CertainValue` instances placed at the bin midpoints. 
The values, however, are kept +to `CertainScalar` instances placed at the bin midpoints. The values, however, are kept as uncertain values. Plotting the result: diff --git a/docs/src/uncertain_values/convenience_constructors.md b/docs/src/uncertain_values/convenience_constructors.md deleted file mode 100644 index ebf74f81..00000000 --- a/docs/src/uncertain_values/convenience_constructors.md +++ /dev/null @@ -1,9 +0,0 @@ -# Convenience constructors - -```@docs -UncertainValue(d::Distributions.Distribution) -UncertainValue(d::Type{D}, empiricaldata::Vector{T}) where {D<:Distribution, T} -UncertainValue(::AbstractVector{<:Real}) -UncertainValue(::Vector, ::Vector) -UncertainValue(::Real) -``` \ No newline at end of file diff --git a/docs/src/uncertain_values/defining_uncertain_values.md b/docs/src/uncertain_values/defining_uncertain_values.md new file mode 100644 index 00000000..70b9cc58 --- /dev/null +++ b/docs/src/uncertain_values/defining_uncertain_values.md @@ -0,0 +1,7 @@ +# Convenience constructors + +The following convenience constructors are used to define uncertain values. + +```@docs +UncertainValue +``` \ No newline at end of file diff --git a/docs/src/uncertain_values/types.md b/docs/src/uncertain_values/types.md new file mode 100644 index 00000000..e5eeb3b8 --- /dev/null +++ b/docs/src/uncertain_values/types.md @@ -0,0 +1,129 @@ +# Types of uncertain values + +## Convenience constructors + +The following convenience constructors are used to define uncertain values. + +```@docs +UncertainValue +``` + +## [Theoretical distributions](@id uncertain_value_theoretical_distribution) + +It is common in the scientific literature to encounter uncertain data values +which are reported as following a specific distribution. For example, an author +reports the mean and standard deviation of a value stated to follow a +normal distribution. `UncertainData.jl` makes it easy to represent such values!
+ +```@docs +UncertainScalarBetaDistributed +UncertainScalarBetaBinomialDistributed +UncertainScalarBetaPrimeDistributed +UncertainScalarBinomialDistributed +UncertainScalarFrechetDistributed +UncertainScalarGammaDistributed +UncertainScalarNormallyDistributed +UncertainScalarUniformlyDistributed +``` + +## [Fitted theoretical distributions](@id uncertain_value_fitted_theoretical_distribution) + +For data values with histograms close to some known distribution, the user +may choose to represent the data by fitting a theoretical distribution to the +values. This will only work well if the histogram closely resembles a +theoretical distribution. + +```@docs +UncertainScalarTheoreticalFit +``` + +## [Kernel density estimates (KDE)](@id uncertain_value_kde) + +When your data have an empirical distribution that doesn't follow any obvious +theoretical distribution, the data may be represented by a kernel density +estimate of the underlying distribution. + +```@docs +UncertainScalarKDE +``` + +### Extended example + +Let's create a multimodal distribution, then sample 1000 values from it. + +```@example kde1 +using UncertainData, Distributions, Plots, StatsPlots +# Draw 1000 points from a three-component mixture model to create a multimodal distribution. +n1 = Normal(-3.0, 1.2) +n2 = Normal(8.0, 1.2) +n3 = Normal(0.0, 2.5) +M = MixtureModel([n1, n2, n3]) +s = rand(M, 1000); +histogram(s, nbins = 80) +ylabel!("Frequency"); xlabel!("Value") +savefig("figs/bimodal_empirical.svg") #hide +``` + +![](figs/bimodal_empirical.svg) + +It is not obvious which distribution to fit to such data. +A kernel density estimate, however, will always be a decent representation +of the data, because it doesn't follow a specific distribution and adapts to +the data values. + +To create a kernel density estimate, simply call the +`UncertainValue` constructor with a vector containing the sample. This will trigger +kernel density estimation.
+ +```@example kde1 +x = UncertainValue(s) +``` + +The plot below compares the empirical histogram (here represented as a density +plot) with our kernel density estimate. + +```@example kde1 +x = UncertainValue(s) +density(s, label = "10000 mixture model (M) samples") +density!(rand(x, 50000), + label = "50000 samples from KDE estimate to M") +xlabel!("data value") +ylabel!("probability density") +savefig("figs/KDEUncertainValue.svg") #hide +``` + +![](figs/KDEUncertainValue.svg) + + +## [Populations](@id uncertain_value_population) + +The `UncertainScalarPopulation` type allows representation of an uncertain scalar +represented by a population of values which will be sampled according to a vector of +explicitly provided probabilities. Think of it as an explicit kernel density estimate. + +```@docs +UncertainScalarPopulation +``` + +## Certain values + +The `CertainScalar` allows representation of values with no uncertainty. It behaves +just as a scalar, but can be mixed with uncertain values when performing +[mathematical operations](../mathematics/elementary_operations.md) and +[resampling](../resampling/resampling_overview.md). + +```@docs +CertainScalar +``` + +## Compatibility with Measurements.jl + +`Measurement` instances from the Measurements.jl package[^1] are represented in UncertainData.jl as normal distributions. If exact error propagation is a requirement and your data is exclusively normally distributed, use Measurements.jl. If your data is not necessarily +normally distributed and contains errors of different types, and +a resampling approach to error propagation is desired, use UncertainData.jl. + +See the [`UncertainValue`](@ref) constructor for instructions on how to +convert `Measurement`s to uncertain values compatible with this package. + +[^1]: + M. Giordano, 2016, "Uncertainty propagation with functionally correlated quantities", arXiv:1610.08716 (Bibcode: 2016arXiv161008716G).
diff --git a/docs/src/uncertain_values/uncertainvalues_Measurements.md b/docs/src/uncertain_values/uncertainvalues_Measurements.md index 32c05e16..ce994108 100644 --- a/docs/src/uncertain_values/uncertainvalues_Measurements.md +++ b/docs/src/uncertain_values/uncertainvalues_Measurements.md @@ -1,8 +1,11 @@ # Compatibility with Measurements.jl -```@docs -UncertainValue(m::Measurement{T}) where T -``` +`Measurement` instances from the Measurements.jl package[^1] are represented in UncertainData.jl as normal distributions. If exact error propagation is a requirement and your data is exclusively normally distributed, use Measurements.jl. If your data is not necessarily +normally distributed and contains errors of different types, and +a resampling approach to error propagation is desired, use UncertainData.jl. + +See the [`UncertainValue`](@ref) constructor for instructions on how to +convert `Measurement`s to uncertain values compatible with this package. [^1]: M. Giordano, 2016, "Uncertainty propagation with functionally correlated quantities", arXiv:1610.08716 (Bibcode: 2016arXiv161008716G). \ No newline at end of file diff --git a/docs/src/uncertain_values/uncertainvalues_certainvalue.md b/docs/src/uncertain_values/uncertainvalues_certainvalue.md index f5b3482a..91be29f2 100644 --- a/docs/src/uncertain_values/uncertainvalues_certainvalue.md +++ b/docs/src/uncertain_values/uncertainvalues_certainvalue.md @@ -1,11 +1,11 @@ # Certain values -The `CertainValue` allows representation of values with no uncertainty. It behaves +The `CertainScalar` allows representation of values with no uncertainty. It behaves just as a scalar, but can be mixed with uncertain values when performing [mathematical operations](../mathematics/elementary_operations.md) and [resampling](../resampling/resampling_overview.md).
```@docs -CertainValue +CertainScalar ``` diff --git a/docs/src/uncertain_values/uncertainvalues_fitted.md b/docs/src/uncertain_values/uncertainvalues_fitted.md index 3357b2cf..6c69eb94 100644 --- a/docs/src/uncertain_values/uncertainvalues_fitted.md +++ b/docs/src/uncertain_values/uncertainvalues_fitted.md @@ -8,72 +8,3 @@ theoretical distribution. ```@docs UncertainScalarTheoreticalFit ``` - -## Examples - -``` julia tab="Uniform" -using Distributions, UncertainData - -# Create a normal distribution -d = Uniform() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Define an uncertain value by fitting a uniform distribution to the sample. -uv = UncertainValue(Uniform, some_sample) -``` - -``` julia tab="Normal" -using Distributions, UncertainData - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Represent the uncertain value by a fitted normal distribution. -uv = UncertainValue(Normal, some_sample) -``` - -``` julia tab="Gamma" -using Distributions, UncertainData - -# Generate 1000 values from a gamma distribution with parameters α = 2.1, -# θ = 5.2. -some_sample = rand(Gamma(2.1, 5.2), 1000) - -# Represent the uncertain value by a fitted gamma distribution. -uv = UncertainValue(Gamma, some_sample) -``` -In these examples we're trying to fit the same distribution to our sample -as the distribution from which we draw the sample. Thus, we will get good fits. -In real applications, make sure to always visually investigate the histogram -of your data! - - -### Beware: fitting distributions may lead to nonsensical results! - -In a less contrived example, we may try to fit a beta distribution to a sample -generated from a gamma distribution. - - -``` julia -using Distributions, UncertainData - -# Generate 1000 values from a gamma distribution with parameters α = 2.1, -# θ = 5.2. 
-some_sample = rand(Gamma(2.1, 5.2), 1000) - -# Represent the uncertain value by a fitted beta distribution. -uv = UncertainValue(Beta, some_sample) -``` - -This is obviously not a good idea. Always visualise your distribution before -deciding on which distribution to fit! You won't get any error messages if you -try to fit a distribution that does not match your data. - -If the data do not follow an obvious theoretical distribution, it is better to -use kernel density estimation to define the uncertain value. - diff --git a/docs/src/uncertain_values/uncertainvalues_kde.md b/docs/src/uncertain_values/uncertainvalues_kde.md index 368fad5a..9f545467 100644 --- a/docs/src/uncertain_values/uncertainvalues_kde.md +++ b/docs/src/uncertain_values/uncertainvalues_kde.md @@ -8,138 +8,49 @@ estimate to the underlying distribution. UncertainScalarKDE ``` -## Examples - -### Implicit KDE constructor: - -``` julia -using Distributions, UncertainData - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Use the implicit KDE constructor to create the uncertain value -uv = UncertainValue(v::Vector) -``` - -### Explicit KDE constructor: - - -```julia -using Distributions, UncertainData, KernelDensity - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Use the explicit KDE constructor to create the uncertain value. -# This constructor follows the same convention as when fitting distributions -# to empirical data, so this is the recommended way to construct KDE estimates. -uv = UncertainValue(UnivariateKDE, v::Vector) -``` - -### Changing the kernel - -```julia -using Distributions, UncertainData, KernelDensity - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. 
-some_sample = rand(d, 1000) - -# Use the explicit KDE constructor to create the uncertain value, specifying -# that we want to use normal distributions as the kernel. The kernel can be -# any valid kernel from Distributions.jl, and the default is to use normal -# distributions. -uv = UncertainValue(UnivariateKDE, v::Vector; kernel = Normal) -``` - -### Adjusting number of points - -```julia -using Distributions, UncertainData, KernelDensity - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Use the explicit KDE constructor to create the uncertain value, specifying -# the number of points we want to use for the kernel density estimate. Fast -# Fourier transforms are used behind the scenes, so the number of points -# should be a power of 2 (the default is 2048 points). -uv = UncertainValue(UnivariateKDE, v::Vector; npoints = 1024) -``` - -# Extended example +## Extended example Let's create a bimodal distribution, then sample 10000 values from it. -```julia -using Distributions - +```@example kde1 +using UncertainData, Distributions, Plots, StatsPlots +# Draw 1000 points from a three-component mixture model to create a multimodal distribution. n1 = Normal(-3.0, 1.2) n2 = Normal(8.0, 1.2) n3 = Normal(0.0, 2.5) - -# Use a mixture model to create a bimodal distribution M = MixtureModel([n1, n2, n3]) - -# Sample the mixture model. -samples_empirical = rand(M, Int(1e4)); +s = rand(M, 1000); +histogram(s, nbins = 80) +ylabel!("Frequency"); xlabel!("Value") +savefig("figs/bimodal_empirical.svg") #hide ``` -![](imgs/bimodal_empirical.svg) +![](figs/bimodal_empirical.svg) It is not obvious which distribution to fit to such data. - A kernel density estimate, however, will always be a decent representation of the data, because it doesn't follow a specific distribution and adapts to the data values. 
To create a kernel density estimate, simply call the -`UncertainValue(v::Vector{Number})` constructor with a vector containing the -sample: +`UncertainValue` constructor with a vector containing the sample. This will trigger +kernel density estimation. -```julia -uv = UncertainValue(samples_empirical) +```@example kde1 +x = UncertainValue(s) ``` The plot below compares the empirical histogram (here represented as a density plot) with our kernel density estimate. -```julia -using Plots, StatPlots, UncertainData -uv = UncertainValue(samples_empirical) -density(mvals, label = "10000 mixture model (M) samples") -density!(rand(uv, Int(1e4)), - label = "10000 samples from KDE estimate to M") +```@example kde1 +x = UncertainValue(s) +density(s, label = "10000 mixture model (M) samples") +density!(rand(x, 50000), + label = "50000 samples from KDE estimate to M") xlabel!("data value") ylabel!("probability density") +savefig("figs/KDEUncertainValue.svg") #hide ``` -![](imgs/KDEUncertainValue.svg) - -## Constructor - -```@docs -UncertainValue(data::Vector{T}; - kernel::Type{D} = Normal, - npoints::Int = 2048) where {D <: Distributions.Distribution, T} -``` - -### Additional keyword arguments and examples - -If the only argument to the `UncertainValue` constructor is a vector of values, -the default behaviour is to represent the distribution by a kernel density -estimate (KDE), i.e. `UncertainValue(data)`. Gaussian kernels are used by -default. The syntax `UncertainValue(UnivariateKDE, data)` will also work if -`KernelDensity.jl` is loaded. 
+![](figs/KDEUncertainValue.svg) diff --git a/docs/src/uncertain_values/uncertainvalues_overview.md b/docs/src/uncertain_values/uncertainvalues_overview.md index 2d90975b..6d7129ca 100644 --- a/docs/src/uncertain_values/uncertainvalues_overview.md +++ b/docs/src/uncertain_values/uncertainvalues_overview.md @@ -12,7 +12,7 @@ The following types of uncertain values are currently implemented: drawing values are already known, so you can skip kernel density estimation. Populations can be nested, and may contain numerical values, uncertain values or both. - [Values without uncertainty](uncertainvalues_certainvalue.md) have their own dedicated - [`CertainValue`](@ref) type, so that you can uncertain values with certain values. + [`CertainScalar`](@ref) type, so that you can mix uncertain values with certain values. - [`Measurement` instances](uncertainvalues_Measurements.md) from [Measurements.jl](https://github.com/JuliaPhysics/Measurements.jl) are treated as normal distributions with known mean and standard devation. ## Some quick examples diff --git a/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md b/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md index fc6cf625..5fda9f4c 100644 --- a/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md +++ b/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md @@ -3,78 +3,15 @@ It is common in the scientific literature to encounter uncertain data values which are reported as following a specific distribution. For example, an author report the mean and standard deviation of a value stated to follow a -normal distribution. `UncertainData` makes it easy to represent such values!
- -Supported distributions are currently: - -- `Uniform` -- `Normal` -- `Gamma` -- `Beta` -- `BetaPrime` -- `Frechet` -- `Binomial` -- `BetaBinomial` - -## Constructors - -Uncertain values represented by theoretical distributions may also be constructed -using the two-parameter or three-parameter constructors -`UncertainValue(d::Type{D}, a<:Number, b<:Number) where D <: Distribution` or -`UncertainValue(d::Type{D}, a<:Number, b<:Number, c<:Number) where D <: Distribution` (see below). -Parameters are provided to the constructor in the same order as for constructing -the equivalent distributions in `Distributions.jl`. +normal distribution. `UncertainData.jl` makes it easy to represent such values! ```@docs -UncertainValue(distribution::Type{D}, a::T1, b::T2; kwargs...) where {T1<:Number, T2 <: Number, D<:Distribution} -UncertainValue(distribution::Type{D}, a::T1, b::T2, c::T3; kwargs...) where {T1<:Number, T2<:Number, T3<:Number, D<:Distribution} -``` - -## Examples - -```julia -# Uncertain value generated by a uniform distribution on [-5.0, 5.1]. -uv = UncertainValue(Uniform, -5.0, 5.1) -``` - -```julia -# Uncertain value generated by a normal distribution with parameters μ = -2 and -# σ = 0.5. -uv = UncertainValue(Normal, -2, 0.5) -``` - -```julia -# Uncertain value generated by a gamma distribution with parameters α = 2.2 -# and θ = 3. -uv = UncertainValue(Gamma, 2.2, 3) -``` - -```julia -# Uncertain value generated by a beta distribution with parameters α = 1.5 -# and β = 3.5 -uv = UncertainValue(Beta, 1.5, 3.5) -``` - -```julia -# Uncertain value generated by a beta prime distribution with parameters α = 1.7 -# and β = 3.2 -uv = UncertainValue(Beta, 1.7, 3.2) -``` - -```julia -# Uncertain value generated by a Fréchet distribution with parameters α = 2.1 -# and θ = 4 -uv = UncertainValue(Beta, 2.1, 4) -``` - -```julia -# Uncertain value generated by binomial distribution with n = 28 trials and -# probability p = 0.2 of success in individual trials. 
-uv = UncertainValue(Binomial, 28, 0.2) -``` - -```julia -# Creates an uncertain value generated by a beta-binomial distribution with -# n = 28 trials, and parameters α = 1.5 and β = 3.5. -uv = UncertainValue(BetaBinomial, 28, 3.3, 4.4) +UncertainScalarBetaDistributed +UncertainScalarBetaBinomialDistributed +UncertainScalarBetaPrimeDistributed +UncertainScalarBinomialDistributed +UncertainScalarFrechetDistributed +UncertainScalarGammaDistributed +UncertainScalarNormallyDistributed +UncertainScalarUniformlyDistributed ``` diff --git a/src/mathematics/uncertainvalues/add_uncertainvalues.jl b/src/mathematics/uncertainvalues/add_uncertainvalues.jl index c588a4c9..3f2925f4 100644 --- a/src/mathematics/uncertainvalues/add_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/add_uncertainvalues.jl @@ -129,20 +129,20 @@ end # Special cases ##################################################################################### -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar ################## -# `CertainValue`s +# `CertainScalar`s ################# """ - Base.:+(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) + Base.:+(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Addition of certain values with themselves or scalars acts as regular addition, but -returns the result wrapped in a `CertainValue` instance. +returns the result wrapped in a `CertainScalar` instance. 
""" -Base.:+(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) +Base.:+(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Base.:+(a::CertainValue, b::CertainValue) = CertainValue(a.value + b.value) -Base.:+(a::CertainValue, b::Real) = CertainValue(a.value + b) -Base.:+(a::Real, b::CertainValue) = CertainValue(a + b.value) +Base.:+(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value + b.value) +Base.:+(a::CertainScalar, b::Real) = CertainScalar(a.value + b) +Base.:+(a::Real, b::CertainScalar) = CertainScalar(a + b.value) diff --git a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl index 381ff370..f347c5e7 100644 --- a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl @@ -93,20 +93,20 @@ Base.:/(a::AbstractUncertainValue, b::Real, n::Int) = # Special cases ##################################################################################### -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar ################## -# `CertainValue`s +# `CertainScalar`s ################# """ - Base.:/(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) + Base.:/(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Division of certain values with themselves or scalars acts as regular division, but -returns the result wrapped in a `CertainValue` instance. +returns the result wrapped in a `CertainScalar` instance. 
""" -Base.:/(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) +Base.:/(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Base.:/(a::CertainValue, b::CertainValue) = CertainValue(a.value / b.value) -Base.:/(a::CertainValue, b::Real) = CertainValue(a.value / b) -Base.:/(a::Real, b::CertainValue) = CertainValue(a / b.value) +Base.:/(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value / b.value) +Base.:/(a::CertainScalar, b::Real) = CertainScalar(a.value / b) +Base.:/(a::Real, b::CertainScalar) = CertainScalar(a / b.value) diff --git a/src/mathematics/uncertainvalues/elementary_operations_uncertainvalues_special_cases.jl b/src/mathematics/uncertainvalues/elementary_operations_uncertainvalues_special_cases.jl index 6f29ef12..471ab7d2 100644 --- a/src/mathematics/uncertainvalues/elementary_operations_uncertainvalues_special_cases.jl +++ b/src/mathematics/uncertainvalues/elementary_operations_uncertainvalues_special_cases.jl @@ -6,50 +6,50 @@ for operator in operators funcs = quote """ - $($f)(a::CertainValue, b::AbstractUncertainValue; n::Int = 30000) + $($f)(a::CertainScalar, b::AbstractUncertainValue; n::Int = 30000) Compute `a $($operator) b`. Treats the certain value as a scalar and performs the operation element-wise on a default of `n = 30000` realizations of `b`. To tune the number of draws to `n`, use the `$($operator)(a, b, n::Int)` syntax. """ - function $(f)(a::CertainValue, b::AbstractUncertainValue; n::Int = 30000) + function $(f)(a::CertainScalar, b::AbstractUncertainValue; n::Int = 30000) $(elementwise_operator)(a.value, b, n) end """ - $($f)(a::AbstractUncertainValue, b::CertainValue; n::Int = 30000) + $($f)(a::AbstractUncertainValue, b::CertainScalar; n::Int = 30000) Compute `a $($operator) b`. Treats the certain value as a scalar and performs the operation element-wise on a default of `n = 30000` realizations of `a`. To tune the number of draws to `n`, use the `$($operator)(a, b, n::Int)` syntax. 
""" - function $(f)(a::AbstractUncertainValue, b::CertainValue; n::Int = 30000) + function $(f)(a::AbstractUncertainValue, b::CertainScalar; n::Int = 30000) $(elementwise_operator)(a, b.value, n) end """ - $($f)(a::AbstractUncertainValue, b::CertainValue; n::Int) + $($f)(a::AbstractUncertainValue, b::CertainScalar; n::Int) Compute `a $($operator) b`. Treats the certain value as a scalar and performs the operation element-wise on `n` realizations of `a`. This function is called with the `$($operator)(a, b, n::Int)` syntax. """ - function $(f)(a::AbstractUncertainValue, b::CertainValue, n::Int) + function $(f)(a::AbstractUncertainValue, b::CertainScalar, n::Int) $(elementwise_operator)(a, b.value, n) end """ - $($f)(a::CertainValue, b::AbstractUncertainValue, n::Int) + $($f)(a::CertainScalar, b::AbstractUncertainValue, n::Int) Compute `a $($operator) b`. Treats the certain value as a scalar and performs the operation element-wise on `n` realizations of `b`. This function is called with the `$($operator)(a, b, n::Int)` syntax. 
""" - function $(f)(a::CertainValue, b::AbstractUncertainValue, n::Int) + function $(f)(a::CertainScalar, b::AbstractUncertainValue, n::Int) $(elementwise_operator)(a.value, b, n) end end diff --git a/src/mathematics/uncertainvalues/exponentiation_uncertainvalues.jl b/src/mathematics/uncertainvalues/exponentiation_uncertainvalues.jl index 8270b703..b51c3aa1 100644 --- a/src/mathematics/uncertainvalues/exponentiation_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/exponentiation_uncertainvalues.jl @@ -105,20 +105,20 @@ Base.:^(a::AbstractUncertainValue, b::Real, n::Int) = # Special cases ##################################################################################### -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar ################## -# `CertainValue`s +# `CertainScalar`s ################# """ - Base.:^(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) + Base.:^(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Exponentiation of certain values with themselves or scalars acts as regular exponentiation, -but returns the result wrapped in a `CertainValue` instance. +but returns the result wrapped in a `CertainScalar` instance. 
""" -Base.:^(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) +Base.:^(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Base.:^(a::CertainValue, b::CertainValue) = CertainValue(a.value ^ b.value) -Base.:^(a::CertainValue, b::Real) = CertainValue(a.value ^ b) -Base.:^(a::Real, b::CertainValue) = CertainValue(a ^ b.value) +Base.:^(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value ^ b.value) +Base.:^(a::CertainScalar, b::Real) = CertainScalar(a.value ^ b) +Base.:^(a::Real, b::CertainScalar) = CertainScalar(a ^ b.value) diff --git a/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl b/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl index 06da479b..8e3a6ebc 100644 --- a/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl @@ -94,20 +94,20 @@ Base.:*(a::AbstractUncertainValue, b::Real, n::Int) = # Special cases ##################################################################################### -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar ################## -# `CertainValue`s +# `CertainScalar`s ################# """ - Base.:*(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) + Base.:*(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Multiplication of certain values with themselves or scalars acts as regular multiplication, -but returns the result wrapped in a `CertainValue` instance. +but returns the result wrapped in a `CertainScalar` instance. 
""" -Base.:*(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) +Base.:*(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Base.:*(a::CertainValue, b::CertainValue) = CertainValue(a.value * b.value) -Base.:*(a::CertainValue, b::Real) = CertainValue(a.value * b) -Base.:*(a::Real, b::CertainValue) = CertainValue(a * b.value) +Base.:*(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value * b.value) +Base.:*(a::CertainScalar, b::Real) = CertainScalar(a.value * b) +Base.:*(a::Real, b::CertainScalar) = CertainScalar(a * b.value) diff --git a/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl b/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl index 9b04b284..7c66828e 100644 --- a/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl @@ -98,20 +98,20 @@ Base.:-(a::AbstractUncertainValue, b::Real, n::Int) = # Special cases ##################################################################################### -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar ################## -# `CertainValue`s +# `CertainScalar`s ################# """ - Base.:-(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) + Base.:-(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Subtraction of certain values with themselves or scalars acts as regular subtraction, -but returns the result wrapped in a `CertainValue` instance. +but returns the result wrapped in a `CertainScalar` instance. 
""" -Base.:-(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) +Base.:-(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Base.:-(a::CertainValue, b::CertainValue) = CertainValue(a.value - b.value) -Base.:-(a::CertainValue, b::Real) = CertainValue(a.value - b) -Base.:-(a::Real, b::CertainValue) = CertainValue(a - b.value) +Base.:-(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value - b.value) +Base.:-(a::CertainScalar, b::Real) = CertainScalar(a.value - b) +Base.:-(a::Real, b::CertainScalar) = CertainScalar(a - b.value) diff --git a/src/mathematics/uncertainvalues/trig_functions_uncertainvalues.jl b/src/mathematics/uncertainvalues/trig_functions_uncertainvalues.jl index 71303c9b..bec81380 100644 --- a/src/mathematics/uncertainvalues/trig_functions_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/trig_functions_uncertainvalues.jl @@ -695,15 +695,15 @@ trigfuncs = [:(cos), :(cosd), :(cosh), :(sin), :(sind), :(sinh), :(tan), :(tand) :(secd), :(sech), :(cot), :(cotd), :(coth)] ################## -# `CertainValue`s +# `CertainScalar`s ################# -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar """ - Base.:-(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) + Base.:-(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Subtraction of certain values with themselves or scalars acts as regular subtraction, -but returns the result wrapped in a `CertainValue` instance. +but returns the result wrapped in a `CertainScalar` instance. """ for trigfunc in trigfuncs @@ -711,21 +711,21 @@ for trigfunc in trigfuncs regular_func = quote """ - $($f)(x::CertainValue) + $($f)(x::CertainScalar) Compute `$($trigfunc)(x)`. 
""" - $(f)(x::CertainValue) = x.value + $(f)(x::CertainScalar) = x.value end n_reps_func = quote """ - $($f)(x::CertainValue, n::Int) + $($f)(x::CertainScalar, n::Int) Compute `$($trigfunc)(x)` `n` times and return the result(s) as a vector (just repeating the value `n` times). """ - $(f)(x::CertainValue, n::Int) = [x.value for i = 1:n] + $(f)(x::CertainScalar, n::Int) = [x.value for i = 1:n] end eval(regular_func) diff --git a/src/plot_recipes/recipes_certainvalues.jl b/src/plot_recipes/recipes_certainvalues.jl index 62f45057..273b4634 100644 --- a/src/plot_recipes/recipes_certainvalues.jl +++ b/src/plot_recipes/recipes_certainvalues.jl @@ -1,16 +1,16 @@ -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar using RecipesBase -@recipe f(::Type{CertainValue{T}}, x::CertainValue{T}) where {T} = [x.value] +@recipe f(::Type{CertainScalar{T}}, x::CertainScalar{T}) where {T} = [x.value] -@recipe function f(certainvals::Vector{CertainValue}) +@recipe function f(certainvals::Vector{CertainScalar}) @series begin [val.value for val in certainvals] end end -@recipe function f(certainvals::Vararg{CertainValue,N}) where {N} +@recipe function f(certainvals::Vararg{CertainScalar,N}) where {N} @series begin [val.value for val in certainvals] end diff --git a/src/resampling/binning/bin_BinnedResampling.jl b/src/resampling/binning/bin_BinnedResampling.jl index 8308bc78..a9ef8e24 100644 --- a/src/resampling/binning/bin_BinnedResampling.jl +++ b/src/resampling/binning/bin_BinnedResampling.jl @@ -11,7 +11,7 @@ distribute the values according to their indices, into the bins given by `binnin ## Returns Returns an `UncertainIndexValueDataset`. Indices are assumed to be uniformly distributed within each -bin, and are represented as `CertainValue`s at the bin centers. Values of the dataset have different +bin, and are represented as `CertainScalar`s at the bin centers. 
Values of the dataset have different representations depending on what `binning` is: - If `binning isa BinnedResampling{UncertainScalarKDE}`, then values in each bin are represented by a @@ -118,7 +118,7 @@ function bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling{Un bin_centers, binvecs = bin(x, BinnedResampling(RawValues, left_bin_edges, n)) # Estimate distributions in each bin by kernel density estimation - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarPopulation}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarPopulation}}(undef, n_bins) binvec_lengths = length.(binvecs) for i in 1:n_bins @@ -153,7 +153,7 @@ function bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling{Un bin_centers, binvecs = bin(x, BinnedResampling(RawValues, left_bin_edges, n)) # Estimate distributions in each bin by kernel density estimation - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarKDE}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarKDE}}(undef, n_bins) binvec_lengths = length.(binvecs) for i in 1:n_bins diff --git a/src/resampling/binning/bin_BinnedWeightedResampling.jl b/src/resampling/binning/bin_BinnedWeightedResampling.jl index ccd64e92..4324dd4f 100644 --- a/src/resampling/binning/bin_BinnedWeightedResampling.jl +++ b/src/resampling/binning/bin_BinnedWeightedResampling.jl @@ -14,7 +14,7 @@ given by `binning.weights[i]` (probability weights are always normalised to 1). ## Returns Returns an `UncertainIndexValueDataset`. Indices are assumed to be uniformly distributed within each -bin, and are represented as `CertainValue`s at the bin centers. Values of the dataset have different +bin, and are represented as `CertainScalar`s at the bin centers. 
Values of the dataset have different representations depending on what `binning` is: - If `binning isa BinnedWeightedResampling{UncertainScalarKDE}`, then values in each bin are @@ -149,7 +149,7 @@ function bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResam # Estimate distributions in each bin by kernel density estimation n_bins = length(binning.left_bin_edges) - 1 - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarKDE}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarKDE}}(undef, n_bins) for i in 1:n_bins if length(binvecs[i]) > nan_threshold @@ -178,7 +178,7 @@ function bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResam # Estimate distributions in each bin by kernel density estimation n_bins = length(binning.left_bin_edges) - 1 - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarPopulation}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarPopulation}}(undef, n_bins) for i in 1:n_bins if length(binvecs[i]) > nan_threshold diff --git a/src/resampling/resampling_with_schemes/resampling_schemes_binned.jl b/src/resampling/resampling_with_schemes/resampling_schemes_binned.jl index 1b5bdc6c..57333532 100644 --- a/src/resampling/resampling_with_schemes/resampling_schemes_binned.jl +++ b/src/resampling/resampling_with_schemes/resampling_schemes_binned.jl @@ -265,7 +265,7 @@ function resample(x::AbstractUncertainIndexValueDataset, end # Estimate distributions in each bin by kernel density estimation - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarKDE}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarKDE}}(undef, n_bins) for i in 1:n_bins if length(binvecs[i]) > nan_threshold @@ -327,7 +327,7 @@ function resample(x::AbstractUncertainIndexValueDataset, end # Estimate distributions in each bin by kernel density estimation - estimated_value_dists = Vector{Union{CertainValue, 
UncertainScalarPopulation}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarPopulation}}(undef, n_bins) for i in 1:n_bins if length(binvecs[i]) > nan_threshold diff --git a/src/resampling/resampling_with_schemes/resampling_schemes_constrained.jl b/src/resampling/resampling_with_schemes/resampling_schemes_constrained.jl index 3359854e..6bde64cd 100644 --- a/src/resampling/resampling_with_schemes/resampling_schemes_constrained.jl +++ b/src/resampling/resampling_with_schemes/resampling_schemes_constrained.jl @@ -53,7 +53,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x = UncertainIndexDataset(time_uncertain) timeinds_y = UncertainIndexDataset(time_certain) diff --git a/src/resampling/resampling_with_schemes/resampling_schemes_sequential.jl b/src/resampling/resampling_with_schemes/resampling_schemes_sequential.jl index 6807c31c..b47bf7b7 100644 --- a/src/resampling/resampling_with_schemes/resampling_schemes_sequential.jl +++ b/src/resampling/resampling_with_schemes/resampling_schemes_sequential.jl @@ -20,7 +20,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x = UncertainIndexDataset(time_uncertain) timeinds_y = UncertainIndexDataset(time_certain) @@ -59,7 +59,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x = UncertainIndexDataset(time_uncertain) timeinds_y = 
UncertainIndexDataset(time_certain) diff --git a/src/resampling/uncertain_values/resample_certainvalues.jl b/src/resampling/uncertain_values/resample_certainvalues.jl index e2bb2cad..69579cce 100644 --- a/src/resampling/uncertain_values/resample_certainvalues.jl +++ b/src/resampling/uncertain_values/resample_certainvalues.jl @@ -1,13 +1,13 @@ import ..SamplingConstraints: SamplingConstraint -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar resample(x::Number) = x -resample(v::CertainValue) = v.value -resample(v::CertainValue, n::Int) = [v.value for i = 1:n] +resample(v::CertainScalar) = v.value +resample(v::CertainScalar, n::Int) = [v.value for i = 1:n] -resample(v::CertainValue, s::SamplingConstraint) = v.value -resample(v::CertainValue, s::SamplingConstraint, n::Int) = [v.value for i = 1:n] +resample(v::CertainScalar, s::SamplingConstraint) = v.value +resample(v::CertainScalar, s::SamplingConstraint, n::Int) = [v.value for i = 1:n] constraints = [ :(NoConstraint), @@ -23,8 +23,8 @@ constraints = [ for constraint in constraints funcs = quote - resample(x::CertainValue, constraint::$(constraint)) = x.value - resample(x::CertainValue, constraint::$(constraint), n::Int) = [x.value for i = 1:n] + resample(x::CertainScalar, constraint::$(constraint)) = x.value + resample(x::CertainScalar, constraint::$(constraint), n::Int) = [x.value for i = 1:n] end eval(funcs) end \ No newline at end of file diff --git a/src/sampling_constraints/constrain_certain_value.jl b/src/sampling_constraints/constrain_certain_value.jl index b73fd4f9..1892c16f 100644 --- a/src/sampling_constraints/constrain_certain_value.jl +++ b/src/sampling_constraints/constrain_certain_value.jl @@ -1,12 +1,12 @@ -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar -constrain(v::CertainValue) = v -constrain(v::CertainValue, s::SamplingConstraint) = v -constrain(v::CertainValue, s::TruncateLowerQuantile) = v -constrain(v::CertainValue, 
s::TruncateUpperQuantile) = v -constrain(v::CertainValue, s::TruncateQuantiles) = v -constrain(v::CertainValue, s::TruncateStd) = v +constrain(v::CertainScalar) = v +constrain(v::CertainScalar, s::SamplingConstraint) = v +constrain(v::CertainScalar, s::TruncateLowerQuantile) = v +constrain(v::CertainScalar, s::TruncateUpperQuantile) = v +constrain(v::CertainScalar, s::TruncateQuantiles) = v +constrain(v::CertainScalar, s::TruncateStd) = v export constrain \ No newline at end of file diff --git a/src/sampling_constraints/constraint_definitions.jl b/src/sampling_constraints/constraint_definitions.jl index b3220d48..1ea5f859 100644 --- a/src/sampling_constraints/constraint_definitions.jl +++ b/src/sampling_constraints/constraint_definitions.jl @@ -123,7 +123,7 @@ struct TruncateRange{T1, T2} <: ValueSamplingConstraint max::T2 function TruncateRange(min::T1, max::T2) where {T1, T2} - if min <= max # <= ties are allowed, because we may encounter CertainValue instances + if min <= max # <= ties are allowed, because we may encounter CertainScalar instances return new{T1, T2}(min, max) else err_msg = "Cannot create TruncateRange instance. Need min < max" diff --git a/src/sampling_constraints/ordered_sequences/ordered_sequences.jl b/src/sampling_constraints/ordered_sequences/ordered_sequences.jl index 267e7b39..37f20851 100644 --- a/src/sampling_constraints/ordered_sequences/ordered_sequences.jl +++ b/src/sampling_constraints/ordered_sequences/ordered_sequences.jl @@ -58,7 +58,7 @@ is necessary because some distributions may have infinite support). """ function sequence_exists(lqs, uqs, c::StrictlyIncreasing{StartToEnd}) L = length(lqs) - if any(lqs .> uqs) # ties are allowed, because we have `CertainValue`s + if any(lqs .> uqs) # ties are allowed, because we have `CertainScalar`s error("Not all `lqs[i]` are lower than uqs[i]. 
Quantile calculations are not meaningful.") return false end @@ -73,7 +73,7 @@ end function sequence_exists(lqs, uqs, c::StrictlyDecreasing{StartToEnd}) L = length(lqs) - if any(lqs .> uqs) # ties are allowed, because we have `CertainValue`s + if any(lqs .> uqs) # ties are allowed, because we have `CertainScalar`s error("Not all `lqs[i]` are lower than uqs[i]. Quantile calculations are not meaningful.") return false end diff --git a/src/sampling_constraints/truncation/truncate_CertainValue.jl b/src/sampling_constraints/truncation/truncate_CertainValue.jl index 28be0704..e0c8e82b 100644 --- a/src/sampling_constraints/truncation/truncate_CertainValue.jl +++ b/src/sampling_constraints/truncation/truncate_CertainValue.jl @@ -1,7 +1,7 @@ -import ..UncertainValues.CertainValue +import ..UncertainValues.CertainScalar -Base.truncate(v::CertainValue) = v -function Base.truncate(v::CertainValue, constraint::TruncateMaximum) +Base.truncate(v::CertainScalar) = v +function Base.truncate(v::CertainScalar, constraint::TruncateMaximum) if v.value > constraint.max msg = "Truncating $v with $constraint failed\n" msg2 = "Need value < constraint.max, got $v < $(constraint.max)" @@ -11,7 +11,7 @@ function Base.truncate(v::CertainValue, constraint::TruncateMaximum) end end -function Base.truncate(v::CertainValue, constraint::TruncateMinimum) +function Base.truncate(v::CertainScalar, constraint::TruncateMinimum) if v.value < constraint.min msg = "Truncating $v with $constraint failed\n" msg2 = "Need value > constraint.min, got $v > $(constraint.min)" @@ -21,7 +21,7 @@ function Base.truncate(v::CertainValue, constraint::TruncateMinimum) end end -function Base.truncate(v::CertainValue, constraint::TruncateRange) +function Base.truncate(v::CertainScalar, constraint::TruncateRange) if v.value < constraint.min msg = "Truncating $v with $constraint failed\n" msg2 = "Need value > constraint.min, got $v > $(constraint.min)" @@ -35,7 +35,7 @@ function Base.truncate(v::CertainValue, 
constraint::TruncateRange) end end -truncate(v::CertainValue, s::TruncateLowerQuantile) = v -truncate(v::CertainValue, s::TruncateUpperQuantile) = v -truncate(v::CertainValue, s::TruncateQuantiles) = v -truncate(v::CertainValue, s::TruncateStd) = v +truncate(v::CertainScalar, s::TruncateLowerQuantile) = v +truncate(v::CertainScalar, s::TruncateUpperQuantile) = v +truncate(v::CertainScalar, s::TruncateQuantiles) = v +truncate(v::CertainScalar, s::TruncateStd) = v diff --git a/src/uncertain_datasets/UncertainIndexDataset.jl b/src/uncertain_datasets/UncertainIndexDataset.jl index 0d95df41..80b717af 100644 --- a/src/uncertain_datasets/UncertainIndexDataset.jl +++ b/src/uncertain_datasets/UncertainIndexDataset.jl @@ -24,7 +24,7 @@ struct ConstrainedUncertainIndexDataset <: AbstractUncertainIndexDataset end function UncertainIndexDataset(x::AbstractArray{T, 1}) where T - UncertainIndexDataset(CertainValue.(x)) + UncertainIndexDataset(CertainScalar.(x)) end export diff --git a/src/uncertain_datasets/UncertainValueDataset.jl b/src/uncertain_datasets/UncertainValueDataset.jl index d9a6ab3c..a7e924d6 100644 --- a/src/uncertain_datasets/UncertainValueDataset.jl +++ b/src/uncertain_datasets/UncertainValueDataset.jl @@ -26,7 +26,7 @@ struct ConstrainedUncertainValueDataset <: AbstractUncertainValueDataset end function UncertainValueDataset(x::AbstractArray{T, 1}) where T - UncertainValueDataset(CertainValue.(x)) + UncertainValueDataset(CertainScalar.(x)) end export diff --git a/src/uncertain_values/CertainScalar.jl b/src/uncertain_values/CertainScalar.jl new file mode 100644 index 00000000..1984699f --- /dev/null +++ b/src/uncertain_values/CertainScalar.jl @@ -0,0 +1,88 @@ +""" + CertainScalar + +A simple wrapper type for values with no uncertainty (i.e. represented by a scalar). + +## Examples + +The two following ways of constructing values without uncertainty are equivalent. 
+ +```julia +u1, u2 = CertainScalar(2.2), CertainScalar(6) +w1, w2 = UncertainValue(2.2), UncertainValue(6) +``` +""" +struct CertainScalar{T} <: AbstractUncertainValue + value::T +end + +Broadcast.broadcastable(x::CertainScalar) = Ref(x.value) + +function summarise(uval::CertainScalar) + _type = typeof(uval) + val = uval.value + "$_type($val)" +end +Base.show(io::IO, uval::CertainScalar) = print(io, summarise(uval)) + +eltype(v::CertainScalar{T}) where {T} = T + +Base.size(x::CertainScalar) = () +Base.size(x::CertainScalar,d) = convert(Int,d)<1 ? throw(BoundsError()) : 1 +Base.axes(x::CertainScalar) = () +Base.axes(x::CertainScalar,d) = convert(Int,d)<1 ? throw(BoundsError()) : Base.OneTo(1) +Base.ndims(x::CertainScalar) = 0 +Base.ndims(::Type{<:CertainScalar}) = 0 +Base.length(x::CertainScalar) = 1 +Base.firstindex(x::CertainScalar) = 1 +Base.lastindex(x::CertainScalar) = 1 +Base.IteratorSize(::Type{<:CertainScalar}) = Base.HasShape{0}() +Base.keys(::CertainScalar) = Base.OneTo(1) +Base.getindex(x::CertainScalar) = x + +function Base.getindex(x::CertainScalar, i::Integer) + Base.@_inline_meta + @boundscheck i == 1 || throw(BoundsError()) + x +end +function Base.getindex(x::CertainScalar, I::Integer...) 
+ Base.@_inline_meta + @boundscheck all([i == 1 for i in I]) || throw(BoundsError()) + x +end + +Base.first(x::CertainScalar) = x +Base.last(x::CertainScalar) = x +Base.copy(x::CertainScalar) = x + +Base.minimum(v::CertainScalar) = v.value +Base.maximum(v::CertainScalar) = v.value +Base.isnan(x::CertainScalar) = Base.isnan(x.value) +Base.abs2(x::CertainScalar) = Base.abs2(x.value) + +StatsBase.mean(v::CertainScalar) = v.value +StatsBase.median(v::CertainScalar) = v.value +StatsBase.middle(v::CertainScalar) = v.value +StatsBase.quantile(v::CertainScalar, q) = v.value +StatsBase.quantile(v::CertainScalar, q, n::Int) = v.value +StatsBase.std(v::CertainScalar{T}) where {T} = zero(T) + +Base.rand(v::CertainScalar) = v.value +Base.rand(v::CertainScalar{T}, n::Int) where T = repeat([v.value], n) + +Base.float(v::CertainScalar) = float(v.value) + +function Base.:<(x::CertainScalar{T1}, y::CertainScalar{T2}) where { + T1 <: Real, T2 <: Real} + x.value < y.value +end + +function IntervalArithmetic.interval(x::CertainScalar{T1}, y::CertainScalar{T2}) where { + T1 <: Real, T2 <: Real} + interval(x.value, y.value) +end + + +export +CertainScalar, +UncertainValue \ No newline at end of file diff --git a/src/uncertain_values/CertainValue.jl b/src/uncertain_values/CertainValue.jl deleted file mode 100644 index 80e8c77a..00000000 --- a/src/uncertain_values/CertainValue.jl +++ /dev/null @@ -1,88 +0,0 @@ -""" - CertainValue - -A simple wrapper type for values with no uncertainty (i.e. represented by a scalar). - -## Examples - -The two following ways of constructing values without uncertainty are equivalent. 
- -```julia -u1, u2 = CertainValue(2.2), CertainValue(6) -w1, w2 = UncertainValue(2.2), UncertainValue(6) -``` -""" -struct CertainValue{T} <: AbstractUncertainValue - value::T -end - -Broadcast.broadcastable(x::CertainValue) = Ref(x.value) - -function summarise(uval::CertainValue) - _type = typeof(uval) - val = uval.value - "$_type($val)" -end -Base.show(io::IO, uval::CertainValue) = print(io, summarise(uval)) - -eltype(v::CertainValue{T}) where {T} = T - -Base.size(x::CertainValue) = () -Base.size(x::CertainValue,d) = convert(Int,d)<1 ? throw(BoundsError()) : 1 -Base.axes(x::CertainValue) = () -Base.axes(x::CertainValue,d) = convert(Int,d)<1 ? throw(BoundsError()) : Base.OneTo(1) -Base.ndims(x::CertainValue) = 0 -Base.ndims(::Type{<:CertainValue}) = 0 -Base.length(x::CertainValue) = 1 -Base.firstindex(x::CertainValue) = 1 -Base.lastindex(x::CertainValue) = 1 -Base.IteratorSize(::Type{<:CertainValue}) = Base.HasShape{0}() -Base.keys(::CertainValue) = Base.OneTo(1) -Base.getindex(x::CertainValue) = x - -function Base.getindex(x::CertainValue, i::Integer) - Base.@_inline_meta - @boundscheck i == 1 || throw(BoundsError()) - x -end -function Base.getindex(x::CertainValue, I::Integer...) 
- Base.@_inline_meta - @boundscheck all([i == 1 for i in I]) || throw(BoundsError()) - x -end - -Base.first(x::CertainValue) = x -Base.last(x::CertainValue) = x -Base.copy(x::CertainValue) = x - -Base.minimum(v::CertainValue) = v.value -Base.maximum(v::CertainValue) = v.value -Base.isnan(x::CertainValue) = Base.isnan(x.value) -Base.abs2(x::CertainValue) = Base.abs2(x.value) - -StatsBase.mean(v::CertainValue) = v.value -StatsBase.median(v::CertainValue) = v.value -StatsBase.middle(v::CertainValue) = v.value -StatsBase.quantile(v::CertainValue, q) = v.value -StatsBase.quantile(v::CertainValue, q, n::Int) = v.value -StatsBase.std(v::CertainValue{T}) where {T} = zero(T) - -Base.rand(v::CertainValue) = v.value -Base.rand(v::CertainValue{T}, n::Int) where T = repeat([v.value], n) - -Base.float(v::CertainValue) = float(v.value) - -function Base.:<(x::CertainValue{T1}, y::CertainValue{T2}) where { - T1 <: Real, T2 <: Real} - x.value < y.value -end - -function IntervalArithmetic.interval(x::CertainValue{T1}, y::CertainValue{T2}) where { - T1 <: Real, T2 <: Real} - interval(x.value, y.value) -end - - -export -CertainValue, -UncertainValue \ No newline at end of file diff --git a/src/uncertain_values/UncertainScalarPopulation.jl b/src/uncertain_values/UncertainScalarPopulation.jl index 2ebcecef..62c2aef9 100644 --- a/src/uncertain_values/UncertainScalarPopulation.jl +++ b/src/uncertain_values/UncertainScalarPopulation.jl @@ -3,7 +3,17 @@ import IntervalArithmetic: interval import Distributions import StatsBase -const POTENTIAL_UVAL_TYPES = Union{T1, T2} where {T1<:Number, T2} where T2 <: AbstractUncertainValue +const POTENTIAL_UVAL_TYPES = Union{T1, T2} where {T1 <: Number, T2 <: AbstractUncertainValue} + + +convert_elwise(f, x) = map(f, x); +nested_convert_elwise(f, x) = map(xᵢ -> convert_elwise(f, xᵢ), x) + +function verify_pop_and_weights(pop, wts) + if length(pop) != length(wts) + throw(ArgumentError("The number of population members and the number of weights do not 
match.")) + end +end """ UncertainScalarPopulation(values, probs) @@ -29,53 +39,95 @@ population members (for example during resampling). - If `values` contains one or more uncertain values, then the `values` field will be of type `Vector{AbstractUncertainValue}` -## Example +## Examples + +### Weighted scalar populations + +Weighted scalar populations are defined as follows. Note: Weights must always be provided, +and scalars must be converted to uncertain values before creating the population. ```julia +using UncertainData +pop = UncertainValue.([1.0, 2.0, 3.0]); wts = rand(3) + +# Treat elements of `pop` as equiprobable +p = UncertainScalarPopulation(pop, [1, 1, 1]) -# Uncertain population consisting of CertainValues (scalars get promoted to -# CertainValue), theoretical distributions and KDE distributions -pop1 = UncertainScalarPopulation( - [3.0, UncertainValue(Normal, 0, 1), UncertainValue(Gamma, 2, 3), - UncertainValue(Uniform, rand(1000))], [0.5, 0.5, 0.5, 0.5]) +# Treat elements of `pop` as inequiprobable +p = UncertainScalarPopulation(pop, [2, 3, 1]) +``` + +## Populations with mixed-type uncertain values + +Uncertain population can also consist of a mixture of different types of uncertain values. +Here, we use a population consisting of a scalar, two theoretical distributions +with known parameters, and a theoretical uniform distribution whose parameters +are estimated from a random sample `s`. We assign equal weights to the member +of the population. 
+ +```julia +s = rand(1000) +pop = [ + 3.0, + UncertainValue(Normal, 0, 1), + UncertainValue(Gamma, 2, 3), + UncertainValue(Uniform, s) +] +wts = [0.5, 0.5, 0.5, 0.5] +p = UncertainScalarPopulation(pop, wts) +``` -# Uncertain population consisting of scalar values -pop2 = UncertainScalarPopulation([1, 2, 3], rand(3)) -pop3 = UncertainScalarPopulation([1, 2, 3], Weights(rand(3))) +## Nested populations -# Uncertain population consisting of uncertain populations -pop4 = UncertainScalarPopulation([pop1, pop2], [0.1, 0.5]) +Nested populations are also possible. -# Uncertain population consisting of uncertain populations, a scalar and -# a normal distribution. Assign random weights. -vals = [pop1, pop2, 2, UncertainValue(Normal, 0.3, 0.014)] -pop5 = UncertainScalarPopulation(vals, Weights(rand(4))) +``` +using UncertainData, Distributions +s = rand(Normal(0.1, 2.0), 8000) +p1 = [UncertainValue(Normal, 0.5, 0.33), UncertainValue(Gamma, 0.6, 0.9)] + +# If including scalars, these must be converted to `CertainScalar`s first, +# as follows. +p2 = [UncertainValue(2.2), UncertainValue(Normal, s), UncertainValue(s)] + +# Give p1 and p2 relative weights 0.1 and 0.5 (these are normalized, so +# do not need to sum to 1). 
+p = UncertainScalarPopulation([p1, p2], [0.1, 0.5]) ``` """ struct UncertainScalarPopulation{T, PW <: StatsBase.AbstractWeights} <: AbstractScalarPopulation{T, PW} - values::Vector{T} + values::AbstractVector{T} probs::PW -end - -""" - UncertainScalarPopulation(values::Vector, probabilities::Vector{Float64}) -Construct a population from a vector of values and a vector of probabilities associated -to those values.""" -function UncertainScalarPopulation(values::Vector{T1}, probabilities::Vector{T2}) where {T1 <: Number, T2 <: Number} - if length(values) != length(probabilities) - throw(ArgumentError("Lengths of values and probability vectors do not match.")) + function UncertainScalarPopulation(pop, probs::AbstractVector{T}) where {T <: Number} + members = nested_convert_elwise(UncertainValue, pop); TT = eltype(members) + verify_pop_and_weights(pop, probs) + wts = Weights(probs); PW = typeof(wts) + new{TT, PW}(members, wts) end - UncertainScalarPopulation(values, StatsBase.weights(probabilities)) -end -function UncertainScalarPopulation(values::VT, probabilities) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} - if length(values) != length(probabilities) - throw(ArgumentError("Lengths of values and probability vectors do not match.")) + + function UncertainScalarPopulation(pop, probs::PW) where {PW <: StatsBase.AbstractWeights} + members = nested_convert_elwise(UncertainValue, pop) + verify_pop_and_weights(pop, probs) + T = eltype(members) + new{T, PW}(members, probs) end - UncertainScalarPopulation(UncertainValue.(values), StatsBase.weights(probabilities)) end +# function UncertainScalarPopulation(values::Vector{T1}, probabilities::Vector{T2}) where {T1 <: Number, T2 <: Number} + +# UncertainScalarPopulation( +# nested_convert_elwise(UncertainValue, values), # in case scalars are provided +# StatsBase.weights(probabilities) +# ) +# # end +# function UncertainScalarPopulation(values::VT, probabilities) where VT <: Vector{ELTYPE} where 
{ELTYPE<:POTENTIAL_UVAL_TYPES} + +# UncertainScalarPopulation(UncertainValue.(values), StatsBase.weights(probabilities)) +# end + + """ ConstrainedUncertainScalarPopulation(values, probs) ConstrainedUncertainScalarPopulation(values, probs::Vector{Number}) diff --git a/src/uncertain_values/UncertainScalarsKDE.jl b/src/uncertain_values/UncertainScalarsKDE.jl index 5c47c5de..8e725710 100644 --- a/src/uncertain_values/UncertainScalarsKDE.jl +++ b/src/uncertain_values/UncertainScalarsKDE.jl @@ -3,27 +3,43 @@ import Base.rand import StatsBase.quantile import StatsBase.median import Distributions.support - import Distributions.ecdf import Base: minimum, maximum, max, min + """ - UncertainScalarKDE(d::KernelDensity.UnivariateKDE, values::AbstractVector{T}, range, pdf) + UncertainScalarKDE(d::KernelDensity.UnivariateKDE, x::AbstractVector, range, pdf) + +An uncertain value represented by a kernel density estimate `d`, to the +underlying distribution for the empirical sample `x`. + +`range` are the values for which the pdf is estimated, and `pdf` are the +corresponding values of the pdf. Gaussian kernels are used by default. + +## Examples -An empirical value represented by a distribution estimated from actual data. +```julia +using Distributions, UncertainData, KernelDensity -## Fields +# Draw a 1000-point sample from a normal distribution. +s = rand(Normal(), 1000) -- **`distribution`**: The `UnivariateKDE` estimate for the distribution of `values`. -- **`values`**: The values from which `distribution` is estimated. -- **`range`**: The values for which the pdf is estimated. -- **`pdf`**: The values of the pdf at each point in `range`. 
+# Estimate a distribution to the underlying distribution by using +# kernel density estimation on the sample `s` +x = UncertainValue(s) + +# The explicit constructor allows adjusting the kernel (must be a valid +# kernel from Distributions.jl; normal distributions are the default), +# and the number of points used for the estimation (must be a power of 2; +# default is 2048 points). +x = UncertainValue(UnivariateKDE, s; kernel = Normal, npoints = 1024) +``` """ -struct UncertainScalarKDE{T} <: AbstractUncertainScalarKDE{T} +struct UncertainScalarKDE{T, V <: AbstractVector{T}} <: AbstractUncertainScalarKDE{T} distribution::KernelDensity.UnivariateKDE - values::AbstractVector{T} + values::V range pdf::StatsBase.Weights end @@ -34,9 +50,9 @@ end A truncated [`UncertainScalarKDE`](@ref). """ -struct TruncatedUncertainScalarKDE{T} <: AbstractUncertainScalarKDE{T} +struct TruncatedUncertainScalarKDE{T, V <: AbstractVector{T}} <: AbstractUncertainScalarKDE{T} distribution::KernelDensity.UnivariateKDE - values::AbstractVector{T} + values::V range pdf::StatsBase.Weights end @@ -147,15 +163,12 @@ min(uv::AbstractUncertainScalarKDE) = minimum(uv.range) max(uv::AbstractUncertainScalarKDE) = maximum(uv.range) - - - export -AbstractUncertainScalarKDE, -UncertainScalarKDE -ecdf, -support, -getquantileindex, -UnivariateKDE, -minimum, -maximum \ No newline at end of file + AbstractUncertainScalarKDE, + UncertainScalarKDE + ecdf, + support, + getquantileindex, + UnivariateKDE, + minimum, + maximum \ No newline at end of file diff --git a/src/uncertain_values/UncertainScalarsTheoretical.jl b/src/uncertain_values/UncertainScalarsTheoretical.jl index a522ad69..28af705c 100644 --- a/src/uncertain_values/UncertainScalarsTheoretical.jl +++ b/src/uncertain_values/UncertainScalarsTheoretical.jl @@ -77,7 +77,9 @@ import Distributions.Gamma import Distributions.Frechet """ -Uncertain value represented by a generic three-parameter distribution. 
+ UncertainScalarTheoreticalThreeParameter(d::Distribution, a, b, c) + +Uncertain value represented by a generic three-parameter distribution `d` with parameters `a`, `b` and `c`. """ struct UncertainScalarTheoreticalThreeParameter{S<:ValueSupport, T1<:Number, T2<:Number, T3<:Number} <: AbstractUncertainThreeParameterScalarValue{S, T1, T2, T3} distribution::Distribution{Univariate, S} @@ -87,7 +89,9 @@ struct UncertainScalarTheoreticalThreeParameter{S<:ValueSupport, T1<:Number, T2< end """ -Uncertain value represented by a generic two-parameter distribution. + UncertainScalarTheoreticalTwoParameter(d::Distribution, a, b) + +Uncertain value represented by a generic two-parameter distribution `d` with parameters `a` and `b`. """ struct UncertainScalarTheoreticalTwoParameter{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -96,17 +100,25 @@ struct UncertainScalarTheoreticalTwoParameter{S<:ValueSupport, T1<:Number, T2<:N end """ -Uncertain value represented by a generic one-parameter distribution. + UncertainScalarTheoreticalOneParameter(d::Distribution, a) + +Uncertain value represented by a generic one-parameter distribution `d` with parameter `a`. """ struct UncertainScalarGenericOneParameter{S<:ValueSupport, T1<:Number} <: AbstractUncertainOneParameterScalarValue{S, T1} distribution::Distribution{Univariate, S} a::T1 end +""" + UncertainScalarNormallyDistributed(d::Normal, μ, σ) + +Uncertain value represented by a normal distribution `d` with mean `μ` and standard deviation `σ`. +## Example -""" -Uncertain value represented by a normal distribution. 
+```julia +x = UncertainValue(Normal, 1.2, 0.3) +``` """ struct UncertainScalarNormallyDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -114,9 +126,16 @@ struct UncertainScalarNormallyDistributed{S<:ValueSupport, T1<:Number, T2<:Numbe σ::T2 end - """ -Uncertain value represented by a uniform distribution. + UncertainScalarUniformlyDistributed(d::Uniform, lower, upper) + +Uncertain value represented by a uniform distribution `d` with `lower` and `upper` bounds. + +## Example + +```julia +x = UncertainValue(Uniform, -2.5, 4.5) +``` """ struct UncertainScalarUniformlyDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -124,9 +143,16 @@ struct UncertainScalarUniformlyDistributed{S<:ValueSupport, T1<:Number, T2<:Numb upper::T2 end - """ -Uncertain value represented by a beta distribution. + UncertainScalarBetaDistributed(d::Beta, α, β) + +Uncertain value represented by a beta distribution `d` with parameters `α` and `β`. + +## Example + +```julia +x = UncertainValue(Beta, 0.5, 3.0) +``` """ struct UncertainScalarBetaDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -134,9 +160,16 @@ struct UncertainScalarBetaDistributed{S<:ValueSupport, T1<:Number, T2<:Number} < β::T2 end - """ -Uncertain value represented by a beta prime distribution. + UncertainScalarBetaPrimeDistributed(d::BetaPrime, α, β) + +Uncertain value represented by a beta prime distribution `d` with parameters `α` and `β`. 
+ +## Example + +```julia +x = UncertainValue(BetaPrime, 2.1, 3.3) +``` """ struct UncertainScalarBetaPrimeDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -144,10 +177,16 @@ struct UncertainScalarBetaPrimeDistributed{S<:ValueSupport, T1<:Number, T2<:Numb β::T2 end +""" + UncertainScalarBetaBinomialDistributed(d::BetaBinomial, n, α, β) + +Uncertain value represented by a beta binomial distribution `d` with parameters `n`, `α` and `β`. +## Example -""" -Uncertain value represented by a beta binomial distribution. +```julia +x = UncertainValue(BetaBinomial, 10, 0.2, 0.7) +``` """ struct UncertainScalarBetaBinomialDistributed{S<:ValueSupport, T1<:Number, T2<:Number, T3<:Number} <: AbstractUncertainThreeParameterScalarValue{S, T1, T2, T3} distribution::Distribution{Univariate, S} @@ -156,11 +195,16 @@ struct UncertainScalarBetaBinomialDistributed{S<:ValueSupport, T1<:Number, T2<:N β::T3 end +""" + UncertainScalarGammaDistributed(d::Gamma, α, θ) +Uncertain value represented by a gamma distribution `d` with parameters `α` and `θ`. +## Example -""" -Uncertain value represented by a gamma distribution. +```julia +x = UncertainValue(Gamma, 0.2, 0.44) +``` """ struct UncertainScalarGammaDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -168,11 +212,16 @@ struct UncertainScalarGammaDistributed{S<:ValueSupport, T1<:Number, T2<:Number} θ::T2 end +""" + UncertainScalarFrechetDistributed(d::Frechet, α, θ) +Uncertain value represented by a Fréchet distribution `d` with parameters `α` and `θ`. +## Example -""" -Uncertain value represented by a Fréchet distribution. 
+```julia +x = UncertainValue(Frechet, 2.0, 2.1) +``` """ struct UncertainScalarFrechetDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -180,11 +229,16 @@ struct UncertainScalarFrechetDistributed{S<:ValueSupport, T1<:Number, T2<:Number θ::T2 end +""" + UncertainScalarBinomialDistributed(d::Binomial, n, θ) +Uncertain value represented by a binomial distribution `d` with parameters `n` and `θ`. +## Example -""" -Uncertain value represented by a binomial distribution. +```julia +x = UncertainValue(Binomial, 15, 0.5) +``` """ struct UncertainScalarBinomialDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -193,9 +247,6 @@ struct UncertainScalarBinomialDistributed{S<:ValueSupport, T1<:Number, T2<:Numbe end - - - ################### # Pretty printing ################### @@ -310,9 +361,9 @@ Base.show(io::IO, q::UncertainScalarBinomialDistributed) = print(io, summarise(q export TheoreticalDistributionScalarValue, -AbstractUncertainOneParameterScalarValue, -AbstractUncertainTwoParameterScalarValue, -AbstractUncertainThreeParameterScalarValue, +# AbstractUncertainOneParameterScalarValue, +# AbstractUncertainTwoParameterScalarValue, +# AbstractUncertainThreeParameterScalarValue, ConstrainedUncertainScalarValueOneParameter, ConstrainedUncertainScalarValueTwoParameter, diff --git a/src/uncertain_values/UncertainScalarsTheoreticalFitted.jl b/src/uncertain_values/UncertainScalarsTheoreticalFitted.jl index 29164863..0d6756c9 100644 --- a/src/uncertain_values/UncertainScalarsTheoreticalFitted.jl +++ b/src/uncertain_values/UncertainScalarsTheoreticalFitted.jl @@ -6,13 +6,79 @@ abstract type TheoreticalFittedUncertainScalar <: TheoreticalDistributionScalarV Broadcast.broadcastable(uv::TheoreticalFittedUncertainScalar) = Ref(uv.distribution) """ -UncertainScalarTheoreticalFit + 
UncertainScalarTheoreticalFit( + d::FittedDistribution{D}, + x::AbstractVector{T}) where {D <: Distribution, T} -An empirical value represented by a distribution estimated from actual data. +An uncertain value represented by a distribution `d` whose parameters are +estimated from the empirical sample `x`. -## Fields -- **`distribution`** The distribution describing the value. -- **`values`**: The values from which `distribution` is estimated. +## Examples + +Here, we simulate an empirical sample. We then decide to represent the +sample by a distribution whose parameters are estimated from the sample. + +``` julia +using UncertainData, Distributions +# Simulate a 1000-point sample by drawing from a uniform distribution. +d = Uniform(); s = rand(d, 1000) + +# Represent `s` by a uniform distribution whose parameters are estimated from `s` +x = UncertainValue(Uniform, s) +``` + +``` julia +using UncertainData, Distributions +# Simulate a 1000-point sample by drawing from a normal distribution. +s = rand(Normal(), 1000) + +# Represent `s` by a normal distribution whose parameters are estimated from `s` +x = UncertainValue(Normal, s) +``` + +```julia +using UncertainData, Distributions + +# Simulate a 1000-point sample by drawing from a gamma distribution +# with parameters α = 2.1, θ = 5.2. +s = rand(Gamma(2.1, 5.2), 1000) + +# Represent `s` by a gamma distribution whose parameters are estimated from `s` +x = UncertainValue(Gamma, s) +``` + + +*Note: these examples are contrived: of course, estimating the parameters +of a uniform distribution from a sample drawn from a uniform distribution +will yield a good fit. Real samples are usually less straightforward to +model using theoretical distributions*. +In real applications, make sure to always visually investigate the histogram +of your data before picking which distribution to fit! Alternatively, +use kernel density estimation to fit a distribution (i.e. [`UncertainScalarKDE`](@ref)).
+ +### Beware: fitting distributions may lead to nonsensical results! + +In a less contrived example, we may try to fit a beta distribution to a sample +generated from a gamma distribution. + + +```julia +using Distributions, UncertainData + +# Generate 1000 values from a gamma distribution with parameters α = 2.1, +# θ = 5.2. +s = rand(Gamma(2.1, 5.2), 1000) + +# Represent `s` by a beta distribution whose parameters are estimated from `s` +x = UncertainValue(Beta, s) +``` + +This is obviously not a good idea. Always visualise your distribution before +deciding on which distribution to fit! You won't get any error messages if you +try to fit a distribution that does not match your data. + +If the data do not follow an obvious theoretical distribution, it is better to +use kernel density estimation to define the uncertain value. """ struct UncertainScalarTheoreticalFit{D <: Distribution, T} <: TheoreticalFittedUncertainScalar distribution::FittedDistribution{D} # S may be Continuous or Discrete @@ -20,14 +86,12 @@ struct UncertainScalarTheoreticalFit{D <: Distribution, T} <: TheoreticalFittedU end """ - ConstrainedUncertainScalarTheoreticalFit - -An empirical value represented by a distribution estimated from actual data. + ConstrainedUncertainScalarTheoreticalFit( + d::FittedDistribution{D}, + x::AbstractVector{T}) where {D <: Distribution, T} -## Fields -- **`distribution`** The truncated version of the distribution describing the - value. -- **`values`**: The values from which the original distribution was estimated. +An uncertain value represented by a distribution `d` whose parameters are estimated from the empirical sample `x`, +where the distribution `d` has been truncated after it has been estimated.
""" struct ConstrainedUncertainScalarTheoreticalFit{D <: Distribution, T} <: TheoreticalFittedUncertainScalar distribution::FittedDistribution{D} # S may be Continuous or Discrete diff --git a/src/uncertain_values/UncertainValue.jl b/src/uncertain_values/UncertainValue.jl index 1413e464..570f2383 100644 --- a/src/uncertain_values/UncertainValue.jl +++ b/src/uncertain_values/UncertainValue.jl @@ -3,18 +3,52 @@ import Distributions.Distribution import StatsBase: AbstractWeights, Weights import Distributions -""" - UncertainValue(x::T) where T <: Real -Create a `CertainValue` instance from a scalar with no uncertainty. """ -UncertainValue(x::T) where T <: Real = CertainValue(x) + UncertainValue(d::Distribution) + UncertainValue(d::Type{Normal}, μ, σ) → UncertainScalarNormallyDistributed + UncertainValue(d::Type{Uniform}, lower, upper) → UncertainScalarUniformlyDistributed + UncertainValue(d::Type{Beta}, α, β) → UncertainScalarBetaDistributed + UncertainValue(d::Type{BetaPrime}, α, β) → UncertainScalarBetaPrimeDistributed + UncertainValue(d::Type{Gamma}, α, θ) → UncertainScalarGammaDistributed + UncertainValue(d::Type{Frechet}, α, θ) → UncertainScalarFrechetDistributed + UncertainValue(d::Type{Binomial}, n, p) → UncertainScalarBinomialDistributed + UncertainValue(d::Type{BetaBinomial}, n, α, β) → UncertainScalarBetaBinomialDistributed -# Identity constructor -UncertainValue(uval::AbstractUncertainValue) = uval +Construct an uncertain value represented by a (possibly truncated) +theoretical distribution `d`. + + UncertainValue(d::Type{<:Distribution}, x::AbstractVector) → UncertainScalarTheoreticalFit + +Construct an uncertain value by fitting a distribution of type `d` to an empirical sample +`x`, and use that fitted distribution as the representation of `x`.
+ +See also: [`UncertainScalarTheoreticalFit`](@ref) + + UncertainValue(x::AbstractVector; + kernel::Type{<:Distribution} = Normal, npoints::Int = 2048) → UncertainScalarKDE + +Construct an uncertain value by estimating the underlying distribution to +the empirical sample `x` using the kernel density estimation (KDE), then using the resulting +KDE-distribution as the representation of `x`. Fast Fourier transforms are used in the kernel density +estimation, so the number of points should be a power of 2 (default = 2048). + +See also: [`UncertainScalarKDE`](@ref) + + UncertainValue(pop::Vector, probs::Union{Vector, AbstractWeights}) → UncertainScalarPopulation + +Construct an uncertain value from a population `pop`, whose sampling +probabilities (prior beliefs) are `probs`. The population `pop` can contain any +type of uncertain value. Scalars in `pop` are converted to [`CertainScalar`](@ref)s. + +See also: [`UncertainScalarPopulation`](@ref) + + UncertainValue(x::T) where {T <: Real} → CertainScalar + +Create a `CertainScalar` instance from a scalar with no uncertainty. + +See also: [`CertainScalar`](@ref) -# From Measurements.jl -""" UncertainValue(m::Measurement) → UncertainScalarNormallyDistributed Convert a `Measurement` instance to an uncertain value compatible with UncertainData.jl. @@ -22,63 +56,148 @@ Convert a `Measurement` instance to an uncertain value compatible with Uncertain `Measurement` instances from [Measurements.jl](https://github.com/JuliaPhysics/Measurements.jl)[^1] are treated as normal distributions with known means. Once the conversion is done, the functionality provided by Measurements.jl, such as exact error propagation, is lost. -""" -UncertainValue(m::Measurement{T}) where T = UncertainValue(Normal, m.val, m.err) -""" - UncertainValue(values::Vector{<:Number}, probs::Vector{<:Number}) +# Examples -From a numeric vector, construct an `UncertainPopulation` whose -members are scalar values. 
+## Theoretical distributions with known parameters -## Examples +Measurements are often given as a mean and an associated standard deviation. +Such measurements can be directly represented by the parameters of the distribution. + +Assume a data point has a normally distributed uncertainty, with a mean value of 2.2 +and standard deviation of 4.0. We use the following notation to represent that value. ```julia -x = measurement(2.2, 0.21) -UncertainValue(x) +using UncertainData, Distributions +UncertainValue(Normal(2.2, 4.0)) +UncertainValue(Normal, 2.2, 4.0) # alternative constructor ``` -""" -function UncertainValue(values::Vector{<:Number}, probs::Vector{<:Number}) - UncertainScalarPopulation(float.(values), probs) -end +Other distributions, as well as truncated distributions, also work. -""" - UncertainValue(values::Vector{<:Number}, probs::Vector{<:Number}) +```julia +using UncertainData +UncertainValue(Uniform, -5.0, 5.0) +UncertainValue(Gamma, 3.0, 1.2) -From a numeric vector, construct an `UncertainPopulation` whose -members are scalar values. -""" -function UncertainValue(values::Vector{<:Number}, probs::W) where {W <: AbstractWeights} - UncertainScalarPopulation(float.(values), probs) -end +lo, hi = 0.5, 3.5 # truncation limits +UncertainValue(Truncated(Gamma(4, 5.1), lo, hi)) +``` -""" - UncertainValue(values::Vector, probs::Union{Vector, AbstractWeights}) +## Theoretical distributions with parameters estimated from empirical data -Construct a population whose members are given by `values` and whose sampling -probabilities are given by `probs`. The elements of `values` can be either -numeric or uncertain values of any type. -""" -function UncertainValue(values::VT, probs) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} - UncertainScalarPopulation(UncertainValue.(values), probs) -end +In some cases, it might be convenient to represent an empirical sample by a +probability distribution whose parameters are estimated from the sample.
+Here, we simulate a real dataset by generating a small sample from a +normal distribution, then fit a normal distribution to it. -function UncertainValue(values::VT, probs::Vector{Number}) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} - UncertainScalarPopulation(UncertainValue.(values), probs) -end +```julia +using UncertainData, Distributions +s = rand(Normal(0, 1), 100) -""" - UncertainValue(data::Vector{T}; - kernel::Type{D} = Normal, - npoints::Int=2048) where {D <: Distributions.Distribution, T} +# Represent the sample `s` by a normal distribution with estimated parameters +x = UncertainValue(Normal, s) +``` + +## Distributions estimated using the kernel density approach + +For empirical data with non-trivial underlying distributions, one may use +kernel density estimation to fit a distribution to the empirical sample. + +Below, we simulate a multimodal empirical sample, and represent that +sample by a kernel density estimated distribution. + +```julia +using UncertainData, Distributions +M = MixtureModel(Normal[ + Normal(-2.0, 1.2), + Normal(0.0, 1.0), + Normal(3.0, 2.5)], [0.2, 0.5, 0.3]) +# This is our sample +s = rand(M, 40000) + +# `x` is now a kernel density estimated distribution that represents the sample `s` +x = UncertainValue(s) # or UncertainValue(UnivariateKDE, s) to be explicit +``` + +## Populations (discrete sets of values with associated weights) + +Sometimes, numerous measurements of the same phenomenon might be available. In such cases, +a population may be used to simultaneously represent all data available. Weights +representing prior beliefs can be added (set weights equal if all points are +equiprobable). + +Below, we assume `x1` and `x2` were measured with sophisticated devices, giving +both a mean and standard deviation. `x3`, on the other hand, was measured with a +primitive device, giving only a mean value. Hence our trust in `x3` is lower than +for `x1` and `x2`.
The following code constructs such a population. + +```julia +x1 = UncertainValue(Normal, 0.1, 0.5) +x2 = UncertainValue(Gamma, 1.2, 3.1) +x3 = UncertainValue(0.1) +pop = [x1, x2, x3] # the population +wts = [0.45, 0.45, 0.1] # weights; `x1` and `x2` are equiprobable, and more probable than `x3`. +UncertainValue(pop, wts) +``` + +## Values without uncertainties + +Numerical values without associated uncertainties must be converted before mixing with +uncertain values. -Construct an uncertain value by a kernel density estimate to `data`. +```julia +x = UncertainValue(2.0) +``` + +## Compatibility with Measurements.jl + +`Measurement`s from Measurements.jl are assumed to be normally distributed and errors +are propagated using linear error propagation theory. In this package, resampling +is used to propagate errors. Thus, `Measurement`s must be converted to normal distributions +to be used in conjunction with other uncertain values in this package. + +```julia +using UncertainData, Measurements +m = measurement(value, uncertainty) +x = UncertainValue(m) # now compatible with UncertainData.jl, but drops support for exact error propagation +``` -Fast Fourier transforms are used in the kernel density estimation, so the -number of points should be a power of 2 (default = 2048).
""" -function UncertainValue(data::Vector{T}; +function UncertainValue end + +UncertainValue(x::T) where T <: Real = CertainScalar(x) + +# Identity constructor +UncertainValue(uval::AbstractUncertainValue) = uval + +# From Measurements.jl +UncertainValue(m::Measurement{T}) where T = UncertainValue(Normal, m.val, m.err) + +# Populations +UncertainValue( + pop::AbstractVector, + probs::Union{AbstractVector{<:Number}, <:StatsBase.AbstractWeights}) = + UncertainScalarPopulation(pop, probs) + +# function UncertainValue(values::Vector{<:Number}, probs::Vector{<:Number}) +# UncertainScalarPopulation(float.(values), probs) +# end + +# function UncertainValue(values::Vector{<:Number}, probs::W) where {W <: AbstractWeights} +# UncertainScalarPopulation(float.(values), probs) +# end + +# function UncertainValue(values::VT, probs) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} +# UncertainScalarPopulation(UncertainValue.(values), probs) +# end + +# function UncertainValue(values::VT, probs::Vector{Number}) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} +# UncertainScalarPopulation(UncertainValue.(values), probs) +# end + +#KDE +function UncertainValue(data::AbstractVector{T}; kernel::Type{D} = Normal, bandwidth = KernelDensity.default_bandwidth(data), npoints::Int = 2048) where {D <: Distributions.Distribution, T} @@ -96,17 +215,6 @@ function UncertainValue(data::Vector{T}; UncertainScalarKDE(KDE, data, xrange, Weights(density)) end - -""" - UncertainValue(kerneldensity::Type{K}, data::Vector{T}; - kernel::Type{D} = Normal, - npoints::Int=2048) where {K <: UnivariateKDE, D <: Distribution, T} - -Construct an uncertain value by a kernel density estimate to `data`. - -Fast Fourier transforms are used in the kernel density estimation, so the -number of points should be a power of 2 (default = 2048). 
-""" function UncertainValue(kerneldensity::Type{K}, data::Vector{T}; kernel::Type{D} = Normal, bandwidth = KernelDensity.default_bandwidth(data)/4, @@ -129,99 +237,86 @@ end UncertainValue(x::Vector{Array{<:Real, 0}}) = UncertainValue([el[] for el in x]) -""" - UncertainValue(d::Type{D}, empiricaldata::Vector{T}) where {D<:Distribution, T} - -# Constructor for empirical distributions. - -Fit a distribution of type `d` to the data and use that as the -representation of the empirical distribution. Calls `Distributions.fit` behind -the scenes. - -## Arguments -- **`empiricaldata`**: The data for which to fit the `distribution`. -- **`distribution`**: A valid univariate distribution from `Distributions.jl`. - -""" -function UncertainValue(d::Type{D}, - empiricaldata::Vector{T}) where {D<:Distribution, T} +# Fitted distributions +# TODO: make TheoreticalFittedUncertainScalar parametric on the input distribution +function UncertainValue(d::Type{<:Distribution}, data::AbstractVector) - distribution = FittedDistribution(Distributions.fit(d, empiricaldata)) - UncertainScalarTheoreticalFit(distribution, empiricaldata) + distribution = FittedDistribution(Distributions.fit(d, data)) + UncertainScalarTheoreticalFit(distribution, data) end -""" +# """ - UncertainValue(distribution::Type{D}, a::T1, b::T2; - kwargs...) where {T1<:Number, T2 <: Number, D<:Distribution} +# UncertainValue(distribution::Type{D}, a::T1, b::T2; +# kwargs...) where {T1 <: Number, T2 <: Number, D <: Distribution} → TheoreticalDistributionScalarValue -# Constructor for two-parameter distributions +# # Constructor for two-parameter distributions -`UncertainValue`s are currently implemented for the following two-parameter -distributions: `Uniform`, `Normal`, `Binomial`, `Beta`, `BetaPrime`, `Gamma`, -and `Frechet`. +# `UncertainValue`s are currently implemented for the following two-parameter +# distributions: `Uniform`, `Normal`, `Binomial`, `Beta`, `BetaPrime`, `Gamma`, +# and `Frechet`. 
-### Arguments +# ### Arguments -- **`a`, `b`**: Generic parameters whose meaning varies depending - on what `distribution` is provided. See the list below. -- **`distribution`**: A valid univariate distribution from `Distributions.jl`. +# - **`a`, `b`**: Generic parameters whose meaning varies depending +# on what `distribution` is provided. See the list below. +# - **`distribution`**: A valid univariate distribution from `Distributions.jl`. -Precisely what `a` and `b` are depends on which distribution is provided. +# Precisely what `a` and `b` are depends on which distribution is provided. -- `UncertainValue(Normal, μ, σ)` returns an `UncertainScalarNormallyDistributed` instance. -- `UncertainValue(Uniform, lower, upper)` returns an `UncertainScalarUniformlyDistributed` instance. -- `UncertainValue(Beta, α, β)` returns an `UncertainScalarBetaDistributed` instance. -- `UncertainValue(BetaPrime, α, β)` returns an `UncertainScalarBetaPrimeDistributed` instance. -- `UncertainValue(Gamma, α, θ)` returns an `UncertainScalarGammaDistributed` instance. -- `UncertainValue(Frechet, α, θ)` returns an `UncertainScalarFrechetDistributed` instance. -- `UncertainValue(Binomial, n, p)` returns an `UncertainScalarBinomialDistributed` instance. +# - `UncertainValue(Normal, μ, σ)` returns an `UncertainScalarNormallyDistributed` instance. +# - `UncertainValue(Uniform, lower, upper)` returns an `UncertainScalarUniformlyDistributed` instance. +# - `UncertainValue(Beta, α, β)` returns an `UncertainScalarBetaDistributed` instance. +# - `UncertainValue(BetaPrime, α, β)` returns an `UncertainScalarBetaPrimeDistributed` instance. +# - `UncertainValue(Gamma, α, θ)` returns an `UncertainScalarGammaDistributed` instance. +# - `UncertainValue(Frechet, α, θ)` returns an `UncertainScalarFrechetDistributed` instance. +# - `UncertainValue(Binomial, n, p)` returns an `UncertainScalarBinomialDistributed` instance. 
-### Keyword arguments +# ### Keyword arguments -- **`nσ`**: If `distribution <: Distributions.Normal`, then how many standard - deviations away from `μ` does `lower` and `upper` (i.e. both, because - they are the same distance away from `μ`) represent? -- **`tolerance`**: A threshold determining how symmetric the uncertainties - must be in order to allow the construction of Normal distribution - (`upper - lower > threshold` is required). -- **`trunc_lower`**: Lower truncation bound for distributions with infinite - support. Defaults to `-Inf`. -- **`trunc_upper`**: Upper truncation bound for distributions with infinite - support. Defaults to `Inf`. +# - **`nσ`**: If `distribution <: Distributions.Normal`, then how many standard +# deviations away from `μ` does `trunc_lower` and `trunc_upper` (i.e. both, because +# they are the same distance away from `μ`) represent? +# - **`tolerance`**: A threshold determining how symmetric the uncertainties +# must be in order to allow the construction of Normal distribution +# (`upper - lower > threshold` is required). +# - **`trunc_lower`**: Lower truncation bound for distributions with infinite +# support. Defaults to `-Inf`. +# - **`trunc_upper`**: Upper truncation bound for distributions with infinite +# support. Defaults to `Inf`. -## Examples +# ## Examples -### Normal distribution +# ### Normal distribution -Normal distributions are formed by using the constructor -`UncertainValue(μ, σ, Normal; kwargs...)`. This gives a normal distribution with -mean μ and standard deviation σ/nσ (nσ must be given as a keyword argument). +# Normal distributions are formed by using the constructor +# `UncertainValue(μ, σ, Normal; kwargs...)`. This gives a normal distribution with +# mean μ and standard deviation σ/nσ (nσ must be given as a keyword argument). -```julia -# A normal distribution with mean = 2.3 and standard deviation 0.3. 
-UncertainValue(2.3, 0.3, Normal) +# ```julia +# # A normal distribution with mean = 2.3 and standard deviation 0.3. +# UncertainValue(2.3, 0.3, Normal) -# A normal distribution with mean 2.3 and standard deviation 0.3/2. -UncertainValue(2.3, 0.3, Normal, nσ = 2) +# # A normal distribution with mean 2.3 and standard deviation 0.3/2. +# UncertainValue(2.3, 0.3, Normal, nσ = 2) -# A normal distribution with mean 2.3 and standard deviation = 0.3, -truncated to the interval `[1, 3]`. -UncertainValue(2.3, 0.3, Normal, trunc_lower = 1.0, trunc_upper = 3.0) -``` +# # A normal distribution with mean 2.3 and standard deviation = 0.3, +# truncated to the interval `[1, 3]`. +# UncertainValue(2.3, 0.3, Normal, trunc_lower = 1.0, trunc_upper = 3.0) +# ``` -### Uniform distribution +# ### Uniform distribution -Uniform distributions are formed using the -`UncertainValue(lower, upper, Uniform)` constructor. +# Uniform distributions are formed using the +# `UncertainValue(lower, upper, Uniform)` constructor. -```julia -# A uniform distribution on `[2, 3]` -UncertainValue(-2, 3, Uniform) -``` +# ```julia +# # A uniform distribution on `[2, 3]` +# UncertainValue(-2, 3, Uniform) +# ``` -""" +# """ function UncertainValue(distribution::Type{D}, a::T1, b::T2; kwargs...) where {T1<:Number, T2 <: Number, D<:Distribution} @@ -250,54 +345,11 @@ function UncertainValue(distribution::Type{D}, a::T1, b::T2; dist = assigndist_frechet(a, b; kwargs...) UncertainScalarFrechetDistributed(dist, a, b) else - throw(DomainError("Two-parameter $dist is not implemented.")) + throw(DomainError("Two-parameter $distribution distribution is not implemented")) end end - -""" - UncertainValue(distribution::Type{D}, a::T1, b::T2, c::T3; - kwargs...) where {T1<:Number, T2<:Number, T3<:Number, D<:Distribution} - -## Constructor for three-parameter distributions - -Currently implemented distributions are `BetaBinomial`. 
- -### Arguments -- **`a`, `b`, `c`**: Generic parameters whose meaning varies depending - on what `distribution` is provided. See the list below. -- **`distribution`**: A valid univariate distribution from `Distributions.jl`. - -Precisely what `a`, `b` and `c` are depends on which distribution is provided. - -- `UncertainValue(BetaBinomial, n, α, β)` returns an `UncertainScalarBetaBinomialDistributed` instance. - - -### Keyword arguments -- **`nσ`**: If `distribution <: Distributions.Normal`, then how many standard - deviations away from `μ` does `lower` and `upper` (i.e. both, because - they are the same distance away from `μ`) represent? -- **`tolerance`**: A threshold determining how symmetric the uncertainties - must be in order to allow the construction of Normal distribution - (`upper - lower > threshold` is required). -- **`trunc_lower`**: Lower truncation bound for distributions with infinite - support. Defaults to `-Inf`. -- **`trunc_upper`**: Upper truncation bound for distributions with infinite - support. Defaults to `Inf`. - -## Examples -### BetaBinomial distribution - -Normal distributions are formed by using the constructor -`UncertainValue(μ, σ, Normal; kwargs...)`. This gives a normal distribution with -mean μ and standard deviation σ/nσ (nσ must be given as a keyword argument). - -```julia -# A beta binomial distribution with n = 100 trials and parameters α = 2.3 and -# β = 5 -UncertainValue(100, 2.3, 5, BetaBinomial) -``` -""" +# TODO: make TheoreticalDistributionScalarValue type parametric on the input distribution function UncertainValue(distribution::Type{D}, a::T1, b::T2, c::T3; kwargs...) where {T1<:Number, T2<:Number, T3<:Number, D<:Distribution} @@ -342,34 +394,7 @@ function untruncated_disttype(t::Distributions.Truncated) return typeof(t_untrunc) end -""" - UncertainValue(t::Distributions.Truncated) - -Construct an uncertain value from an instance of a distribution. 
If a specific -uncertain value type has not been implemented, the number of parameters is -determined from the distribution and an instance of one of the following types -is returned: - -- `ConstrainedUncertainScalarValueOneParameter` -- `ConstrainedUncertainScalarValueTwoParameter` -- `ConstrainedUncertainScalarValueThreeParameter` - -## Examples - -```julia -# Normal distribution truncated to the interval [0.5, 0.7] -t = truncated(Normal(0, 1), 0.5, 0.7) -UncertainValue(t) - -# Gamma distribution truncated to the interval [0.5, 3.5] -t = Truncate(Gamma(4, 5.1), 0.5, 3.5) -UncertainValue(t) - -# Binomial distribution truncated to the interval [2, 7] -t = Truncate(Binomial(10, 0.4), 2, 7) -UncertainValue(t) -``` -""" +#TODO: this is not type-stable. function UncertainValue(t::Distributions.Truncated) dist_type = untruncated_disttype(t) original_dist = untruncated_dist(t) @@ -386,26 +411,8 @@ function UncertainValue(t::Distributions.Truncated) end end -""" - UncertainValue(d::Distributions.Distribution) - -Construct an uncertain value from an instance of a distribution. If a specific -uncertain value type has not been implemented, the number of parameters is -determined from the distribution and an instance of one of the following types -is returned: - -- `UncertainScalarTheoreticalOneParameter` -- `UncertainScalarTheoreticalTwoParameter` -- `UncertainScalarTheoreticalThreeParameter` +#TODO: this is not type-stable. -## Examples - -```julia -UncertainValue(Normal(0, 1)) -UncertainValue(Gamma(4, 5.1)) -UncertainValue(Binomial, 8, 0.2) -``` -""" function UncertainValue(d::Distributions.Distribution) params = fieldnames(typeof(d)) n_params = length(params) @@ -427,17 +434,21 @@ function UncertainValue(d::Distributions.Distribution) UncertainScalarFrechetDistributed(d, param_values...) # if no specific type is implemented for this distribution, just create # a generic one - else - if n_params == 1 - return UncertainScalarTheoreticalOneParameter(d, param_values...) 
- elseif n_params == 2 - return UncertainScalarTheoreticalTwoParameter(d, param_values...) - elseif n_params == 3 - return UncertainScalarTheoreticalThreeParameter(d, param_values...) - else - msg = "uncertain value type for $n_params-parameter $d not implemented." + else + # Todo: generic types are not implemented yet + msg = "uncertain value type for $n_params-parameter $d not implemented." throw(DomainError(msg)) - end + + # if n_params == 1 + # return UncertainScalarTheoreticalOneParameter(d, param_values...) + # elseif n_params == 2 + # return UncertainScalarTheoreticalTwoParameter(d, param_values...) + # elseif n_params == 3 + # return UncertainScalarTheoreticalThreeParameter(d, param_values...) + # else + # msg = "uncertain value type for $n_params-parameter $d not implemented." + # throw(DomainError(msg)) + # end end end diff --git a/src/uncertain_values/UncertainValues.jl b/src/uncertain_values/UncertainValues.jl index 5df52e43..0a7d63e4 100644 --- a/src/uncertain_values/UncertainValues.jl +++ b/src/uncertain_values/UncertainValues.jl @@ -57,7 +57,7 @@ using Reexport include("UncertainScalarPopulation.jl") # Certain values (i.e. 
values without uncertainty) - include("CertainValue.jl") + include("CertainScalar.jl") ########################################## # Composite uncertain vector types diff --git a/src/uncertain_values/convert.jl b/src/uncertain_values/convert.jl index 0684f53a..fcbc4b6e 100644 --- a/src/uncertain_values/convert.jl +++ b/src/uncertain_values/convert.jl @@ -1,2 +1,2 @@ -convert(::Type{CertainValue}, x::T) where {T <: Number} = CertainValue(x) -convert(::Type{T1}, x::T2) where {T1 <: AbstractUncertainValue, T2 <: Number} = CertainValue(x) +convert(::Type{CertainScalar}, x::T) where {T <: Number} = CertainScalar(x) +convert(::Type{T1}, x::T2) where {T1 <: AbstractUncertainValue, T2 <: Number} = CertainScalar(x) diff --git a/src/uncertain_values/operations/comparisons.jl b/src/uncertain_values/operations/comparisons.jl index 87a73297..63404d2b 100644 --- a/src/uncertain_values/operations/comparisons.jl +++ b/src/uncertain_values/operations/comparisons.jl @@ -1,8 +1,8 @@ import Base.< import Base.isapprox -Base.:<(x::T1, y::CertainValue{T2}) where {T1 <: Real, T2 <: Real} = x < y.value -Base.:<(x::CertainValue{T1}, y::T2) where {T1 <: Real, T2 <: Real} = x.value < y -Base.isless(x::CertainValue{T1}, y::CertainValue{T2}) where {T1 <: Real, T2 <: Real} = isless(x.value, y.value) -Base.isapprox(x::CertainValue{T1}, y::T2) where {T1 <: Real, T2 <: Real} = isapprox(x.value, y) -Base.isapprox(x::T1, y::CertainValue{T2}) where {T1 <: Real, T2 <: Real} = isapprox(x, y.value) +Base.:<(x::T1, y::CertainScalar{T2}) where {T1 <: Real, T2 <: Real} = x < y.value +Base.:<(x::CertainScalar{T1}, y::T2) where {T1 <: Real, T2 <: Real} = x.value < y +Base.isless(x::CertainScalar{T1}, y::CertainScalar{T2}) where {T1 <: Real, T2 <: Real} = isless(x.value, y.value) +Base.isapprox(x::CertainScalar{T1}, y::T2) where {T1 <: Real, T2 <: Real} = isapprox(x.value, y) +Base.isapprox(x::T1, y::CertainScalar{T2}) where {T1 <: Real, T2 <: Real} = isapprox(x, y.value) diff --git 
a/test/mathematics/uncertain_values/test_elementary_maths_uncertainvalues.jl b/test/mathematics/uncertain_values/test_elementary_maths_uncertainvalues.jl index 237882ed..cd9191a4 100644 --- a/test/mathematics/uncertain_values/test_elementary_maths_uncertainvalues.jl +++ b/test/mathematics/uncertain_values/test_elementary_maths_uncertainvalues.jl @@ -6,8 +6,8 @@ M = MixtureModel([Normal(3, 0.2), Normal(2, 1)]) r1 = UncertainValue(Normal, rand(), rand()) r2 = UncertainValue(rand(M, 10000)) r3 = UncertainValue(Normal, rand(Normal(4, 3.2), 10000)) -r4 = CertainValue(2.2) -r5 = CertainValue(2) +r4 = CertainScalar(2.2) +r5 = CertainScalar(2) uvals = [r1; r2; r3] diff --git a/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_decreasing.jl b/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_decreasing.jl index 5888a9ea..2062137e 100644 --- a/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_decreasing.jl +++ b/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_decreasing.jl @@ -5,7 +5,7 @@ using Test, UncertainData # Create some uncertain data with decreasing magnitude and zero overlap between values, # so we're guaranteed that a strictly decreasing sequence through the dataset exists. N = 10 - t = [i <= N/2 ? CertainValue(float(i)) : UncertainValue(Normal, i, 1) for i = N:-1:1] + t = [i <= N/2 ? 
CertainScalar(float(i)) : UncertainValue(Normal, i, 1) for i = N:-1:1] T = UncertainIndexDataset(t) iv = UncertainIndexValueDataset(t, t) diff --git a/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_increasing.jl b/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_increasing.jl index 826104c8..585fd857 100644 --- a/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_increasing.jl +++ b/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_increasing.jl @@ -6,7 +6,7 @@ using StatsBase # Create some uncertain data with decreasing magnitude and zero overlap between values, # so we're guaranteed that a strictly decreasing sequence through the dataset exists. N = 10 - t = [ i <= N/2 ? CertainValue(float(i)) : UncertainValue(Normal, i, 1) for i = 1:N] + t = [ i <= N/2 ? CertainScalar(float(i)) : UncertainValue(Normal, i, 1) for i = 1:N] T = UncertainIndexDataset(t) iv = UncertainIndexValueDataset(t, t) diff --git a/test/resampling/uncertain_datasets/test_resampling_with_schemes.jl b/test/resampling/uncertain_datasets/test_resampling_with_schemes.jl index 67fa4dbd..7149aece 100644 --- a/test/resampling/uncertain_datasets/test_resampling_with_schemes.jl +++ b/test/resampling/uncertain_datasets/test_resampling_with_schemes.jl @@ -8,7 +8,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x = UncertainIndexDataset(time_uncertain) timeinds_y = UncertainIndexDataset(time_certain) diff --git a/test/resampling/uncertain_values/test_resampling_certain_value.jl b/test/resampling/uncertain_values/test_resampling_certain_value.jl index de4e659e..dbcdd285 100644 --- a/test/resampling/uncertain_values/test_resampling_certain_value.jl +++ 
b/test/resampling/uncertain_values/test_resampling_certain_value.jl @@ -1,4 +1,4 @@ -x = CertainValue(2.0) +x = CertainScalar(2.0) test_constraints = [ NoConstraint(), diff --git a/test/runtests.jl b/test/runtests.jl index 33951c46..315b173b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,7 +22,7 @@ using KernelDensity include("uncertain_values/test_minmax.jl") end - @testset "CertainValue" begin + @testset "CertainScalar" begin include("uncertain_values/test_certain_values.jl") end diff --git a/test/sampling_constraints/test_constrain_certainvalue.jl b/test/sampling_constraints/test_constrain_certainvalue.jl index c1451eeb..92f49ec9 100644 --- a/test/sampling_constraints/test_constrain_certainvalue.jl +++ b/test/sampling_constraints/test_constrain_certainvalue.jl @@ -1,4 +1,4 @@ -x = CertainValue(2.0) +x = CertainScalar(2.0) test_constraints = [ NoConstraint(), @@ -12,5 +12,5 @@ test_constraints = [ ] for constraint in test_constraints - @test constrain(x, constraint) isa CertainValue + @test constrain(x, constraint) isa CertainScalar end \ No newline at end of file diff --git a/test/sampling_constraints/test_constrain_with_schemes.jl b/test/sampling_constraints/test_constrain_with_schemes.jl index 88a047a3..899531ac 100644 --- a/test/sampling_constraints/test_constrain_with_schemes.jl +++ b/test/sampling_constraints/test_constrain_with_schemes.jl @@ -8,7 +8,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x = UncertainIndexDataset(time_uncertain) timeinds_y = UncertainIndexDataset(time_certain) diff --git a/test/uncertain_datasets/test_uncertain_index_value_dataset.jl b/test/uncertain_datasets/test_uncertain_index_value_dataset.jl index 52263d45..f2180d4c 100644 --- a/test/uncertain_datasets/test_uncertain_index_value_dataset.jl +++ 
b/test/uncertain_datasets/test_uncertain_index_value_dataset.jl @@ -9,7 +9,7 @@ o2 = UncertainValue(Normal, 2, 0.3) o3 = UncertainValue(Uniform, 0, 4) o4 = UncertainValue(Uniform, rand(100)) o5 = UncertainValue(rand(400)) -o7 = CertainValue(2) +o7 = CertainScalar(2) o8 = UncertainValue([2, 3, 4], [4, 5, 2]) o9 = UncertainValue([2, 4, 5, 2], rand(4)) diff --git a/test/uncertain_values/populations/test_ConstrainedUncertainScalarPopulation.jl b/test/uncertain_values/populations/test_ConstrainedUncertainScalarPopulation.jl index 8db47d6f..6528cdc4 100644 --- a/test/uncertain_values/populations/test_ConstrainedUncertainScalarPopulation.jl +++ b/test/uncertain_values/populations/test_ConstrainedUncertainScalarPopulation.jl @@ -1,4 +1,4 @@ -# Uncertain population consisting of CertainValues (scalars get promoted to CertainValue)s +# Uncertain population consisting of CertainScalars (scalars get promoted to CertainScalar)s # theoretical distributions and KDE distributions p1 = ConstrainedUncertainScalarPopulation( [3.0, UncertainValue(Normal, 0, 1), diff --git a/test/uncertain_values/populations/test_UncertainScalarPopulation.jl b/test/uncertain_values/populations/test_UncertainScalarPopulation.jl index 59b69219..06825c45 100644 --- a/test/uncertain_values/populations/test_UncertainScalarPopulation.jl +++ b/test/uncertain_values/populations/test_UncertainScalarPopulation.jl @@ -1,6 +1,6 @@ import StatsBase: AbstractWeights -# Uncertain population consisting of CertainValues (scalars get promoted to CertainValue)s +# Uncertain population consisting of CertainScalars (scalars get promoted to CertainScalar)s # theoretical distributions and KDE distributions p1 = UncertainScalarPopulation( [3.0, UncertainValue(Normal, 0, 1), diff --git a/test/uncertain_values/test_certain_values.jl b/test/uncertain_values/test_certain_values.jl index 4873f135..a5b9e45e 100644 --- a/test/uncertain_values/test_certain_values.jl +++ b/test/uncertain_values/test_certain_values.jl @@ -2,5 +2,5 @@ x 
= 3 y = 3.3 -@test UncertainValue(x) isa CertainValue -@test UncertainValue(y) isa CertainValue \ No newline at end of file +@test UncertainValue(x) isa CertainScalar +@test UncertainValue(y) isa CertainScalar \ No newline at end of file From ddb42c75c29a7081c33bc3f087bb6be8bf291c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Wed, 28 Apr 2021 14:51:15 +0200 Subject: [PATCH 05/21] Still need to figure out the constructor for populations. --- .../UncertainScalarPopulation.jl | 16 +++++++++------- src/uncertain_values/UncertainValue.jl | 16 ++++++---------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/uncertain_values/UncertainScalarPopulation.jl b/src/uncertain_values/UncertainScalarPopulation.jl index 62c2aef9..e388b27e 100644 --- a/src/uncertain_values/UncertainScalarPopulation.jl +++ b/src/uncertain_values/UncertainScalarPopulation.jl @@ -6,8 +6,9 @@ import StatsBase const POTENTIAL_UVAL_TYPES = Union{T1, T2} where {T1 <: Number, T2 <: AbstractUncertainValue} -convert_elwise(f, x) = map(f, x); -nested_convert_elwise(f, x) = map(xᵢ -> convert_elwise(f, xᵢ), x) +convert_elwise(f::Function, x) = map(f, x); +convert_elwise(f::Function, x::AbstractUncertainValue) = x +nested_convert_elwise(f::Function, x) = map(xᵢ -> convert_elwise(f, xᵢ), x) function verify_pop_and_weights(pop, wts) if length(pop) != length(wts) @@ -20,7 +21,6 @@ end UncertainScalarPopulation(values, probs::Vector{Number}) UncertainScalarPopulation(values, probs::Statsbase.AbstractWeights) - An `UncertainScalarPopulation`, which consists of some population members (`values`) and some weights (`probs`) that indicate the relative importance of the population members (for example during resampling). 
@@ -100,17 +100,19 @@ struct UncertainScalarPopulation{T, PW <: StatsBase.AbstractWeights} <: Abstract probs::PW function UncertainScalarPopulation(pop, probs::AbstractVector{T}) where {T <: Number} - members = nested_convert_elwise(UncertainValue, pop); TT = eltype(members) verify_pop_and_weights(pop, probs) + @show "here" + @show pop + members = nested_convert_elwise(UncertainValue, pop); TT = eltype(members) wts = Weights(probs); PW = typeof(wts) new{TT, PW}(members, wts) end function UncertainScalarPopulation(pop, probs::PW) where {PW <: StatsBase.AbstractWeights} - members = nested_convert_elwise(UncertainValue, pop) verify_pop_and_weights(pop, probs) - T = eltype(members) - new{T, PW}(members, probs) + @show "here2" + members = nested_convert_elwise(UncertainValue, pop); TT = eltype(members) + new{TT, PW}(members, probs) end end diff --git a/src/uncertain_values/UncertainValue.jl b/src/uncertain_values/UncertainValue.jl index 570f2383..35d4a7f2 100644 --- a/src/uncertain_values/UncertainValue.jl +++ b/src/uncertain_values/UncertainValue.jl @@ -174,25 +174,21 @@ UncertainValue(uval::AbstractUncertainValue) = uval # From Measurements.jl UncertainValue(m::Measurement{T}) where T = UncertainValue(Normal, m.val, m.err) -# Populations -UncertainValue( - pop::AbstractVector, - probs::Union{AbstractVector{<:Number}, <:StatsBase.AbstractWeights}) = - UncertainScalarPopulation(pop, probs) +#Populations +# function UncertainValue( +# values::AbstractVector{<:Number}, +# probs::Union{Vector{<:Number}, W}) where {W <: AbstractWeights} -# function UncertainValue(values::Vector{<:Number}, probs::Vector{<:Number}) # UncertainScalarPopulation(float.(values), probs) # end -# function UncertainValue(values::Vector{<:Number}, probs::W) where {W <: AbstractWeights} -# UncertainScalarPopulation(float.(values), probs) -# end +UncertainValue(values, probs) = UncertainScalarPopulation(values, probs) # function UncertainValue(values::VT, probs) where VT <: Vector{ELTYPE} where 
{ELTYPE<:POTENTIAL_UVAL_TYPES} # UncertainScalarPopulation(UncertainValue.(values), probs) # end -# function UncertainValue(values::VT, probs::Vector{Number}) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} +# function UncertainValue(values::VT, probs::Vector{<:Number}) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} # UncertainScalarPopulation(UncertainValue.(values), probs) # end From 75310e9f0ee3a68a82a88b9ff22acc29bd9cd9b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Thu, 29 Apr 2021 11:57:56 +0200 Subject: [PATCH 06/21] Convert members to uncertain values --- .../UncertainScalarPopulation.jl | 141 ++++++------------ .../AbstractScalarPopulation.jl | 18 +-- 2 files changed, 55 insertions(+), 104 deletions(-) diff --git a/src/uncertain_values/UncertainScalarPopulation.jl b/src/uncertain_values/UncertainScalarPopulation.jl index e388b27e..b4750ba3 100644 --- a/src/uncertain_values/UncertainScalarPopulation.jl +++ b/src/uncertain_values/UncertainScalarPopulation.jl @@ -5,9 +5,8 @@ import StatsBase const POTENTIAL_UVAL_TYPES = Union{T1, T2} where {T1 <: Number, T2 <: AbstractUncertainValue} - convert_elwise(f::Function, x) = map(f, x); -convert_elwise(f::Function, x::AbstractUncertainValue) = x +convert_elwise(f::Function, x::T) where T <: AbstractUncertainValue = x nested_convert_elwise(f::Function, x) = map(xᵢ -> convert_elwise(f, xᵢ), x) function verify_pop_and_weights(pop, wts) @@ -17,49 +16,38 @@ function verify_pop_and_weights(pop, wts) end """ - UncertainScalarPopulation(values, probs) - UncertainScalarPopulation(values, probs::Vector{Number}) - UncertainScalarPopulation(values, probs::Statsbase.AbstractWeights) + UncertainScalarPopulation(members, probs) + UncertainScalarPopulation(members, probs::Vector{Number}) + UncertainScalarPopulation(members, probs::Statsbase.AbstractWeights) -An `UncertainScalarPopulation`, which consists of some population members (`values`) -and some weights 
(`probs`) that indicate the relative importance of the +An `UncertainScalarPopulation`, which consists of some population `members` +with associated weights (`probs`) that indicate the relative importance of the population members (for example during resampling). -## Fields - -- **`values`**: The members of the population. Can be either numerical values, any - type of uncertain value defined in this package (including populations), and - `Measurement` instances from Measurements.jl. -- **`probs`**: The probabilities of sampling each member of the population. - -## Constructors - -- If `values` contains only scalar numeric values, then the `values` field - will be of type `Vector{Number}`. -- If `values` contains one or more uncertain values, then the `values` field - will be of type `Vector{AbstractUncertainValue}` +Members can be either numerical values, any type of uncertain value defined +in this package (including populations, so nested populations are possible). ## Examples -### Weighted scalar populations +### Scalar populations -Weighted scalar populations are defined as follows. Note: Weights must always be provided, -and scalars must be converted to uncertain values before creating the population. 
+Weighted scalar populations are defined as follows. *Note: Weights must always be provided, +and scalars must be converted to uncertain values before creating the population.* ```julia using UncertainData -pop = UncertainValue.([1.0, 2.0, 3.0]); wts = rand(3) +members = UncertainValue.([1.0, 2.0, 3.0]); wts = rand(3) -# Treat elements of `pop` as equiprobable -p = UncertainScalarPopulation(pop, [1, 1, 1]) +# Treat elements of `members` as equiprobable +p = UncertainScalarPopulation(members, [1, 1, 1]) -# Treat elements of `pop` as inequiprobable -p = UncertainScalarPopulation(pop, [2, 3, 1]) +# Treat elements of `members` as inequiprobable +p = UncertainScalarPopulation(members, [2, 3, 1]) ``` ## Populations with mixed-type uncertain values -Uncertain population can also consist of a mixture of different types of uncertain values. +Uncertain populations can also consist of a mixture of different types of uncertain values. Here, we use a population consisting of a scalar, two theoretical distributions with known parameters, and a theoretical uniform distribution whose parameters are estimated from a random sample `s`. We assign equal weights to the member @@ -67,113 +55,76 @@ of the population. ```julia s = rand(1000) -pop = [ +members = [ 3.0, UncertainValue(Normal, 0, 1), UncertainValue(Gamma, 2, 3), UncertainValue(Uniform, s) ] wts = [0.5, 0.5, 0.5, 0.5] -p = UncertainScalarPopulation(pop, wts) +p = UncertainScalarPopulation(members, wts) ``` ## Nested populations Nested populations are also possible. -``` +```julia using UncertainData, Distributions s = rand(Normal(0.1, 2.0), 8000) -p1 = [UncertainValue(Normal, 0.5, 0.33), UncertainValue(Gamma, 0.6, 0.9)] +m1 = [UncertainValue(Normal, 0.5, 0.33), UncertainValue(Gamma, 0.6, 0.9)] # If including scalars, these must be converted to `CertainScalar`s first, # as follows.
-p2 = [UncertainValue(2.2), UncertainValue(Normal, s), UncertainValue(s)] +m2 = [2.2, UncertainValue(Normal, s), UncertainValue(s)] -# Give p1 and p2 relative weights 0.1 and 0.5 (these are normalized, so +# Give m1 and m2 relative weights 0.1 and 0.5 (these are normalized, so # do not need to sum to 1). -p = UncertainScalarPopulation([p1, p2], [0.1, 0.5]) +p = UncertainScalarPopulation([m1, m2], [0.1, 0.5]) ``` """ struct UncertainScalarPopulation{T, PW <: StatsBase.AbstractWeights} <: AbstractScalarPopulation{T, PW} - values::AbstractVector{T} + members::T probs::PW - function UncertainScalarPopulation(pop, probs::AbstractVector{T}) where {T <: Number} - verify_pop_and_weights(pop, probs) - @show "here" - @show pop - members = nested_convert_elwise(UncertainValue, pop); TT = eltype(members) + function UncertainScalarPopulation(members, probs::AbstractVector{T}) where {T <: Number} + verify_pop_and_weights(members, probs) + m = nested_convert_elwise(UncertainValue, members); TT = typeof(m) wts = Weights(probs); PW = typeof(wts) - new{TT, PW}(members, wts) + new{TT, PW}(m, wts) end - function UncertainScalarPopulation(pop, probs::PW) where {PW <: StatsBase.AbstractWeights} - verify_pop_and_weights(pop, probs) - @show "here2" - members = nested_convert_elwise(UncertainValue, pop); TT = eltype(members) - new{TT, PW}(members, probs) + function UncertainScalarPopulation(members, probs::PW) where {PW <: StatsBase.AbstractWeights} + verify_pop_and_weights(members, probs) + m = nested_convert_elwise(UncertainValue, members); TT = typeof(m) + new{TT, PW}(m, probs) end end - -# function UncertainScalarPopulation(values::Vector{T1}, probabilities::Vector{T2}) where {T1 <: Number, T2 <: Number} - -# UncertainScalarPopulation( -# nested_convert_elwise(UncertainValue, values), # in case scalars are provided -# StatsBase.weights(probabilities) -# ) -# # end -# function UncertainScalarPopulation(values::VT, probabilities) where VT <: Vector{ELTYPE} where 
{ELTYPE<:POTENTIAL_UVAL_TYPES} - -# UncertainScalarPopulation(UncertainValue.(values), StatsBase.weights(probabilities)) -# end - - """ - ConstrainedUncertainScalarPopulation(values, probs) - ConstrainedUncertainScalarPopulation(values, probs::Vector{Number}) - ConstrainedUncertainScalarPopulation(values, probs::Statsbase.AbstractWeights) - -A `ConstrainedUncertainScalarPopulation`, which consists of some population -members (`values`)and some weights (`probs`) that indicate the relative importance of -the population members (for example during resampling). The uncertain values -for this type is meant to consist of constrained uncertain values -(generated by calling `constrain(uval, sampling_constraint`) on them. + ConstrainedUncertainScalarPopulation(members, probs) + ConstrainedUncertainScalarPopulation(members, probs::Vector{Number}) + ConstrainedUncertainScalarPopulation(members, probs::Statsbase.AbstractWeights) -This is just a convenience type to indicate that the population has been +A convenience type to indicate that the population has been constrained. It behaves identically to `UncertainScalarPopulation`. - -There are different constructors for different types of `values`: - -- If `values` contains only scalar numeric values, then the `values` field - will be of type `Vector{Number}`. -- If `values` contains one or more uncertain values, then the `values` field - will be of type `Vector{AbstractUncertainValue}` - """ struct ConstrainedUncertainScalarPopulation{T, PW <: StatsBase.AbstractWeights} <: AbstractScalarPopulation{T, PW} - values::Vector{T} + members::Vector{T} probs::PW end -""" - ConstrainedUncertainScalarPopulation(values::Vector, probabilities::Vector{Float64}) - -Construct a constrained population from a vector of values and a vector of -probabilities associated to those values. 
-""" -function ConstrainedUncertainScalarPopulation(values::Vector{T1}, probabilities::Vector{T2}) where {T1 <: Number, T2 <: Number} - if length(values) != length(probabilities) - throw(ArgumentError("Lengths of values and probability vectors do not match.")) +function ConstrainedUncertainScalarPopulation(members::Vector{T1}, probabilities::Vector{T2}) where {T1 <: Number, T2 <: Number} + if length(members) != length(probabilities) + throw(ArgumentError("Lengths of members and probability vectors do not match.")) end - ConstrainedUncertainScalarPopulation(float.(values), StatsBase.weights(probabilities)) + ConstrainedUncertainScalarPopulation(float.(members), StatsBase.weights(probabilities)) end -function ConstrainedUncertainScalarPopulation(values::VT, probabilities) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} - if length(values) != length(probabilities) - throw(ArgumentError("Lengths of values and probability vectors do not match.")) +function ConstrainedUncertainScalarPopulation(members::VT, probabilities) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} + if length(members) != length(probabilities) + throw(ArgumentError("Lengths of members and probability vectors do not match.")) end - ConstrainedUncertainScalarPopulation(UncertainValue.(values), StatsBase.weights(probabilities)) + ConstrainedUncertainScalarPopulation(UncertainValue.(members), StatsBase.weights(probabilities)) end export diff --git a/src/uncertain_values/abstract_types/AbstractScalarPopulation.jl b/src/uncertain_values/abstract_types/AbstractScalarPopulation.jl index b28765d8..1c3496b3 100644 --- a/src/uncertain_values/abstract_types/AbstractScalarPopulation.jl +++ b/src/uncertain_values/abstract_types/AbstractScalarPopulation.jl @@ -10,17 +10,17 @@ An abstract type for population-based uncertain scalar values. 
""" abstract type AbstractScalarPopulation{T, PW} <: AbstractPopulation end -Base.length(p::AbstractScalarPopulation) = length(p.values) -Base.getindex(p::AbstractScalarPopulation, i) = p.values[i] +Base.length(p::AbstractScalarPopulation) = length(p.members) +Base.getindex(p::AbstractScalarPopulation, i) = p.members[i] Base.firstindex(p::AbstractScalarPopulation) = 1 -Base.lastindex(p::AbstractScalarPopulation) = length(p.values) +Base.lastindex(p::AbstractScalarPopulation) = length(p.members) Base.eachindex(p::AbstractScalarPopulation) = Base.OneTo(lastindex(p)) -Base.iterate(p::AbstractScalarPopulation, state = 1) = iterate(p.values, state) +Base.iterate(p::AbstractScalarPopulation, state = 1) = iterate(p.members, state) function summarise(p::AbstractScalarPopulation) _type = typeof(p) - l = length(p.values) + l = length(p.members) summary = "$_type containing $l values" return summary end @@ -31,10 +31,10 @@ Base.minimum(p::AbstractScalarPopulation) = minimum(p) Base.maximum(p::AbstractScalarPopulation) = maximum(p) Base.minimum(pop::AbstractScalarPopulation{T, PW} where {T <: Number, PW}) = - minimum(pop.values) + minimum(pop.members) Base.maximum(pop::AbstractScalarPopulation{T, PW} where {T <: Number, PW}) = - maximum(pop.values) + maximum(pop.members) Base.minimum(pop::AbstractScalarPopulation{T, PW} where {T <: AbstractUncertainValue, PW}) = minimum([minimum(uv) for uv in pop]) @@ -45,11 +45,11 @@ Base.maximum(pop::AbstractScalarPopulation{T, PW} where {T <: AbstractUncertainV Distributions.support(p::AbstractScalarPopulation) = interval(minimum(p), maximum(p)) function Base.rand(pop::AbstractScalarPopulation{T, PW}) where {T <: Number, PW} - StatsBase.sample(pop.values, pop.probs) + StatsBase.sample(pop.members, pop.probs) end function Base.rand(pop::AbstractScalarPopulation{T, PW}, n::Int) where {T <: Number, PW} - StatsBase.sample(pop.values, pop.probs, n) + StatsBase.sample(pop.members, pop.probs, n) end function 
Base.rand(pop::AbstractScalarPopulation{T, PW}) where {T <: AbstractUncertainValue, PW} From 033e7d2d0588a1c6effd5c5de3dc5e9bfd5b33fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Thu, 29 Apr 2021 14:43:41 +0200 Subject: [PATCH 07/21] Remove old file --- docs/src/uncertain_values/defining_uncertain_values.md | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 docs/src/uncertain_values/defining_uncertain_values.md diff --git a/docs/src/uncertain_values/defining_uncertain_values.md b/docs/src/uncertain_values/defining_uncertain_values.md deleted file mode 100644 index 70b9cc58..00000000 --- a/docs/src/uncertain_values/defining_uncertain_values.md +++ /dev/null @@ -1,7 +0,0 @@ -# Convenience constructors - -The following convenience constructors are used to defined uncertain values. - -```@docs -UncertainValue -``` \ No newline at end of file From cdda8e78859645ab8d4a95d20caba83fe73e997c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Thu, 29 Apr 2021 15:13:30 +0200 Subject: [PATCH 08/21] Combine dataset docs --- docs/make.jl | 4 +- docs/src/uncertain_datasets/datasets.md | 36 ++++----------- .../uncertain_index_dataset.md | 37 ---------------- .../uncertainvalues_examples.md | 5 --- .../UncertainIndexDataset.jl | 44 +++++++++++++++++-- 5 files changed, 50 insertions(+), 76 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 5934b9c1..4b61e732 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -31,11 +31,9 @@ PAGES = [ "uncertain_values/uncertainvalues_examples.md", ], "Uncertain datasets" => [ + "uncertain_datasets/datasets.md", "uncertain_datasets/uncertain_datasets_overview.md", - "uncertain_datasets/uncertain_index_dataset.md", - "uncertain_datasets/uncertain_value_dataset.md", "uncertain_datasets/uncertain_indexvalue_dataset.md", - "uncertain_datasets/uncertain_dataset.md", ], "Uncertain statistics" => [ "Core statistics" => [ diff --git 
a/docs/src/uncertain_datasets/datasets.md b/docs/src/uncertain_datasets/datasets.md index 58ded4aa..d7815f2f 100644 --- a/docs/src/uncertain_datasets/datasets.md +++ b/docs/src/uncertain_datasets/datasets.md @@ -1,7 +1,6 @@ -# Datasets of uncertain values - -## Uncertain value datasets +# Types of datasets +## `UncertainValueDataset` `UncertainValueDataset`s is an uncertain dataset type that has no explicit index associated with its uncertain values. This type may come with some extra functionality @@ -14,31 +13,14 @@ as opposed to [indices](uncertain_index_dataset.md). UncertainValueDataset ``` -### Example - -An `UncertainValueDataset` can be comprised of uncertain values of different types. - -```julia -o1 = UncertainValue(Normal, 0, 0.5) -o2 = UncertainValue(Normal, 2.0, 0.1) -o3 = UncertainValue(Uniform, 0, 4) -o4 = UncertainValue(Uniform, rand(100)) -o5 = UncertainValue(Beta, 4, 5) -o6 = UncertainValue(Gamma, 4, 5) -o7 = UncertainValue(Frechet, 1, 2) -o8 = UncertainValue(BetaPrime, 1, 2) -o9 = UncertainValue(BetaBinomial, 10, 3, 2) -o10 = UncertainValue(Binomial, 10, 0.3) +## Uncertain index datasets -uvals = [o1, o2, o3, o4, o5, o6, o7, o8, o9, o10] -d = UncertainValueDataset(uvals) -``` +`UncertainIndexDataset`s is an uncertain dataset type that represents the indices +corresponding to an [UncertainValueDataset](uncertain_value_dataset.md). -The built-in plot recipes makes it a breeze to plot the dataset. Here, we'll plot the -20th to 80th percentile range error bars. +It is meant to be used for the `indices` field in +[UncertainIndexValueDataset](uncertain_indexvalue_dataset.md)s instances. 
-```julia -plot(d, [0.2, 0.8]) +```@docs +UncertainIndexDataset ``` - -![](uncertain_value_dataset_example.svg) diff --git a/docs/src/uncertain_datasets/uncertain_index_dataset.md b/docs/src/uncertain_datasets/uncertain_index_dataset.md index 71de557d..8b137891 100644 --- a/docs/src/uncertain_datasets/uncertain_index_dataset.md +++ b/docs/src/uncertain_datasets/uncertain_index_dataset.md @@ -1,38 +1 @@ -# Uncertain index datasets -## Documentation - -```@docs -UncertainIndexDataset -``` - -## Description - -`UncertainIndexDataset`s is an uncertain dataset type that represents the indices -corresponding to an [UncertainValueDataset](uncertain_value_dataset.md). - -It is meant to be used for the `indices` field in -[UncertainIndexValueDataset](uncertain_indexvalue_dataset.md)s instances. - -## Defining uncertain index datasets - -### Example 1: increasing index uncertainty through time - -#### Defining the indices - -Say we had a dataset of 20 values for which the uncertainties are normally distributed -with increasing standard deviation through time. - -```julia -time_inds = 1:13 -uvals = [UncertainValue(Normal, ind, rand(Uniform()) + (ind / 6)) for ind in time_inds] -inds = UncertainIndexDataset(uvals) -``` - -That's it. We can also plot the 33rd to 67th percentile range for the indices. - -```plot -plot(inds, [0.33, 0.67]) -``` - -![](uncertain_indexvalue_dataset_indices.svg) diff --git a/docs/src/uncertain_values/uncertainvalues_examples.md b/docs/src/uncertain_values/uncertainvalues_examples.md index a4d78990..f33c2a71 100644 --- a/docs/src/uncertain_values/uncertainvalues_examples.md +++ b/docs/src/uncertain_values/uncertainvalues_examples.md @@ -50,11 +50,6 @@ bar(u, label = "", xlabel = "value", ylabel = "probability density") ![](figs/uncertainvalue_theoretical_normal.svg) -### Other distributions - -You may define uncertain values following any of the -[supported distributions](uncertainvalues_theoreticaldistributions.md). 
- ## Kernel density estimated distributions One may also be given a a distribution of numbers that's not quite normally distributed. diff --git a/src/uncertain_datasets/UncertainIndexDataset.jl b/src/uncertain_datasets/UncertainIndexDataset.jl index 80b717af..dcaa1870 100644 --- a/src/uncertain_datasets/UncertainIndexDataset.jl +++ b/src/uncertain_datasets/UncertainIndexDataset.jl @@ -1,10 +1,46 @@ """ - UncertainIndexDataset + UncertainIndexDataset(indices) -Generic dataset containing uncertain indices. +A dataset containing `indices` that have uncertainties associated with them. + +## Examples + +Say we had a dataset of 13 values for which the uncertainties are normally distributed +with increasing standard deviation through time. + +```julia +using UncertainData, Plots +time_inds = 1:13 +uvals = [UncertainValue(Normal, ind, rand(Uniform()) + (ind / 6)) for ind in time_inds] +inds = UncertainIndexDataset(uvals) + +# With built-in plot recipes, we can plot the dataset, say, using the +# 33rd to 67th percentile range for the indices. +plot(inds, [0.33, 0.67]) +``` + +`UncertainValueDataset`s can also be comprised of uncertain values of different +types (see also [`UncertainValue`](@ref)). + +```julia +o1 = UncertainValue(Normal, 0, 0.5) +o2 = UncertainValue(Normal, 2.0, 0.1) +o3 = UncertainValue(Uniform, 0, 4) +o4 = UncertainValue(Uniform, rand(100)) +o5 = UncertainValue(Beta, 4, 5) +o6 = UncertainValue(Gamma, 4, 5) +o7 = UncertainValue(Frechet, 1, 2) +o8 = UncertainValue(BetaPrime, 1, 2) +o9 = UncertainValue(BetaBinomial, 10, 3, 2) +o10 = UncertainValue(Binomial, 10, 0.3) + +uvals = [o1, o2, o3, o4, o5, o6, o7, o8, o9, o10] +d = UncertainValueDataset(uvals) + +# Plot the 20th to 80th percentile range error bars. +plot(d, [0.2, 0.8]) +``` -## Fields -- **`indices::AbstractVector{AbstractUncertainValue}`**: The uncertain values.
""" struct UncertainIndexDataset <: AbstractUncertainIndexDataset indices::AbstractVector{<:AbstractUncertainValue} From 8657e7f3b7452f4920a1d6b3a43c21f8c5035d99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 30 Apr 2021 13:39:57 +0200 Subject: [PATCH 09/21] Update keywords in plot recipes to ensure correct future behaviour --- src/plot_recipes/recipes_uncertainvalues_kde.jl | 16 ++++++++-------- .../recipes_uncertainvalues_multiple.jl | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/plot_recipes/recipes_uncertainvalues_kde.jl b/src/plot_recipes/recipes_uncertainvalues_kde.jl index 962fedb3..305f7855 100644 --- a/src/plot_recipes/recipes_uncertainvalues_kde.jl +++ b/src/plot_recipes/recipes_uncertainvalues_kde.jl @@ -6,10 +6,10 @@ import ..SamplingConstraints: @recipe function plot_uncertainvalueKDE(uv::AbstractUncertainScalarKDE) @series begin seriestype := :path - fα --> 0.5 - fc --> :green - xlabel --> "Value" - ylabel --> "Density" + fillalpha --> 0.5 + fillcolor --> :green + xguide --> "Value" + yguide --> "Density" label --> "" uv.distribution.x, uv.distribution.density ./ sum(uv.distribution.density) end @@ -22,10 +22,10 @@ end cuv = constrain(uv, constraint) @series begin seriestype := :path - fα --> 0.5 - fc --> :green - xlabel --> "Value" - ylabel --> "Density" + fillalpha --> 0.5 + fillcolor --> :green + xguide --> "Value" + yguide --> "Density" label --> "" cuv.distribution.x, cuv.distribution.density ./ sum(cuv.distribution.density) end diff --git a/src/plot_recipes/recipes_uncertainvalues_multiple.jl b/src/plot_recipes/recipes_uncertainvalues_multiple.jl index 0817b103..91bc04de 100644 --- a/src/plot_recipes/recipes_uncertainvalues_multiple.jl +++ b/src/plot_recipes/recipes_uncertainvalues_multiple.jl @@ -10,15 +10,15 @@ @series begin label --> "P1, $d1" seriestype := :bar - fα --> 0.4 - fc --> :green + fillalpha --> 0.4 + fillcolor --> :green fit(Histogram, resample(d1, 
n_samples), nbins = nbins) end @series begin label --> "P2, $d2" seriestype := :bar - fc --> :blue - fα --> 0.4 + fillcolor --> :blue + fillalpha --> 0.4 fit(Histogram, resample(d2, n_samples), nbins = nbins) end @@ -28,8 +28,8 @@ @series begin label --> "MixtureModel with uniform priors" seriestype := :bar - fα --> 0.6 - fc --> :black + fillalpha --> 0.6 + fillcolor --> :black fit(Histogram, rand(M, n_samples), nbins = nbins) end end From 1ed46717a475e36212fdf1826ef53967e77897d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 30 Apr 2021 15:58:02 +0200 Subject: [PATCH 10/21] Simplify docs --- docs/make.jl | 13 +- docs/src/uncertain_datasets/datasets.md | 98 ++++- .../uncertain_indexvalue_dataset.md | 128 ------ .../uncertain_values/combining_and_merging.md | 276 ------------ docs/src/uncertain_values/types.md | 129 ------ docs/src/uncertain_values/uncertain_values.md | 404 ++++++++++++++++++ .../uncertainvalues_Measurements.md | 11 - .../uncertainvalues_certainvalue.md | 11 - .../uncertainvalues_examples.md | 128 ------ .../uncertainvalues_fitted.md | 10 - .../uncertain_values/uncertainvalues_kde.md | 56 --- .../uncertainvalues_overview.md | 125 ------ .../uncertainvalues_populations.md | 10 - ...ncertainvalues_theoreticaldistributions.md | 17 - src/plot_recipes/recipes_certainvalues.jl | 5 + .../UncertainIndexDataset.jl | 6 +- .../UncertainIndexValueDataset.jl | 11 +- .../UncertainValueDataset.jl | 6 +- .../UncertainScalarPopulation.jl | 46 +- src/uncertain_values/operations/merging.jl | 34 +- 20 files changed, 545 insertions(+), 979 deletions(-) delete mode 100644 docs/src/uncertain_values/combining_and_merging.md delete mode 100644 docs/src/uncertain_values/types.md create mode 100644 docs/src/uncertain_values/uncertain_values.md delete mode 100644 docs/src/uncertain_values/uncertainvalues_Measurements.md delete mode 100644 docs/src/uncertain_values/uncertainvalues_certainvalue.md delete mode 100644 
docs/src/uncertain_values/uncertainvalues_examples.md delete mode 100644 docs/src/uncertain_values/uncertainvalues_fitted.md delete mode 100644 docs/src/uncertain_values/uncertainvalues_kde.md delete mode 100644 docs/src/uncertain_values/uncertainvalues_overview.md delete mode 100644 docs/src/uncertain_values/uncertainvalues_populations.md delete mode 100644 docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md diff --git a/docs/make.jl b/docs/make.jl index 4b61e732..a3d8fae8 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -24,17 +24,8 @@ using Interpolations PAGES = [ "index.md", - "Uncertain values" => [ - #"uncertain_values/uncertainvalues_overview.md", - "uncertain_values/types.md", - "uncertain_values/combining_and_merging.md", - "uncertain_values/uncertainvalues_examples.md", - ], - "Uncertain datasets" => [ - "uncertain_datasets/datasets.md", - "uncertain_datasets/uncertain_datasets_overview.md", - "uncertain_datasets/uncertain_indexvalue_dataset.md", - ], + "uncertain_values/uncertain_values.md", + "uncertain_datasets/datasets.md", "Uncertain statistics" => [ "Core statistics" => [ "uncertain_statistics/core_stats/core_statistics.md", diff --git a/docs/src/uncertain_datasets/datasets.md b/docs/src/uncertain_datasets/datasets.md index d7815f2f..ed0210fe 100644 --- a/docs/src/uncertain_datasets/datasets.md +++ b/docs/src/uncertain_datasets/datasets.md @@ -1,26 +1,96 @@ -# Types of datasets +# Uncertain datasets ## `UncertainValueDataset` -`UncertainValueDataset`s is an uncertain dataset type that has no explicit index -associated with its uncertain values. This type may come with some extra functionality -that the generic [UncertainDataset](uncertain_dataset.md) type does not support. - -Use this type when you want to be explicit about the values representing data values, -as opposed to [indices](uncertain_index_dataset.md). 
- ```@docs UncertainValueDataset ``` -## Uncertain index datasets +## `UncertainIndexDataset` + +```@docs +UncertainIndexDataset +``` -`UncertainIndexDataset`s is an uncertain dataset type that represents the indices -corresponding to an [UncertainValueDataset](uncertain_value_dataset.md). -It is meant to be used for the `indices` field in -[UncertainIndexValueDataset](uncertain_indexvalue_dataset.md)s instances. +## `UncertainIndexValueDataset` ```@docs -UncertainIndexDataset +UncertainIndexValueDataset +``` + +## Examples + +### Example 1: `UncertainIndexValueDataset` + + +`UncertainIndexValueDataset`s have uncertainties associated with both the +indices (e.g. time, depth, etc) and the values of the data points. + + +Let's consider some measurements with associated uncertainties, which are of different types, +because they are taken from different sources and/or were measured using different devices. +The values were measured at some time indices by an inaccurate clock, so that the times +of measuring are normally distributed values with fluctuating standard deviations. We'll +represent all of these measurements in an [`UncertainIndexValueDataset`](@ref). + +Built-in plot recipes make it easy to visualize such datasets with error bars. +By default, plotting the dataset plots the median value of the index and the measurement +(only for scatter plots), along with the 33rd to 67th percentile range error bars in both +directions.
You can also tune the error bars explicitly, by specifying +quantiles, like below: + +```@example uivd1 +using UncertainData, Plots + +# These are our measurements +r1 = [UncertainValue(Normal, rand(), rand()) for i = 1:10] +r2 = UncertainValue(rand(10000)) +r3 = UncertainValue(Uniform, rand(10000)) +r4 = UncertainValue(Normal, -0.1, 0.5) +r5 = UncertainValue(Gamma, 0.4, 0.8) +vals = [r1; r2; r3; r4; r5] + +# These are our time indices +inds = [UncertainValue(Normal, i, rand(Uniform(0, 1))) for i = 1:length(vals)] + +# Combine +x = UncertainIndexValueDataset(inds, vals) + +# Plot the 5th to 95th percentile range both for indices and values. +plot(x, [0.05, 0.95], [0.05, 0.95], xlabel = "Time", ylabel = "Value") +savefig("uncertainindexvaluedataset_ex1.png") # hide ``` + +![](uncertainindexvaluedataset_ex1.png) + +### Example 2: `UncertainIndexValueDataset` + +Say we had a dataset of 13 values for which the uncertainties are normally distributed +with increasing standard deviation through time. We also have some uncertain values +that are associated with the indices. + +```@example uivd2 +using UncertainData, Plots + +# Time indices +time_inds = 1:13 +uvals = [UncertainValue(Normal, ind, rand(Uniform()) + (ind / 6)) for ind in time_inds] +inds = UncertainIndexDataset(uvals) + +# Measurements +u1 = UncertainValue(Gamma, rand(Gamma(), 500)) +u2 = UncertainValue(rand(MixtureModel([Normal(1, 0.3), Normal(0.1, 0.1)]), 500)) +uvals3 = [UncertainValue(Normal, rand(), rand()) for i = 1:11] +measurements = UncertainValueDataset([u1; u2; uvals3]) + +# Combine +x = UncertainIndexValueDataset(inds, measurements) + +# Plot the dataset with error bars in both directions, using the 20th to 80th percentile +# range for the indices and the 33rd to 67th percentile range for the data values.
+plot(x, [0.2, 0.8], [0.33, 0.67], xlabel = "Time", ylabel = "Value") +savefig("uncertainindexvaluedataset_ex2.png") # hide +``` + +![](uncertainindexvaluedataset_ex2.png) diff --git a/docs/src/uncertain_datasets/uncertain_indexvalue_dataset.md b/docs/src/uncertain_datasets/uncertain_indexvalue_dataset.md index 59a92350..e69de29b 100644 --- a/docs/src/uncertain_datasets/uncertain_indexvalue_dataset.md +++ b/docs/src/uncertain_datasets/uncertain_indexvalue_dataset.md @@ -1,128 +0,0 @@ -# Uncertain index-value datasets - -## Documentation - -```@docs -UncertainIndexValueDataset -``` - -## Description - -`UncertainIndexValueDataset`s have uncertainties associated with both the -indices (e.g. time, depth, etc) and the values of the data points. - -## Defining an uncertain index-value dataset - -### Example 1 - -#### Defining the values - -Let's start by defining the uncertain data values and collecting them in -an `UncertainValueDataset`. - -```julia -using UncertainData, Plots -gr() -r1 = [UncertainValue(Normal, rand(), rand()) for i = 1:10] -r2 = UncertainValue(rand(10000)) -r3 = UncertainValue(Uniform, rand(10000)) -r4 = UncertainValue(Normal, -0.1, 0.5) -r5 = UncertainValue(Gamma, 0.4, 0.8) - -u_values = [r1; r2; r3; r4; r5] -udata = UncertainValueDataset(u_values); -``` - -#### Defining the indices - -The values were measures at some time indices by an inaccurate clock, so that the times -of measuring are normally distributed values with fluctuating standard deviations. - -```julia -u_timeindices = [UncertainValue(Normal, i, rand(Uniform(0, 1))) - for i = 1:length(udata)] -uindices = UncertainIndexDataset(u_timeindices); -``` - -#### Combinining the indices and values - -Now, combine the uncertain time indices and measurements into an -`UncertainIndexValueDataset`. - -```julia -x = UncertainIndexValueDataset(uindices, udata) -``` - -The built-in plot recipes make it easy to visualize the dataset. 
-By default, plotting the dataset plots the median value of the index and the measurement -(only for scatter plots), along with the 33rd to 67th percentile range error bars in both -directions. - -```julia -plot(x) -``` - -![](uncertain_indexvalue_dataset_plot_defaulterrorbars.svg) - -You can also tune the error bars by calling -`plot(udata::UncertainIndexValueDataset, idx_quantiles, val_quantiles)`, explicitly -specifying the quantiles in each direction, like so: - -```julia -plot(x, [0.05, 0.95], [0.05, 0.95]) -``` - -![](uncertain_indexvalue_dataset_plot_customerrorbars.svg) - -### Example 2 - -#### Defining the indices - -Say we had a dataset of 20 values for which the uncertainties are normally distributed -with increasing standard deviation through time. - -```julia -time_inds = 1:13 -uvals = [UncertainValue(Normal, ind, rand(Uniform()) + (ind / 6)) for ind in time_inds] -inds = UncertainIndexDataset(uvals) -``` - -That's it. We can also plot the 33rd to 67th percentile range for the indices. - -```plot -plot(inds, [0.33, 0.67]) -``` - -![](uncertain_indexvalue_dataset_indices.svg) - -#### Defining the values - -Let's define some uncertain values that are associated with the indices. - -```julia -u1 = UncertainValue(Gamma, rand(Gamma(), 500)) -u2 = UncertainValue(rand(MixtureModel([Normal(1, 0.3), Normal(0.1, 0.1)]), 500)) -uvals3 = [UncertainValue(Normal, rand(), rand()) for i = 1:11] - -measurements = [u1; u2; uvals3] -datavals = UncertainValueDataset(measurements) -``` - -![](uncertain_indexvalue_dataset_vals.svg) - -#### Combinining the indices and values - -Now, we combine the indices and the corresponding data. - -```julia -d = UncertainIndexValueDataset(inds, datavals) -``` - -Plot the dataset with error bars in both directions, using the 20th to 80th percentile -range for the indices and the 33rd to 67th percentile range for the data values. 
- -```julia -plot(d, [0.2, 0.8], [0.33, 0.67]) -``` - -![](uncertain_indexvalue_dataset_indices_and_vals.svg) diff --git a/docs/src/uncertain_values/combining_and_merging.md b/docs/src/uncertain_values/combining_and_merging.md deleted file mode 100644 index c0785734..00000000 --- a/docs/src/uncertain_values/combining_and_merging.md +++ /dev/null @@ -1,276 +0,0 @@ -# Combining and merging - -Because all uncertainties are handled using a resampling approach, it is trivial to -[`combine`](@ref) or merge uncertain values of different types into a single uncertain value. - -Depending on your data, you may want to choose of one the following ways of -representing multiple uncertain values as one: - -- [Combining](@ref uncertainvalue_combine). An ensemble of uncertain - values is represented as a weighted population. This approach is nice if you want - to impose expert-opinion on the relative sampling probabilities of uncertain - values in the ensemble, but still sample from the entire supports of each of the - furnishing values. This introduces no additional approximations besides what - is already present at the moment you define your uncertain values. -- [Merging](@ref uncertainvalue_merge). Multiple uncertain values are merged using - a kernel density estimate to the overall distribution. This approach introduces - approximations *beyond* what is present in the uncertain values when you define them. - -## [Combining: the population approach](@id uncertainvalue_combine) - -**Combining** uncertain values is done by representing them as a weighted population -of uncertain values, which is illustrated in the following example: - -```julia -# Assume we have done some analysis and have three points whose uncertainties -# significantly overlap. 
-v1 = UncertainValue(Normal(0.13, 0.52)) -v2 = UncertainValue(Normal(0.27, 0.42)) -v3 = UncertainValue(Normal(0.21, 0.61)) - -# Give each value equal sampling probabilities and represent as a population -pop = UncertainValue([v1, v2, v3], [1, 1, 1]) - -# Let the values v1, v2 and v3 be sampled with probability ratios 1-2-3 -pop = UncertainValue([v1, v2, v3], [1, 2, 3]) -``` - -![](figs/combining_uncertain_values.svg) - -This is not restricted to normal distributions! We can combine any type of -value in our population, even populations! - -```julia -# Consider a population of normal distributions, and a gamma distribution -v1 = UncertainValue(Normal(0.265, 0.52)) -v2 = UncertainValue(Normal(0.311, 0.15)) -v3 = UncertainValue([v1, v2], [2, 1]) -v4 = UncertainValue(Gamma(0.5, -1)) -pts = [v1, v4] -wts = [2, 1] - -# New population is a nested population with unequal weights -pop = UncertainValue(pts, wts) - -d1 = density(resample(pop, 20000), label = "population") - -d2 = plot() -density!(d2, resample(pop[1], 20000), label = "v1") -density!(d2, resample(pop[2], 20000), label = "v2") - -plot(d1, d2, layout = (2, 1), xlabel = "Value", ylabel = "Density", link = :x, xlims = (-2.5, 2.5)) -``` - -![](figs/combining_uncertain_values_ex2.svg) - -This makes it possible treat an ensemble of uncertain values as a single uncertain value. - -With equal weights, combining uncertain values introduces no bias beyond what is present in the data, -because resampling is done from the full supports of each of the furnishing values. -Additional information on relative sampling probabilities, however, be it informed by -expert opinion or quantative estimates, is easily incorporated by adjusting -the sampling weights. - -## [Merging: KDE approach](@id uncertainvalue_merge) - -**Merging** multiple uncertain values could be done by fitting a model distribution to -the values. 
Using any specific theoretical distribution as a model for the combined -uncertainty, however, is in general not possible, because the values may have -different types of uncertainties. - -Thus, in this package, kernel kernel density estimation is used to merge multiple uncertain values. -This has the advantage that you only have to deal with a single estimate to the combined -distribution, but introduces bias because the distribution is *estimated* and the -shape of the distribution depends on the parameters of the KDE procedure. - -### Without weights - -When no weights are provided, the combined value is computed -by resampling each of the `N` uncertain values `n/N` times, -then combining using kernel density estimation. - -```@docs -combine(uvals::Vector{AbstractUncertainValue}; n = 1000*length(uvals), - bw::Union{Nothing, Real} = nothing) -``` - -Weights dictating the relative contribution of each -uncertain value into the combined value can also be provided. `combine` works -with `ProbabilityWeights`, `AnalyticWeights`, -`FrequencyWeights` and the generic `Weights`. - -Below shows an example of combining - -```julia -v1 = UncertainValue(rand(1000)) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Normal, 3.7, 0.8) -uvals = [v1, v2, v3, v4] - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1", ls = :dash) -plot!(v2, label = L"v_2", ls = :dot) -vline!(v3.values, label = L"v_3") # plot each possible state as vline -plot!(v4, label = L"v_4") - -pcombined = plot(combine(uvals), title = L"merge(v_1, v_2, v_3, v_4)", lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), link = :x, ylabel = "Density") -``` - -![](figs/combine_example_noweights.png) - -### With weights - -`Weights`, `ProbabilityWeights` and `AnalyticWeights` are functionally the same. 
Either -may be used depending on whether the weights are assigned subjectively or quantitatively. -With `FrequencyWeights`, it is possible to control the exact number of draws from each -uncertain value that goes into the draw pool before performing KDE. - -#### `ProbabilityWeights` - -```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::ProbabilityWeights) -``` - -For example: - -```julia -v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.02) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Gamma, 8, 0.4) -uvals = [v1, v2, v3, v4]; - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1: KDE \, over \, empirical \, distribution", ls = :dash) -plot!(v2, label = L"v_2: Normal(0.8, 0.4)", ls = :dot) -# plot each possible state as vline -vline!(v3.values, - label = L"v_3: \, Discrete \, population\, [1,2,3], w/ \, weights \, [0.3, 0.4, 0.4]") -plot!(v4, label = L"v_4: \, Gamma(8, 0.4)") - -pcombined = plot( - combine(uvals, ProbabilityWeights([0.1, 0.3, 0.02, 0.5]), n = 100000, bw = 0.05), - title = L"combine([v_1, v_2, v_3, v_4], ProbabilityWeights([0.1, 0.3, 0.02, 0.5])", - lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), size = (800, 600), - link = :x, - ylabel = "Density", - tickfont = font(12), - legendfont = font(8), fg_legend = :transparent, bg_legend = :transparent) -``` - -![](figs/combine_example_pweights.png) - -#### `AnalyticWeights` - -```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::AnalyticWeights) -``` - -For example: - -```julia -v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.02) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Gamma, 8, 0.4) -uvals = [v1, v2, v3, v4]; - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1: KDE \, 
over \, empirical \, distribution", ls = :dash) -plot!(v2, label = L"v_2: Normal(0.8, 0.4)", ls = :dot) -vline!(v3.values, label = L"v_3: \, Discrete \, population\, [1,2,3], w/ \, weights \, [0.3, 0.4, 0.4]") # plot each possible state as vline -plot!(v4, label = L"v_4: \, Gamma(8, 0.4)") - -pcombined = plot(combine(uvals, AnalyticWeights([0.1, 0.3, 0.02, 0.5]), n = 100000, bw = 0.05), - title = L"combine([v_1, v_2, v_3, v_4], AnalyticWeights([0.1, 0.3, 0.02, 0.5])", lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), size = (800, 600), - link = :x, - ylabel = "Density", - tickfont = font(12), - legendfont = font(8), fg_legend = :transparent, bg_legend = :transparent) -``` - -![](figs/combine_example_aweights.png) - -### Generic `Weights` - -```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::Weights; - n = 1000*length(uvals)) -``` - -For example: - -```julia -v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.01) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Gamma, 8, 0.4) -uvals = [v1, v2, v3, v4]; - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1: KDE \, over \, empirical \, distribution", ls = :dash) -plot!(v2, label = L"v_2: Normal(0.8, 0.4)", ls = :dot) -# plot each possible state as vline -vline!(v3.values, - label = L"v_3: \, Discrete \, population\, [1,2,3], w/ \, weights \, [0.3, 0.4, 0.4]") -plot!(v4, label = L"v_4: \, Gamma(8, 0.4)") - -pcombined = plot(combine(uvals, Weights([0.1, 0.15, 0.1, 0.1]), n = 100000, bw = 0.02), - title = L"combine([v_1, v_2, v_3, v_4], Weights([0.1, 0.15, 0.1, 0.1]))", - lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), size = (800, 600), - link = :x, - ylabel = "Density", - tickfont = font(12), - legendfont = font(8), fg_legend = :transparent, bg_legend = :transparent) -``` - -![](figs/combine_example_generic_weights.png) - -### 
`FrequencyWeights` - -Using `FrequencyWeights`, one may specify the number of times each of the uncertain values -should be sampled to form the pooled resampled draws on which the final kernel density -estimate is performed. - -```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::FrequencyWeights) -``` - -For example: - -```julia -v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.01) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Gamma, 8, 0.4) -uvals = [v1, v2, v3, v4]; - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1: KDE \, over \, empirical \, distribution", ls = :dash) -plot!(v2, label = L"v_2: Normal(0.8, 0.4)", ls = :dot) -# plot each possible state as vline -vline!(v3.values, - label = L"v_3: \, Discrete \, population\, [1,2,3], w/ \, weights \, [0.3, 0.4, 0.4]") -plot!(v4, label = L"v_4: \, Gamma(8, 0.4)") - -pcombined = plot(combine(uvals, FrequencyWeights([10000, 20000, 3000, 5000]), bw = 0.05), - title = L"combine([v_1, v_2, v_3, v_4], FrequencyWeights([10000, 20000, 3000, 5000])", - lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), size = (800, 600), - link = :x, - ylabel = "Density", - tickfont = font(12), - legendfont = font(8), fg_legend = :transparent, bg_legend = :transparent) -``` - -![](figs/combine_example_fweights.png) diff --git a/docs/src/uncertain_values/types.md b/docs/src/uncertain_values/types.md deleted file mode 100644 index e5eeb3b8..00000000 --- a/docs/src/uncertain_values/types.md +++ /dev/null @@ -1,129 +0,0 @@ -# Types of uncertain values - -## Convenience constructors - -The following convenience constructors are used to defined uncertain values. 
- -```@docs -UncertainValue -``` - -## [Theoretical distributions](@id uncertain_value_theoretical_distribution) - -It is common in the scientific literature to encounter uncertain data values -which are reported as following a specific distribution. For example, an author -report the mean and standard deviation of a value stated to follow a -normal distribution. `UncertainData.jl` makes it easy to represent such values! - -```@docs -UncertainScalarBetaDistributed -UncertainScalarBetaBinomialDistributed -UncertainScalarBetaPrimeDistributed -UncertainScalarBinomialDistributed -UncertainScalarFrechetDistributed -UncertainScalarGammaDistributed -UncertainScalarNormallyDistributed -UncertainScalarUniformlyDistributed -``` - -## [Fitted theoretical distributions](@id uncertain_value_fitted_theoretical_distribution) - -For data values with histograms close to some known distribution, the user -may choose to represent the data by fitting a theoretical distribution to the -values. This will only work well if the histogram closely resembles a -theoretical distribution. - -```@docs -UncertainScalarTheoreticalFit -``` - -## [Kernel density estimates (KDE)](@id uncertain_value_kde) - -When your data have an empirical distribution that doesn't follow any obvious -theoretical distribution, the data may be represented by a kernel density -estimate to the underlying distribution. - -```@docs -UncertainScalarKDE -``` - -### Extended example - -Let's create a bimodal distribution, then sample 10000 values from it. - -```@example kde1 -using UncertainData, Distributions, Plots, StatsPlots -# Draw 1000 points from a three-component mixture model to create a multimodal distribution. 
-n1 = Normal(-3.0, 1.2) -n2 = Normal(8.0, 1.2) -n3 = Normal(0.0, 2.5) -M = MixtureModel([n1, n2, n3]) -s = rand(M, 1000); -histogram(s, nbins = 80) -ylabel!("Frequency"); xlabel!("Value") -savefig("figs/bimodal_empirical.svg") #hide -``` - -![](figs/bimodal_empirical.svg) - -It is not obvious which distribution to fit to such data. -A kernel density estimate, however, will always be a decent representation -of the data, because it doesn't follow a specific distribution and adapts to -the data values. - -To create a kernel density estimate, simply call the -`UncertainValue` constructor with a vector containing the sample. This will trigger -kernel density estimation. - -```@example kde1 -x = UncertainValue(s) -``` - -The plot below compares the empirical histogram (here represented as a density -plot) with our kernel density estimate. - -```@example kde1 -x = UncertainValue(s) -density(s, label = "10000 mixture model (M) samples") -density!(rand(x, 50000), - label = "50000 samples from KDE estimate to M") -xlabel!("data value") -ylabel!("probability density") -savefig("figs/KDEUncertainValue.svg") #hide -``` - -![](figs/KDEUncertainValue.svg) - - -## [Populations](@id uncertain_value_population) - -The `UncertainScalarPopulation` type allows representation of an uncertain scalar -represented by a population of values who will be sampled according to a vector of -explicitly provided probabilities. Think of it as an explicit kernel density estimate. - -```@docs -UncertainScalarPopulation -``` - -## Certain values - -The `CertainScalar` allows representation of values with no uncertainty. It behaves -just as a scalar, but can be mixed with uncertain values when performing -[mathematical operations](../mathematics/elementary_operations.md) and -[resampling](../resampling/resampling_overview.md). 
- -```@docs -CertainScalar -``` - -## Compatibility with Measurements.jl - -`Measurement` instances from the Measurements.jl package[^1] are in UncertainData.jl represented as normal distributions. If exact error propagation is a requirement and your data is exclusively normally distributed, use Measurements.jl. If your data is not necessarily -normally distributed and contain errors of different types, and -a resampling approach to error propagation is desired, use UncertainData.jl. - -See the [`UncertainValue`](@ref) constructor for instructions on how to -convert `Measurement`s to uncertain values compatible with this package. - -[^1]: - M. Giordano, 2016, "Uncertainty propagation with functionally correlated quantities", arXiv:1610.08716 (Bibcode: 2016arXiv161008716G). diff --git a/docs/src/uncertain_values/uncertain_values.md b/docs/src/uncertain_values/uncertain_values.md new file mode 100644 index 00000000..79a7a52d --- /dev/null +++ b/docs/src/uncertain_values/uncertain_values.md @@ -0,0 +1,404 @@ +# Uncertain values + +## `UncertainValue` constructors + +The following convenience constructors are used to define uncertain values. + +```@docs +UncertainValue +``` + +## Uncertain data types + +### [Theoretical distributions](@id uncertain_value_theoretical_distribution) + +It is common in the scientific literature to encounter uncertain data values +which are reported as following a specific distribution. For example, an author +may report the mean and standard deviation of a value stated to follow a +normal distribution. `UncertainData.jl` makes it easy to represent such values!
+ +```@docs +UncertainScalarBetaDistributed +UncertainScalarBetaBinomialDistributed +UncertainScalarBetaPrimeDistributed +UncertainScalarBinomialDistributed +UncertainScalarFrechetDistributed +UncertainScalarGammaDistributed +UncertainScalarNormallyDistributed +UncertainScalarUniformlyDistributed +``` + +### [Fitted theoretical distributions](@id uncertain_value_fitted_theoretical_distribution) + +For data values with histograms close to some known distribution, the user +may choose to represent the data by fitting a theoretical distribution to the +values. This will only work well if the histogram closely resembles a +theoretical distribution. + +```@docs +UncertainScalarTheoreticalFit +``` + +### [Kernel density estimates (KDE)](@id uncertain_value_kde) + +When your data have an empirical distribution that doesn't follow any obvious +theoretical distribution, the data may be represented by a kernel density +estimate to the underlying distribution. + +```@docs +UncertainScalarKDE +``` + +#### Extended example + +Let's create a multimodal distribution, then sample 1000 values from it. + +```@example kde1 +using UncertainData, Distributions, Plots, StatsPlots +# Draw 1000 points from a three-component mixture model to create a multimodal distribution. +n1 = Normal(-3.0, 1.2) +n2 = Normal(8.0, 1.2) +n3 = Normal(0.0, 2.5) +M = MixtureModel([n1, n2, n3]) +s = rand(M, 1000); +histogram(s, nbins = 80) +ylabel!("Frequency"); xlabel!("Value") +savefig("figs/bimodal_empirical.svg") #hide +``` + +![](figs/bimodal_empirical.svg) + +It is not obvious which distribution to fit to such data. +A kernel density estimate, however, will always be a decent representation +of the data, because it doesn't follow a specific distribution and adapts to +the data values. + +To create a kernel density estimate, simply call the +`UncertainValue` constructor with a vector containing the sample. This will trigger +kernel density estimation.
+ +```@example kde1 +x = UncertainValue(s) +``` + +The plot below compares the empirical histogram (here represented as a density +plot) with our kernel density estimate. + +```@example kde1 +x = UncertainValue(s) +density(s, label = "1000 mixture model (M) samples") +density!(rand(x, 50000), + label = "50000 samples from KDE estimate to M") +xlabel!("data value") +ylabel!("probability density") +savefig("figs/KDEUncertainValue.svg") #hide +``` + +![](figs/KDEUncertainValue.svg) + + +### [Populations](@id uncertain_value_population) + +The `UncertainScalarPopulation` type allows representation of an uncertain scalar +represented by a population of values which will be sampled according to a set of +explicitly provided probabilities. See [examples](@ref uncertainvalue_combine). + +```@docs +UncertainScalarPopulation +``` + +### Certain values + +The `CertainScalar` allows representation of values with no uncertainty. It behaves +just as a scalar, but can be mixed with uncertain values when performing +[mathematical operations](../mathematics/elementary_operations.md) and +[resampling](../resampling/resampling_overview.md). + +```@docs +CertainScalar +``` + +### Compatibility with Measurements.jl + +`Measurement` instances from the Measurements.jl package[^1] are in UncertainData.jl represented as normal distributions. If exact error propagation is a requirement and your data is exclusively normally distributed, use Measurements.jl. If your data is not necessarily +normally distributed and contains errors of different types, and +a resampling approach to error propagation is desired, use UncertainData.jl. + +See the [`UncertainValue`](@ref) constructor for instructions on how to +convert `Measurement`s to uncertain values compatible with this package. + +[^1]: + M. Giordano, 2016, "Uncertainty propagation with functionally correlated quantities", arXiv:1610.08716 (Bibcode: 2016arXiv161008716G).
+ + +## Examples + +First, load the necessary packages: + +```julia +using UncertainData, Distributions, KernelDensity, Plots +``` + +### Theoretical distributions + +#### A uniformly distributed uncertain value + +Consider the following contrived example. We've measured a data value with a poor instrument +that tells us that the value lies between `-2` and `3`. However, we know nothing +more about how the value is distributed on that interval. Then it may be reasonable to +represent that value as a uniform distribution on `[-2, 3]`. + +To construct an uncertain value following a uniform distribution, we use the constructor +for theoretical distributions with known parameters +(`UncertainValue(distribution, params...)`). + +The uniform distribution is defined by its lower and upper bounds, so we'll provide +these bounds as the parameters. + +```julia +u = UncertainValue(Uniform, -2, 3) + +# Plot the estimated density +bar(u, label = "", xlabel = "value", ylabel = "probability density") +``` + +![](figs/uncertainvalue_theoretical_uniform.svg) + +#### A normally distributed uncertain value + +A situation commonly encountered is to want to use someone else's data from a publication. +Usually, these values are reported as the mean or median, with some associated uncertainty. +Say we want to use an uncertain value which is normally distributed with mean `2.1` and +standard deviation `0.3`. + +Normal distributions also have two parameters, so we'll use the two-parameter constructor +as we did above. + +```julia +u = UncertainValue(Normal, 2.1, 0.3) + +# Plot the estimated density +bar(u, label = "", xlabel = "value", ylabel = "probability density") +``` + +![](figs/uncertainvalue_theoretical_normal.svg) + +### Kernel density estimated distributions + +One may also be given a distribution of numbers that's not quite normally distributed. +How to represent this uncertainty? Easy: we use a kernel density estimate to the distribution.
+ +Let's define a complicated distribution which is a mixture of two different normal +distributions, then draw a sample of numbers from it. + +```julia +M = MixtureModel([Normal(-5, 0.5), Normal(0.2)]) +some_sample = rand(M, 250) +``` + +Now, pretend that `some_sample` is a list of measurements we got from somewhere. +KDE estimates to the distribution can be defined implicitly or explicitly as follows: + +```julia +# If the only argument to `UncertainValue()` is a vector of numbers, KDE will be triggered. +u = UncertainValue(rand(M, 250)) + +# You may also tell the constructor explicitly that you want KDE. +u = UncertainValue(UnivariateKDE, rand(M, 250)) +``` + +Now, let's plot the resulting distribution. _Note: this is not the original mixture of +Gaussians we started out with, it's the kernel density estimate to that mixture!_ + +```julia +# Plot the estimated distribution. +plot(u, xlabel = "Value", ylabel = "Probability density") +``` + +![](figs/uncertainvalue_kde_bimodal.svg) + +### Theoretical distributions fitted to empirical data + +One may also be given a dataset whose histogram looks a lot like a theoretical +distribution. We may then select a theoretical distribution and fit its +parameters to the empirical data. + +Say our data was a sample that looks like it obeys a Gamma distribution. + +```julia +# Draw a 2000-point sample from a Gamma distribution with parameters α = 1.7 and θ = 5.5 +some_sample = rand(Gamma(1.7, 5.5), 2000) +``` + +To perform a parameter estimation, simply provide the distribution as the first +argument and the sample as the second argument to the `UncertainValue` constructor. + +```julia +# Take a sample from a Gamma distribution with parameters α = 1.7 and θ = 5.5 and +# create a histogram of the sample.
+some_sample = rand(Gamma(1.7, 5.5), 2000) + +p1 = histogram(some_sample, normalize = true, + fc = :black, lc = :black, + label = "", xlabel = "value", ylabel = "density") + +# For the uncertain value representation, fit a gamma distribution to the sample. +# Then, compare the histogram obtained from the original distribution to that obtained +# when resampling the fitted distribution +uv = UncertainValue(Gamma, some_sample) + +# Resample the fitted theoretical distribution +p2 = histogram(resample(uv, 10000), normalize = true, + fc = :blue, lc = :blue, + label = "", xlabel = "value", ylabel = "density") + +plot(p1, p2, layout = (2, 1), link = :x) +``` + +As expected, the histograms closely match (but are not exact because we estimated +the distribution using a limited sample). + +![](figs/uncertainvalue_theoretical_fitted_gamma.svg) + +### Populations + +See [examples for combining multiple values](@ref uncertainvalue_combine). + +## Combining/merging + +Because all uncertainties are handled using a resampling approach, it is trivial to +[`combine`](@ref) or merge uncertain values of different types into a single uncertain value. + +Depending on your data, you may want to choose one of the following ways of +representing multiple uncertain values as one. + +### [Exact approach: populations](@id uncertainvalue_combine) + +Combining uncertain values is done by representing them as a weighted population +of uncertain values. This approach exactly preserves the uncertainties of +the multiple uncertain values during resampling. Adding weights makes it possible to +impose expert-opinion on the relative sampling probabilities of uncertain +values but still sample from the entire supports of the furnishing distributions. + +With equal weights, combining uncertain values introduces no bias beyond what is present in the data, +because resampling is done from the full supports of each of the furnishing values.
+Additional information on relative sampling probabilities, however, be it informed by +expert opinion or quantitative estimates, is easily incorporated by adjusting +the sampling weights. + +```@example +using UncertainData, Plots, StatsPlots + +# Assume we have done some analysis and have three points whose uncertainties +# significantly overlap. We want to combine these into one uncertain value. +v1 = UncertainValue(Normal(-0.1, 0.52)) +v2 = UncertainValue(Normal(0.27, 0.42)) +v3 = UncertainValue(Normal(0.5, 0.61)) + +# Let the values v1, v2 and v3 be sampled with equal importance +pop = UncertainValue([v1, v2, v3], [1, 1, 1]) + +# Let the values v1, v2 and v3 be sampled with relative importance 1-2-3 +pop = UncertainValue([v1, v2, v3], [1, 2, 3]) + +d1 = plot() +density!(d1, resample(pop, 20000), label = "Overall population") +d2 = plot() +density!(d2, resample(v1, 20000), label = "v1") +density!(d2, resample(v2, 20000), label = "v2") +density!(d2, resample(v3, 20000), label = "v3") +plot(d1, d2, layout = (2, 1), xlabel = "Value", ylabel = "Density", + link = :x, xlims = (-2.5, 2.5), + legend = :topleft, fg_legend = :transparent, bg_legend = :transparent) +savefig("figs/population_ex1.png") #hide +``` + +![](figs/population_ex1.png) + +This is not restricted to normal distributions! We can combine any type of +value in our population, even populations! + +```@example +using UncertainData, Plots, StatsPlots +v1 = UncertainValue(Normal, 0.265, 0.52) +v2 = UncertainValue(Normal, 0.311, 0.15) +v3 = UncertainValue(Beta, 0.7, 0.8) +v4 = UncertainValue(Gamma, 0.5, 1.0) + +# Define two sub-populations. Members of each sub-population are sampled +# with equal importance. +p1, p2 = UncertainValue([v1, v4], [1, 1]), UncertainValue([v2, v3], [1, 1]) + +# In the overall population, sub-populations are sampled with relative importance 2 to 1.
+pop = UncertainValue([p1, p2], [2, 1]) + +d1 = plot() +density!(d1, resample(pop, 20000), label = "Overall population") +d2 = plot() +density!(d2, resample(pop[1], 20000), label = "Subpopulation p1 (v1 and v4)") +density!(d2, resample(pop[2], 20000), label = "Subpopulation p2 (v2 and v3)") + +d3 = plot() +density!(d3, resample(v1, 20000), label = "v1") +density!(d3, resample(v2, 20000), label = "v2") +density!(d3, resample(v3, 20000), label = "v3") +density!(d3, resample(v4, 20000), label = "v4") + +plot(d1, d2, d3, layout = (3, 1), xlabel = "Value", ylabel = "Density", + link = :x, xlims = (-2.5, 2.5), + legend = :topleft, fg_legend = :transparent, bg_legend = :transparent) +savefig("figs/population_ex2.png") #hide +``` + +![](figs/population_ex2.png) + +### [Approximate approach: KDE](@id uncertainvalue_merge) + +Merging multiple uncertain values could be done by fitting a model distribution to +the values. Using any specific theoretical distribution as a model for the combined +uncertainty, however, is in general not possible, because the values may have +different types of uncertainties. + +The `combine` function instead uses kernel density estimation to merge +multiple uncertain values. This has the advantage that you only +have to deal with a single estimate to the combined distribution, but +introduces bias because the distribution is *estimated*. Additionally, the +shape of the distribution depends on the parameters of the KDE procedure.
+ +```@docs +combine(uvals::Vector{AbstractUncertainValue}) +``` + +#### Example + +```@example +using UncertainData, Plots, StatsPlots +v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.02) +v2 = UncertainValue(Normal, 0.8, 0.4) +v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) +v4 = UncertainValue(Gamma, 8, 0.4) +uvals = [v1, v2, v3, v4]; + +p = plot() +plot!(v1, label = "v1", ls = :dashdot) +plot!(v2, label = "v2", ls = :dot) +vline!(v3, label = "v3", ls = :dash) # plot each possible state as vline +plot!(v4, label = "v4") + +pcombined = plot(lc = :black, + combine(uvals, n = 100000), + title = "combine([v1, v2, v3, v4])") +pcombined_pw = plot(lc = :black, + combine(uvals, ProbabilityWeights([0.1, 0.3, 0.02, 0.5]), n = 100000, bw = 0.05), + title = "combine([v1, v2, v3, v4], ProbabilityWeights([0.1, 0.3, 0.02, 0.5])") +pcombined_fw = plot(lc = :black, + combine(uvals, FrequencyWeights([10000, 20000, 3000, 5000]), bw = 0.05), + title = "combine([v1, v2, v3, v4], FrequencyWeights([10000, 20000, 3000, 5000])") + +plot(p, pcombined, pcombined_pw, pcombined_fw, + layout = (4, 1), link = :x, ylabel = "Density", lw = 1, + titlefont = font(8), guidefont = font(9), size = (700, 600)) +savefig("figs/combine_ex.png") #hide +``` + +![](figs/combine_ex.png) diff --git a/docs/src/uncertain_values/uncertainvalues_Measurements.md b/docs/src/uncertain_values/uncertainvalues_Measurements.md deleted file mode 100644 index ce994108..00000000 --- a/docs/src/uncertain_values/uncertainvalues_Measurements.md +++ /dev/null @@ -1,11 +0,0 @@ -# Compatibility with Measurements.jl - -`Measurement` instances from the Measurements.jl package[^1] are in UncertainData.jl represented as normal distributions. If exact error propagation is a requirement and your data is exclusively normally distributed, use Measurements.jl. 
If your data is not necessarily -normally distributed and contain errors of different types, and -a resampling approach to error propagation is desired, use UncertainData.jl. - -See the [`UncertainValue`](@ref) constructor for instructions on how to -convert `Measurement`s to uncertain values compatible with this package. - -[^1]: - M. Giordano, 2016, "Uncertainty propagation with functionally correlated quantities", arXiv:1610.08716 (Bibcode: 2016arXiv161008716G). \ No newline at end of file diff --git a/docs/src/uncertain_values/uncertainvalues_certainvalue.md b/docs/src/uncertain_values/uncertainvalues_certainvalue.md deleted file mode 100644 index 91be29f2..00000000 --- a/docs/src/uncertain_values/uncertainvalues_certainvalue.md +++ /dev/null @@ -1,11 +0,0 @@ -# Certain values - -The `CertainScalar` allows representation of values with no uncertainty. It behaves -just as a scalar, but can be mixed with uncertain values when performing -[mathematical operations](../mathematics/elementary_operations.md) and -[resampling](../resampling/resampling_overview.md). - - -```@docs -CertainScalar -``` diff --git a/docs/src/uncertain_values/uncertainvalues_examples.md b/docs/src/uncertain_values/uncertainvalues_examples.md deleted file mode 100644 index f33c2a71..00000000 --- a/docs/src/uncertain_values/uncertainvalues_examples.md +++ /dev/null @@ -1,128 +0,0 @@ -# Examples - -First, load the necessary packages: - -```julia -using UncertainData, Distributions, KernelDensity, Plots -``` - -## Theoretical distributions - -### A uniformly distributed uncertain value - -Consider the following contrived example. We've measure a data value with a poor instrument -that tells us that the value lies between `-2` and `3`. However, we but that we know nothing -more about how the value is distributed on that interval. Then it may be reasonable to -represent that value as a uniform distribution on `[-2, 3]`. 
- -To construct an uncertain value following a uniform distribution, we use the constructor -for theoretical distributions with known parameters -(`UncertainValue(distribution, params...)`). - -The uniform distribution is defined by its lower and upper bounds, so we'll provide -these bounds as the parameters. - -```julia -u = UncertainValue(Uniform, 1, 2) - -# Plot the estimated density -bar(u, label = "", xlabel = "value", ylabel = "probability density") -``` - -![](figs/uncertainvalue_theoretical_uniform.svg) - -### A normally distributed uncertain value - -A situation commonly encountered is to want to use someone else's data from a publication. -Usually, these values are reported as the mean or median, with some associated uncertainty. -Say we want to use an uncertain value which is normally distributed with mean `2.1` and -standard deviation `0.3`. - -Normal distributions also have two parameters, so we'll use the two-parameter constructor -as we did above. - -```julia -u = UncertainValue(Normal, 2.1, 0.3) - -# Plot the estimated density -bar(u, label = "", xlabel = "value", ylabel = "probability density") -``` - -![](figs/uncertainvalue_theoretical_normal.svg) - -## Kernel density estimated distributions - -One may also be given a a distribution of numbers that's not quite normally distributed. -How to represent this uncertainty? Easy: we use a kernel density estimate to the distribution. - -Let's define a complicated distribution which is a mixture of two different normal -distributions, then draw a sample of numbers from it. - -```julia -M = MixtureModel([Normal(-5, 0.5), Normal(0.2)]) -some_sample = rand(M, 250) -``` - -Now, pretend that `some_sample` is a list of measurements we got from somewhere. -KDE estimates to the distribution can be defined implicitly or explicitly as follows: - -```julia -# If the only argument to `UncertainValue()` is a vector of number, KDE will be triggered. 
-u = UncertainValue(rand(M, 250)) - -# You may also tell the constructor explicitly that you want KDE. -u = UncertainValue(UnivariateKDE, rand(M, 250)) -``` - -Now, let's plot the resulting distribution. _Note: this is not the original mixture of -Gaussians we started out with, it's the kernel density estimate to that mixture!_ - -```julia -# Plot the estimated distribution. -plot(u, xlabel = "Value", ylabel = "Probability density") -``` - -![](figs/uncertainvalue_kde_bimodal.svg) - -## Theoretical distributions fitted to empirical data - -One may also be given a dataset whose histogram looks a lot like a theoretical -distribution. We may then select a theoretical distribution and fit its -parameters to the empirical data. - -Say our data was a sample that looks like it obeys Gamma distribution. - -```julia -# Draw a 2000-point sample from a Gamma distribution with parameters α = 1.7 and θ = 5.5 -some_sample = rand(Gamma(1.7, 5.5), 2000) -``` - -To perform a parameter estimation, simply provide the distribution as the first -argument and the sample as the second argument to the `UncertainValue` constructor. - -```julia -# Take a sample from a Gamma distribution with parameters α = 1.7 and θ = 5.5 and -# create a histogram of the sample. -some_sample = rand(Gamma(1.7, 5.5), 2000) - -p1 = histogram(some_sample, normalize = true, - fc = :black, lc = :black, - label = "", xlabel = "value", ylabel = "density") - -# For the uncertain value representation, fit a gamma distribution to the sample. 
-# Then, compare the histogram obtained from the original distribution to that obtained -# when resampling the fitted distribution -uv = UncertainValue(Gamma, some_sample) - -# Resample the fitted theoretical distribution -p2 = histogram(resample(uv, 10000), normalize = true, - fc = :blue, lc = :blue, - label = "", xlabel = "value", ylabel = "density") - -plot(p1, p2, layout = (2, 1), link = :x) -``` - -As expected, the histograms closely match (but are not exact because we estimated -the distribution using a limited sample). - -![](figs/uncertainvalue_theoretical_fitted_gamma.svg) diff --git a/docs/src/uncertain_values/uncertainvalues_fitted.md b/docs/src/uncertain_values/uncertainvalues_fitted.md deleted file mode 100644 index 6c69eb94..00000000 --- a/docs/src/uncertain_values/uncertainvalues_fitted.md +++ /dev/null @@ -1,10 +0,0 @@ -# [Fitted theoretical distributions](@id uncertain_value_fitted_theoretical_distribution) - -For data values with histograms close to some known distribution, the user -may choose to represent the data by fitting a theoretical distribution to the -values. This will only work well if the histogram closely resembles a -theoretical distribution. - -```@docs -UncertainScalarTheoreticalFit -``` diff --git a/docs/src/uncertain_values/uncertainvalues_kde.md b/docs/src/uncertain_values/uncertainvalues_kde.md deleted file mode 100644 index 9f545467..00000000 --- a/docs/src/uncertain_values/uncertainvalues_kde.md +++ /dev/null @@ -1,56 +0,0 @@ -# [Kernel density estimates (KDE)](@id uncertain_value_kde) - -When your data have an empirical distribution that doesn't follow any obvious -theoretical distribution, the data may be represented by a kernel density -estimate to the underlying distribution. - -```@docs -UncertainScalarKDE -``` - -## Extended example - -Let's create a bimodal distribution, then sample 10000 values from it. 
- -```@example kde1 -using UncertainData, Distributions, Plots, StatsPlots -# Draw 1000 points from a three-component mixture model to create a multimodal distribution. -n1 = Normal(-3.0, 1.2) -n2 = Normal(8.0, 1.2) -n3 = Normal(0.0, 2.5) -M = MixtureModel([n1, n2, n3]) -s = rand(M, 1000); -histogram(s, nbins = 80) -ylabel!("Frequency"); xlabel!("Value") -savefig("figs/bimodal_empirical.svg") #hide -``` - -![](figs/bimodal_empirical.svg) - -It is not obvious which distribution to fit to such data. -A kernel density estimate, however, will always be a decent representation -of the data, because it doesn't follow a specific distribution and adapts to -the data values. - -To create a kernel density estimate, simply call the -`UncertainValue` constructor with a vector containing the sample. This will trigger -kernel density estimation. - -```@example kde1 -x = UncertainValue(s) -``` - -The plot below compares the empirical histogram (here represented as a density -plot) with our kernel density estimate. - -```@example kde1 -x = UncertainValue(s) -density(s, label = "10000 mixture model (M) samples") -density!(rand(x, 50000), - label = "50000 samples from KDE estimate to M") -xlabel!("data value") -ylabel!("probability density") -savefig("figs/KDEUncertainValue.svg") #hide -``` - -![](figs/KDEUncertainValue.svg) diff --git a/docs/src/uncertain_values/uncertainvalues_overview.md b/docs/src/uncertain_values/uncertainvalues_overview.md deleted file mode 100644 index 6d7129ca..00000000 --- a/docs/src/uncertain_values/uncertainvalues_overview.md +++ /dev/null @@ -1,125 +0,0 @@ -# [Types of uncertain values](@id uncertain_value_types) - -The core concept of `UncertainData.jl` is to replace an uncertain data value with a -probability distribution describing the point's uncertainty. - -The following types of uncertain values are currently implemented: - -- [Theoretical distributions with known parameters](uncertainvalues_theoreticaldistributions.md). 
-- [Theoretical distributions with parameters fitted to empirical data](uncertainvalues_fitted.md). -- [Kernel density estimated distributions estimated from empirical data](uncertainvalues_kde.md). -- [Weighted (nested) populations](uncertainvalues_populations.md) where the probability of - drawing values are already known, so you can skip kernel density estimation. Populations can be - nested, and may contain numerical values, uncertain values or both. -- [Values without uncertainty](uncertainvalues_certainvalue.md) have their own dedicated - [`CertainScalar`](@ref) type, so that you can uncertain values with certain values. -- [`Measurement` instances](uncertainvalues_Measurements.md) from [Measurements.jl](https://github.com/JuliaPhysics/Measurements.jl) are treated as normal distributions with known mean and standard devation. - -## Some quick examples - -See also the [extended examples](uncertainvalues_examples.md)! - -### Kernel density estimation (KDE) - -If the data doesn't follow an obvious theoretical distribution, the recommended -course of action is to represent the uncertain value with a kernel density -estimate of the distribution. - -``` julia tab="Implicit KDE estimate" -using Distributions, UncertainData, KernelDensity - -# Generate some random data from a normal distribution, so that we get a -# histogram resembling a normal distribution. -some_sample = rand(Normal(), 1000) - -# Uncertain value represented by a kernel density estimate (it is inferred -# that KDE is wanted when no distribution is provided to the constructor). -uv = UncertainValue(some_sample) -``` - -``` julia tab="Explicit KDE estimate" -using Distributions, UncertainData - -# Generate some random data from a normal distribution, so that we get a -# histogram resembling a normal distribution. 
-some_sample = rand(Normal(), 1000) - - -# Specify that we want a kernel density estimate representation -uv = UncertainValue(UnivariateKDE, some_sample) -``` - -### Populations - -If you have a population of values where each value has a probability assigned to it, -you can construct an uncertain value by providing the values and uncertainties as -two equal-length vectors to the constructor. Weights are normalized by default. - -```julia -vals = rand(100) -weights = rand(100) -p = UncertainValue(vals, weights) -``` - -### Fitting a theoretical distribution - -If your data has a histogram closely resembling some theoretical distribution, -the uncertain value may be represented by fitting such a distribution to the data. - -```julia -using Distributions, UncertainData - -# Generate some random data from a normal distribution, so that we get a -# histogram resembling a normal distribution. -some_sample = rand(Normal(), 1000) - -# Uncertain value represented by a theoretical normal distribution with -# parameters fitted to the data. -uv = UncertainValue(Normal, some_sample) -``` - -```julia -using Distributions, UncertainData - -# Generate some random data from a gamma distribution, so that we get a -# histogram resembling a gamma distribution. -some_sample = rand(Gamma(), 1000) - -# Uncertain value represented by a theoretical gamma distribution with -# parameters fitted to the data. -uv = UncertainValue(Gamma, some_sample) -``` - -### Theoretical distribution with known parameters - -It is common when working with uncertain data found in the scientific -literature that data value are stated to follow a distribution with given -parameters. For example, a data value may be given as normal distribution with -a given mean `μ = 2.2` and standard deviation `σ = 0.3`. 
- - -```julia -# Uncertain value represented by a theoretical normal distribution with -# known parameters μ = 2.2 and σ = 0.3 -uv = UncertainValue(Normal, 2.2, 0.3) -``` - -```julia -# Uncertain value represented by a theoretical gamma distribution with -# known parameters α = 2.1 and θ = 3.1 -uv = UncertainValue(Gamma, 2.1, 3.1) -``` - -```julia -# Uncertain value represented by a theoretical binomial distribution with -# known parameters p = 32 and p = 0.13 -uv = UncertainValue(Binomial, 32, 0.13) -``` - -### Values with no uncertainty - -Scalars with no uncertainty can also be represented. - -```julia -c1, c2 = UncertainValue(2), UncertainValue(2.2) -``` diff --git a/docs/src/uncertain_values/uncertainvalues_populations.md b/docs/src/uncertain_values/uncertainvalues_populations.md deleted file mode 100644 index 95e86df9..00000000 --- a/docs/src/uncertain_values/uncertainvalues_populations.md +++ /dev/null @@ -1,10 +0,0 @@ - -# [Populations](@id uncertain_value_population) - -The `UncertainScalarPopulation` type allows representation of an uncertain scalar -represented by a population of values who will be sampled according to a vector of -explicitly provided probabilities. Think of it as an explicit kernel density estimate. - -```@docs -UncertainScalarPopulation -``` diff --git a/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md b/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md deleted file mode 100644 index 5fda9f4c..00000000 --- a/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md +++ /dev/null @@ -1,17 +0,0 @@ -# [Theoretical distributions](@id uncertain_value_theoretical_distribution) - -It is common in the scientific literature to encounter uncertain data values -which are reported as following a specific distribution. For example, an author -report the mean and standard deviation of a value stated to follow a -normal distribution. `UncertainData.jl` makes it easy to represent such values! 
- -```@docs -UncertainScalarBetaDistributed -UncertainScalarBetaBinomialDistributed -UncertainScalarBetaPrimeDistributed -UncertainScalarBinomialDistributed -UncertainScalarFrechetDistributed -UncertainScalarGammaDistributed -UncertainScalarNormallyDistributed -UncertainScalarUniformlyDistributed -``` diff --git a/src/plot_recipes/recipes_certainvalues.jl b/src/plot_recipes/recipes_certainvalues.jl index 273b4634..b1cf78bf 100644 --- a/src/plot_recipes/recipes_certainvalues.jl +++ b/src/plot_recipes/recipes_certainvalues.jl @@ -9,6 +9,11 @@ using RecipesBase end end +@recipe function f(x::CertainScalar) + @series begin + x + end +end @recipe function f(certainvals::Vararg{CertainScalar,N}) where {N} @series begin diff --git a/src/uncertain_datasets/UncertainIndexDataset.jl b/src/uncertain_datasets/UncertainIndexDataset.jl index dcaa1870..483537bb 100644 --- a/src/uncertain_datasets/UncertainIndexDataset.jl +++ b/src/uncertain_datasets/UncertainIndexDataset.jl @@ -3,7 +3,11 @@ A dataset containing `indices` that have uncertainties associated with them. -## Examples +`UncertainIndexDataset`s are meant to contain the indices corresponding to +an [`UncertainValueDataset`](@ref), and are used for the `indices` field +in [`UncertainIndexValueDataset`](@ref)s. + +## Example Say we had a dataset of 20 values for which the uncertainties are normally distributed with increasing standard deviation through time. diff --git a/src/uncertain_datasets/UncertainIndexValueDataset.jl b/src/uncertain_datasets/UncertainIndexValueDataset.jl index 8fea49af..c55904bb 100644 --- a/src/uncertain_datasets/UncertainIndexValueDataset.jl +++ b/src/uncertain_datasets/UncertainIndexValueDataset.jl @@ -1,7 +1,5 @@ """ - UncertainIndexValueDataset{ - IDXTYP<:AbstractUncertainIndexDataset, - VALSTYP<:AbstractUncertainValueDataset} + UncertainIndexValueDataset(indices, values) A generic dataset type consisting of a set of uncertain `indices` (e.g. time, depth, order, etc...) 
and a set of uncertain `values`. @@ -13,13 +11,6 @@ The i-th index is assumed to correspond to the i-th value. For example, if - `data.values[7]` is the value for the index `data.indices[7]`. - `data[3]` is an index-value tuple `(data.indices[3], data.values[3])`. -## Fields - -- **`indices::T where {T <: AbstractUncertainIndexDataset}`**: The uncertain indices, - represented by some type of uncertain index dataset. -- **`values::T where {T <: AbstractUncertainValueDataset}`**: The uncertain values, - represented by some type of uncertain index dataset. - ## Example ```julia diff --git a/src/uncertain_datasets/UncertainValueDataset.jl b/src/uncertain_datasets/UncertainValueDataset.jl index a7e924d6..2ea03e3f 100644 --- a/src/uncertain_datasets/UncertainValueDataset.jl +++ b/src/uncertain_datasets/UncertainValueDataset.jl @@ -1,8 +1,10 @@ """ - UncertainValueDataset + UncertainValueDataset(values) + +A dataset of uncertain values which have no explicit index associated with its uncertain values. Use this type when you want to be explicit +about the values representing data values, as opposed to [`UncertainIndexDataset`](@ref)s. -A dataset of uncertain values. ## Fields diff --git a/src/uncertain_values/UncertainScalarPopulation.jl b/src/uncertain_values/UncertainScalarPopulation.jl index b4750ba3..a4bc5f53 100644 --- a/src/uncertain_values/UncertainScalarPopulation.jl +++ b/src/uncertain_values/UncertainScalarPopulation.jl @@ -22,17 +22,14 @@ end An `UncertainScalarPopulation`, which consists of some population `members` with associated weights (`probs`) that indicate the relative importance of the -population members (for example during resampling). - -Members can be either numerical values, any type of uncertain value defined -in this package (including populations, so nested populations are possible). +population members (for example during resampling). 
The `members` can be either +numerical values, any type of uncertain value defined in this package +(including populations, so nested populations are possible). ## Examples -### Scalar populations - -Weighted scalar populations are defined as follows.* Note: Weights must always be provided, -and scalars must be converted to uncertain values before creating the population.* +Weighted scalar populations are defined as follows. Weights must always be provided, +and scalars must be converted to uncertain values before creating the population. ```julia using UncertainData @@ -45,8 +42,6 @@ p = UncertainScalarPopulation(members, [1, 1, 1]) p = UncertainScalarPopulation(members, [2, 3, 1]) ``` -## Populations with mixed-type uncertain values - Uncertain populations can also consist of a mixture of different types of uncertain values. Here, we use a population consisting of a scalar, two theoretical distributions with known parameters, and a theoretical uniform distribution whose parameters @@ -55,32 +50,29 @@ of the population. ```julia s = rand(1000) -members = [ - 3.0, - UncertainValue(Normal, 0, 1), - UncertainValue(Gamma, 2, 3), - UncertainValue(Uniform, s) -] +members = [3.0, UncertainValue(Normal, 0, 1), UncertainValue(Gamma, 2, 3), + UncertainValue(Uniform, s)] wts = [0.5, 0.5, 0.5, 0.5] -p = UncertainScalarPopulation(members, wts) +p = UncertainValue(members, wts) ``` -## Nested populations - -Nested populations are also possible. +Nested populations are also possible, and sub-populations can be given +unequal sampling priority. ```julia using UncertainData, Distributions s = rand(Normal(0.1, 2.0), 8000) -m1 = [UncertainValue(Normal, 0.5, 0.33), UncertainValue(Gamma, 0.6, 0.9)] +v1, v2 = UncertainValue(Normal, 0.5, 0.33), UncertainValue(Gamma, 0.6, 0.9) +v3, v4 = 2.2, UncertainValue(Normal, s), UncertainValue(s) -# If including scalars, these must be converted to `CertainScalar`s first, -# as follows. 
-m2 = [2.2, UncertainValue(Normal, s), UncertainValue(s)] +# When sampling sub-population m1, members v1 and v2 are given relative importance 1 to 3 +# When sampling sub-population m2, members v3 and v4 are given relative importance 2 to 1 +m1 = UncertainValue([v1, v2], [1, 3]) +m2 = UncertainValue([v3, v4], [2, 1]) -# Give m1 and m2 relative weights 0.1 and 0.5 (these are normalized, so -# do not need to sum to 1). -p = UncertainScalarPopulation([m1, m2], [0.1, 0.5]) +# When sampling the overall population, the sub-populations m1 and m2 +# are sampled with equal importance. +p = UncertainValue([m1, m2], [1, 1]) ``` """ struct UncertainScalarPopulation{T, PW <: StatsBase.AbstractWeights} <: AbstractScalarPopulation{T, PW} diff --git a/src/uncertain_values/operations/merging.jl b/src/uncertain_values/operations/merging.jl index 6a882871..56a2f54e 100644 --- a/src/uncertain_values/operations/merging.jl +++ b/src/uncertain_values/operations/merging.jl @@ -1,22 +1,31 @@ """ - combine(uvals::Vector{AbstractUncertainValue}; n = 10000*length(uvals), - bw::Union{Nothing, Real} = nothing) + combine(x::Vector{AbstractUncertainValue}; + n = 10000*length(uvals), bw::Union{Nothing, Real} = nothing) → UncertainScalarKDE + combine(x::Vector{AbstractUncertainValue}, weights::ProbabilityWeights; kwargs...) → UncertainScalarKDE + combine(x::Vector{AbstractUncertainValue}, weights::AnalyticWeights; kwargs...) → UncertainScalarKDE + combine(x::Vector{AbstractUncertainValue}, weights::FrequencyWeights; kwargs...) → UncertainScalarKDE -Combine multiple uncertain values into a single uncertain value. This is -done by resampling each uncertain value in `uvals`, `n` times each, -then pooling these draws together. Finally, a kernel density estimate to the final -distribution is computed over those draws. +Combine multiple uncertain values `x` into a single uncertain value using kernel +density estimation (KDE). 
This is done by resampling each uncertain value in `x`, +`n` times each, then pooling these draws together. Finally, an approximation +to the final distribution is computed over those draws using KDE. The KDE bandwidth is controlled by `bw`. By default, `bw = nothing`; in this case, the bandwidth is determined using the `KernelDensity.default_bandwidth` function. +Tip: For very wide, close-to-normal distributions, the default bandwidth usually +works well. For very peaked distributions or discrete populations, however, +lowering the bandwidth significantly may be a better choice. -!!! tip +If no weights are provided, the sample pool on which KDE is performed is computed +by resampling each of the `N` uncertain values `n/N` times and pooling these values +together. If `weights` are provided, then the `weights` control the relative sampling +importance of the elements of `x`. `Weights`, `ProbabilityWeights` and `AnalyticWeights` are +functionally the same, and represent relative sampling probabilities. Either +may be used depending on whether the weights are assigned subjectively or quantitatively. +With `FrequencyWeights`, it is possible to control the exact number of draws from each +uncertain value that goes into the draw pool before performing kernel density estimation. - For very wide, close-to-normal distributions, the default bandwidth may work well. - If you're combining very peaked distributions or discrete populations, however, - you may want to lower the bandwidth significantly. 
- -# Example +## Example ```julia v1 = UncertainValue(Normal, 1, 0.3) @@ -80,7 +89,6 @@ uvals = [v1, v2, v3, v4]; combine(uvals, ProbabilityWeights([0.2, 0.1, 0.3, 0.2])) combine(uvals, pweights([0.2, 0.1, 0.3, 0.2]), n = 20000) # adjust number of total draws ``` - """ function combine(uvals::Vector{AbstractUncertainValue}, weights::ProbabilityWeights; n = 10000*length(uvals), From c8996a0be64f3dc2b3e9434124446715c71c76e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 30 Apr 2021 16:31:53 +0200 Subject: [PATCH 11/21] Combine constraints docs --- docs/make.jl | 6 +- .../available_constraints.md | 46 ---- .../constrain_uncertain_values.md | 260 ------------------ docs/src/sampling_constraints/old.txt | 235 ++++++++++++++++ .../sampling_constraints.md | 65 +++++ 5 files changed, 301 insertions(+), 311 deletions(-) delete mode 100644 docs/src/sampling_constraints/available_constraints.md create mode 100644 docs/src/sampling_constraints/old.txt create mode 100644 docs/src/sampling_constraints/sampling_constraints.md diff --git a/docs/make.jl b/docs/make.jl index a3d8fae8..fd57edf0 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -26,6 +26,7 @@ PAGES = [ "index.md", "uncertain_values/uncertain_values.md", "uncertain_datasets/datasets.md", + "sampling_constraints/sampling_constraints.md", "Uncertain statistics" => [ "Core statistics" => [ "uncertain_statistics/core_stats/core_statistics.md", @@ -48,11 +49,6 @@ PAGES = [ "uncertain_statistics/hypothesistests/anderson_darling_test.md" ], ], - "Sampling constraints" => [ - "sampling_constraints/available_constraints.md", - "sampling_constraints/constrain_uncertain_values.md", - "sampling_constraints/sequential_constraints.md" - ], "Binning" => [ "binning/bin.md" diff --git a/docs/src/sampling_constraints/available_constraints.md b/docs/src/sampling_constraints/available_constraints.md deleted file mode 100644 index 773d9f00..00000000 --- 
a/docs/src/sampling_constraints/available_constraints.md +++ /dev/null @@ -1,46 +0,0 @@ - -# Available sampling constraints - -The following sampling constraints are available. These constraints may be used in any resampling setting. - -## Standard deviation - -```@docs -TruncateStd -``` - -## Minimum value - -```@docs -TruncateMinimum -``` - -## Maximum value - -```@docs -TruncateMaximum -``` - -## Value range - -```@docs -TruncateRange -``` - -## Lower quantile - -```@docs -TruncateLowerQuantile -``` - -## Upper quantile - -```@docs -TruncateUpperQuantile -``` - -## Quantile range - -```@docs -TruncateQuantiles -``` diff --git a/docs/src/sampling_constraints/constrain_uncertain_values.md b/docs/src/sampling_constraints/constrain_uncertain_values.md index 4f254d4a..8b137891 100644 --- a/docs/src/sampling_constraints/constrain_uncertain_values.md +++ b/docs/src/sampling_constraints/constrain_uncertain_values.md @@ -1,261 +1 @@ -# Documentation - -```@docs -constrain(uv::AbstractUncertainValue, constraint::SamplingConstraint) -``` - -# Examples: constraining uncertain values - -## Theoretical distributions - -``` julia tab="Theoretical distribution" -using UncertainData, Distributions - -# Define an uncertain value furnished by a theoretical distribution -uv = UncertainValue(Normal, 1, 0.5) - -# Constrain the support of the furnishing distribution using various -# constraints -uvc_lq = constrain(uv, TruncateLowerQuantile(0.2)) -uvc_uq = constrain(uv, TruncateUpperQuantile(0.8)) -uvc_q = constrain(uv, TruncateQuantiles(0.2, 0.8)) -uvc_min = constrain(uv, TruncateMinimum(0.5)) -uvc_max = constrain(uv, TruncateMaximum(1.5)) -uvc_range = constrain(uv, TruncateRange(0.5, 1.5)) -``` - -## Theoretical distributions with fitted parameters - -``` julia tab="Theoretical distribution with fitted parameters" -using UncertainData, Distributions - -# Define an uncertain value furnished by a theoretical distribution with -# parameters fitted to empirical data -uv = 
UncertainValue(Normal, rand(Normal(-1, 0.2), 1000)) - -# Constrain the support of the furnishing distribution using various -# constraints -uvc_lq = constrain(uv, TruncateLowerQuantile(0.2)) -uvc_uq = constrain(uv, TruncateUpperQuantile(0.8)) -uvc_q = constrain(uv, TruncateQuantiles(0.2, 0.8)) -uvc_min = constrain(uv, TruncateMinimum(0.5)) -uvc_max = constrain(uv, TruncateMaximum(1.5)) -uvc_range = constrain(uv, TruncateRange(0.5, 1.5)) -``` - -## Kernel density estimated distributions - -``` julia tab="Kernel density estimated distribution" -# Define an uncertain value furnished by a kernel density estimate to the -# distribution of the empirical data -uv = UncertainValue(UnivariateKDE, rand(Uniform(10, 15), 1000)) - -# Constrain the support of the furnishing distribution using various -# constraints -uvc_lq = constrain(uv, TruncateLowerQuantile(0.2)) -uvc_uq = constrain(uv, TruncateUpperQuantile(0.8)) -uvc_q = constrain(uv, TruncateQuantiles(0.2, 0.8)) -uvc_min = constrain(uv, TruncateMinimum(13)) -uvc_max = constrain(uv, TruncateMaximum(13)) -uvc_range = constrain(uv, TruncateRange(11, 12)) -``` - -## (nested) weighted populations of uncertain values - -Let's define a complicated uncertain value that is defined by a nested weighted population. - -```julia -# Some subpopulations consisting of both scalar values and distributions -subpop1_members = [UncertainValue(Normal, 0, 1), UncertainValue(Uniform, -2, 2), -5] -subpop2_members = [UncertainValue(Normal, -2, 1), UncertainValue(Uniform, -6, -1), - -3, UncertainValue(Gamma, 1, 0.4)] - -# Define the probabilities of sampling the different population members within the -# subpopulations. 
Weights are normalised, so we can input any numbers here indicating -# relative importance -subpop1_probs = [1, 2, 1] -subpop2_probs = [0.1, 0.2, 0.3, 0.1] - -pop1 = UncertainValue(subpop1_members, subpop1_probs) -pop2 = UncertainValue(subpop2_members, subpop2_probs) - -# Define the probabilities of sampling the two subpopulations in the overall population. -pop_probs = [0.3, 0.7] - -# Construct overall population -pop_mixed = UncertainValue([pop1, pop2], pop_probs) -``` - -Now we can draw samples from this nested population. Sampling directly from the -entire distribution is done by calling `resample(pop_mixed, n_draws)`. However, -in some cases we might want to constrain the sampling to some minimum, maximum -or range of values. You can do that by using sampling constraints. - -### TruncateMinimum - -To truncate the overall population below at some absolute value, use a -[`TruncateMinimum`](@ref) sampling constraint. - -```julia -constraint = TruncateMinimum(-1.1) -pop_mixed_constrained = constrain(pop_mixed, constraint); - -n_draws = 500 -x = resample(pop_mixed, n_draws) -xc = resample(pop_mixed_constrained, n_draws) - -p1 = scatter(x, label = "", title = "resampling before constraint") -p2 = scatter(xc, label = "", title = "resampling after constraint") -hline!([constraint.min], label = "TruncateMinimum(-1.1)") -plot(p1, p2, layout = (2, 1), link = :both, ylims = (-3, 3), ms = 1) -xlabel!("Sampling #"); ylabel!("Value") -``` - -![](figs/constraining_complex_population_truncateminimum.svg) - -### TruncateMaximum - -To truncate the overall population above at some absolute value, use a -[`TruncateMaximum`](@ref) sampling constraint. 
- -```julia -constraint = TruncateMaximum(1.5) -pop_mixed_constrained = constrain(pop_mixed, constraint); - -n_draws = 500 -x = resample(pop_mixed, n_draws) -xc = resample(pop_mixed_constrained, n_draws) - -p1 = scatter(x, label = "", title = "resampling before constraint") -p2 = scatter(xc, label = "", title = "resampling after constraint") -hline!([constraint.max], label = "TruncateMaximum(1.5)") -plot(p1, p2, layout = (2, 1), link = :both, ylims = (-3, 3), ms = 1) -xlabel!("Sampling #"); ylabel!("Value") -``` - -![](figs/constraining_complex_population_truncatemaximum.svg) - -### TruncateRange - -To truncate the overall population above at some range of values, use a -[`TruncateRange`](@ref) sampling constraint. - -```julia -constraint = TruncateRange(-1.5, 1.7) -pop_mixed_constrained = constrain(pop_mixed, constraint); - -n_draws = 500 -x = resample(pop_mixed, n_draws) -xc = resample(pop_mixed_constrained, n_draws) - -p1 = scatter(x, label = "", title = "resampling before constraint") -p2 = scatter(xc, label = "", title = "resampling after constraint") -hline!([constraint.min, constraint.max], label = "TruncateRange(-1.5, 1.7)") - -plot(p1, p2, layout = (2, 1), link = :both, ylims = (-3, 3), ms = 1) -xlabel!("Sampling #"); ylabel!("Value") -``` - -![](figs/constraining_complex_population_truncaterange.svg) - -### TruncateLowerQuantile - -To truncate the overall population below at some quantile of -the overall population, use a -[`TruncateLowerQuantile`](@ref) sampling constraint. - -```julia -constraint = TruncateLowerQuantile(0.2) - -# Constrain the population below at the lower 20th percentile -# Resample the entire population (and its subpopulations) according to -# their probabilities 30000 times to determine the percentile bound. -n_draws = 30000 -pop_mixed_constrained = constrain(pop_mixed, constraint, n_draws); - -# Calculate quantile using the same number of samples for plotting. 
-# Will not be exactly the same as the quantile actually used for -# truncating, except in the limit n -> ∞ -q = quantile(resample(pop_mixed, n_draws), constraint.lower_quantile) - -n_draws_plot = 3000 -x = resample(pop_mixed, n_draws_plot) -xc = resample(pop_mixed_constrained, n_draws_plot) - -p1 = scatter(x, label = "", title = "resampling before constraint") -p2 = scatter(xc, label = "", title = "resampling after constraint") -hline!([lq], label = "TruncateLowerQuantile(0.2)") -plot(p1, p2, layout = (2, 1), link = :both, ms = 1, ylims = (-6, 4)) -xlabel!("Sampling #"); ylabel!("Value") -``` - -![](figs/constraining_complex_population_truncatelowerquantile.svg) - -### TruncateUpperQuantile - -To truncate the overall population below at some quantile of -the overall population, use a -[`TruncateUpperQuantile`](@ref) sampling constraint. - -```julia -constraint = TruncateUpperQuantile(0.8) - -# Constrain the population below at the lower 20th percentile -# Resample the entire population (and its subpopulations) according to -# their probabilities 30000 times to determine the percentile bound. -n_resample_draws = 30000 -pop_mixed_constrained = constrain(pop_mixed, constraint, n_resample_draws); - -# Calculate quantile using the same number of samples for plotting. 
-# Will not be exactly the same as the quantile actually used for -# truncating, except in the limit n_resample_draws -> ∞ -q = quantile(resample(pop_mixed, n_resample_draws), constraint.upper_quantile) - -n_plot_draws = 3000 -x = resample(pop_mixed, n_plot_draws) -xc = resample(pop_mixed_constrained, n_plot_draws) - -p1 = scatter(x, label = "", title = "resampling before constraint") -p2 = scatter(xc, label = "", title = "resampling after constraint") -hline!([q], label = "TruncateUpperQuantile(0.8)") -plot(p1, p2, layout = (2, 1), link = :both, ms = 1, ylims = (-6, 4)) -xlabel!("Sampling #"); ylabel!("Value") -``` - -![](figs/constraining_complex_population_truncateupperquantile.svg) - -### TruncateQuantiles - -To truncate the overall population below at some quantile of -the overall population, use a -[`TruncateQuantiles`](@ref) sampling constraint. - -```julia -constraint = TruncateQuantiles(0.2, 0.8) - -# Constrain the population below at the lower 20th percentile -# Resample the entire population (and its subpopulations) according to -# their probabilities 30000 times to determine the percentile bound. -n_resample_draws = 30000 -pop_mixed_constrained = constrain(pop_mixed, constraint, n_resample_draws); - -# Calculate quantile using the same number of samples for plotting. 
-# Will not be exactly the same as the quantile actually used for -# truncating, except in the limit n_resample_draws -> ∞ -s = resample(pop_mixed, n_resample_draws) -qs = quantile(s, [constraint.lower_quantile, constraint.upper_quantile]) - -n_plot_draws = 3000 -x = resample(pop_mixed, n_plot_draws) -xc = resample(pop_mixed_constrained, n_plot_draws) - -p1 = scatter(x, label = "", title = "resampling before constraint") -p2 = scatter(xc, label = "", title = "resampling after constraint") -hline!([qs], label = "TruncateQuantiles(0.2, 0.8)") - -plot(p1, p2, layout = (2, 1), link = :both, ms = 1, ylims = (-6, 4)) -xlabel!("Sampling #"); ylabel!("Value") -``` - -![](figs/constraining_complex_population_truncatequantiles.svg) diff --git a/docs/src/sampling_constraints/old.txt b/docs/src/sampling_constraints/old.txt new file mode 100644 index 00000000..f3aed51a --- /dev/null +++ b/docs/src/sampling_constraints/old.txt @@ -0,0 +1,235 @@ + +## Theoretical distributions with fitted parameters + +``` julia tab="Theoretical distribution with fitted parameters" +using UncertainData, Distributions + +# Define an uncertain value furnished by a theoretical distribution with +# parameters fitted to empirical data +uv = UncertainValue(Normal, rand(Normal(-1, 0.2), 1000)) + +# Constrain the support of the furnishing distribution using various +# constraints +uvc_lq = constrain(uv, TruncateLowerQuantile(0.2)) +uvc_uq = constrain(uv, TruncateUpperQuantile(0.8)) +uvc_q = constrain(uv, TruncateQuantiles(0.2, 0.8)) +uvc_min = constrain(uv, TruncateMinimum(0.5)) +uvc_max = constrain(uv, TruncateMaximum(1.5)) +uvc_range = constrain(uv, TruncateRange(0.5, 1.5)) +``` + +## Kernel density estimated distributions + +``` julia tab="Kernel density estimated distribution" +# Define an uncertain value furnished by a kernel density estimate to the +# distribution of the empirical data +uv = UncertainValue(UnivariateKDE, rand(Uniform(10, 15), 1000)) + +# Constrain the support of the furnishing 
distribution using various +# constraints +uvc_lq = constrain(uv, TruncateLowerQuantile(0.2)) +uvc_uq = constrain(uv, TruncateUpperQuantile(0.8)) +uvc_q = constrain(uv, TruncateQuantiles(0.2, 0.8)) +uvc_min = constrain(uv, TruncateMinimum(13)) +uvc_max = constrain(uv, TruncateMaximum(13)) +uvc_range = constrain(uv, TruncateRange(11, 12)) +``` + +## (nested) weighted populations of uncertain values + +Let's define a complicated uncertain value that is defined by a nested weighted population. + +```julia +# Some subpopulations consisting of both scalar values and distributions +subpop1_members = [UncertainValue(Normal, 0, 1), UncertainValue(Uniform, -2, 2), -5] +subpop2_members = [UncertainValue(Normal, -2, 1), UncertainValue(Uniform, -6, -1), + -3, UncertainValue(Gamma, 1, 0.4)] + +# Define the probabilities of sampling the different population members within the +# subpopulations. Weights are normalised, so we can input any numbers here indicating +# relative importance +subpop1_probs = [1, 2, 1] +subpop2_probs = [0.1, 0.2, 0.3, 0.1] + +pop1 = UncertainValue(subpop1_members, subpop1_probs) +pop2 = UncertainValue(subpop2_members, subpop2_probs) + +# Define the probabilities of sampling the two subpopulations in the overall population. +pop_probs = [0.3, 0.7] + +# Construct overall population +pop_mixed = UncertainValue([pop1, pop2], pop_probs) +``` + +Now we can draw samples from this nested population. Sampling directly from the +entire distribution is done by calling `resample(pop_mixed, n_draws)`. However, +in some cases we might want to constrain the sampling to some minimum, maximum +or range of values. You can do that by using sampling constraints. + +### TruncateMinimum + +To truncate the overall population below at some absolute value, use a +[`TruncateMinimum`](@ref) sampling constraint. 
+ +```julia +constraint = TruncateMinimum(-1.1) +pop_mixed_constrained = constrain(pop_mixed, constraint); + +n_draws = 500 +x = resample(pop_mixed, n_draws) +xc = resample(pop_mixed_constrained, n_draws) + +p1 = scatter(x, label = "", title = "resampling before constraint") +p2 = scatter(xc, label = "", title = "resampling after constraint") +hline!([constraint.min], label = "TruncateMinimum(-1.1)") +plot(p1, p2, layout = (2, 1), link = :both, ylims = (-3, 3), ms = 1) +xlabel!("Sampling #"); ylabel!("Value") +``` + +![](figs/constraining_complex_population_truncateminimum.svg) + +### TruncateMaximum + +To truncate the overall population above at some absolute value, use a +[`TruncateMaximum`](@ref) sampling constraint. + +```julia +constraint = TruncateMaximum(1.5) +pop_mixed_constrained = constrain(pop_mixed, constraint); + +n_draws = 500 +x = resample(pop_mixed, n_draws) +xc = resample(pop_mixed_constrained, n_draws) + +p1 = scatter(x, label = "", title = "resampling before constraint") +p2 = scatter(xc, label = "", title = "resampling after constraint") +hline!([constraint.max], label = "TruncateMaximum(1.5)") +plot(p1, p2, layout = (2, 1), link = :both, ylims = (-3, 3), ms = 1) +xlabel!("Sampling #"); ylabel!("Value") +``` + +![](figs/constraining_complex_population_truncatemaximum.svg) + +### TruncateRange + +To truncate the overall population above at some range of values, use a +[`TruncateRange`](@ref) sampling constraint. 
+ +```julia +constraint = TruncateRange(-1.5, 1.7) +pop_mixed_constrained = constrain(pop_mixed, constraint); + +n_draws = 500 +x = resample(pop_mixed, n_draws) +xc = resample(pop_mixed_constrained, n_draws) + +p1 = scatter(x, label = "", title = "resampling before constraint") +p2 = scatter(xc, label = "", title = "resampling after constraint") +hline!([constraint.min, constraint.max], label = "TruncateRange(-1.5, 1.7)") + +plot(p1, p2, layout = (2, 1), link = :both, ylims = (-3, 3), ms = 1) +xlabel!("Sampling #"); ylabel!("Value") +``` + +![](figs/constraining_complex_population_truncaterange.svg) + +### TruncateLowerQuantile + +To truncate the overall population below at some quantile of +the overall population, use a +[`TruncateLowerQuantile`](@ref) sampling constraint. + +```julia +constraint = TruncateLowerQuantile(0.2) + +# Constrain the population below at the lower 20th percentile +# Resample the entire population (and its subpopulations) according to +# their probabilities 30000 times to determine the percentile bound. +n_draws = 30000 +pop_mixed_constrained = constrain(pop_mixed, constraint, n_draws); + +# Calculate quantile using the same number of samples for plotting. 
+# Will not be exactly the same as the quantile actually used for +# truncating, except in the limit n -> ∞ +q = quantile(resample(pop_mixed, n_draws), constraint.lower_quantile) + +n_draws_plot = 3000 +x = resample(pop_mixed, n_draws_plot) +xc = resample(pop_mixed_constrained, n_draws_plot) + +p1 = scatter(x, label = "", title = "resampling before constraint") +p2 = scatter(xc, label = "", title = "resampling after constraint") +hline!([lq], label = "TruncateLowerQuantile(0.2)") +plot(p1, p2, layout = (2, 1), link = :both, ms = 1, ylims = (-6, 4)) +xlabel!("Sampling #"); ylabel!("Value") +``` + +![](figs/constraining_complex_population_truncatelowerquantile.svg) + +### TruncateUpperQuantile + +To truncate the overall population below at some quantile of +the overall population, use a +[`TruncateUpperQuantile`](@ref) sampling constraint. + +```julia +constraint = TruncateUpperQuantile(0.8) + +# Constrain the population below at the lower 20th percentile +# Resample the entire population (and its subpopulations) according to +# their probabilities 30000 times to determine the percentile bound. +n_resample_draws = 30000 +pop_mixed_constrained = constrain(pop_mixed, constraint, n_resample_draws); + +# Calculate quantile using the same number of samples for plotting. 
+# Will not be exactly the same as the quantile actually used for +# truncating, except in the limit n_resample_draws -> ∞ +q = quantile(resample(pop_mixed, n_resample_draws), constraint.upper_quantile) + +n_plot_draws = 3000 +x = resample(pop_mixed, n_plot_draws) +xc = resample(pop_mixed_constrained, n_plot_draws) + +p1 = scatter(x, label = "", title = "resampling before constraint") +p2 = scatter(xc, label = "", title = "resampling after constraint") +hline!([q], label = "TruncateUpperQuantile(0.8)") +plot(p1, p2, layout = (2, 1), link = :both, ms = 1, ylims = (-6, 4)) +xlabel!("Sampling #"); ylabel!("Value") +``` + +![](figs/constraining_complex_population_truncateupperquantile.svg) + +### TruncateQuantiles + +To truncate the overall population below at some quantile of +the overall population, use a +[`TruncateQuantiles`](@ref) sampling constraint. + +```julia +constraint = TruncateQuantiles(0.2, 0.8) + +# Constrain the population below at the lower 20th percentile +# Resample the entire population (and its subpopulations) according to +# their probabilities 30000 times to determine the percentile bound. +n_resample_draws = 30000 +pop_mixed_constrained = constrain(pop_mixed, constraint, n_resample_draws); + +# Calculate quantile using the same number of samples for plotting. 
+# Will not be exactly the same as the quantile actually used for +# truncating, except in the limit n_resample_draws -> ∞ +s = resample(pop_mixed, n_resample_draws) +qs = quantile(s, [constraint.lower_quantile, constraint.upper_quantile]) + +n_plot_draws = 3000 +x = resample(pop_mixed, n_plot_draws) +xc = resample(pop_mixed_constrained, n_plot_draws) + +p1 = scatter(x, label = "", title = "resampling before constraint") +p2 = scatter(xc, label = "", title = "resampling after constraint") +hline!([qs], label = "TruncateQuantiles(0.2, 0.8)") + +plot(p1, p2, layout = (2, 1), link = :both, ms = 1, ylims = (-6, 4)) +xlabel!("Sampling #"); ylabel!("Value") +``` + +![](figs/constraining_complex_population_truncatequantiles.svg) diff --git a/docs/src/sampling_constraints/sampling_constraints.md b/docs/src/sampling_constraints/sampling_constraints.md new file mode 100644 index 00000000..705d20be --- /dev/null +++ b/docs/src/sampling_constraints/sampling_constraints.md @@ -0,0 +1,65 @@ + +# Sampling constraints + +```@docs +constrain(uv::AbstractUncertainValue, constraint::SamplingConstraint) +``` + +## Element-wise constraints + + +The following sampling constraints are intended to be used element-wise on uncertain values. 
+ +```@docs +TruncateStd +TruncateMinimum +TruncateMaximum +TruncateRange +TruncateLowerQuantile +TruncateUpperQuantile +TruncateQuantiles +``` + + +### Examples + +```@example constraint_theoretical +using UncertainData, Distributions, Plots + +# Define an uncertain value furnished by a theoretical distribution +x = UncertainValue(Normal, 1, 0.5) + +# Constrain the support of the furnishing distribution using various +# constraints +xc_lq = constrain(x, TruncateLowerQuantile(0.2)) +xc_uq = constrain(x, TruncateUpperQuantile(0.8)) +xc_q = constrain(x, TruncateQuantiles(0.2, 0.8)) +xc_min = constrain(x, TruncateMinimum(0.5)) +xc_max = constrain(x, TruncateMaximum(1.5)) +xc_range = constrain(x, TruncateRange(0.5, 1.5)) + +p_lq = plot(x, label = ""); plot!(xc_lq, label = "TruncateLowerQuantile(0.2)") +p_uq = plot(x, label = ""); plot!(xc_uq, label = "TruncateUpperQuantile(0.8)") +p_q = plot(x, label = ""); plot!(xc_q, label = "TruncateQuantiles(0.2, 0.8)") +p_min = plot(x, label = ""); plot!(xc_min, label = "TruncateMinimum(0.5)") +p_max = plot(x, label = ""); plot!(xc_max, label = "TruncateMaximum(1.5)") +p_range = plot(x, label = ""); plot!(xc_range, label = "TruncateRange(0.5, 1.5)") +plot(p_min, p_max, p_range, + p_q, p_lq, p_uq, + size = (750, 500), legendfont = font(7), xlabel = "Value", ylabel = "Density", + legend = :topright, fg_legend = :transparent, bg_legend = :transparent +) +``` + + +## Sequential constraints + +Sequential constraints are used when sampling [`UncertainIndexDataset`](@ref)s or +[`UncertainIndexValueDataset`](@ref)s. 
+ +```@docs +StrictlyIncreasing +StrictlyDecreasing +StartToEnd +sequence_exists +``` From 8dabdb9e5a0d20a14d7028ecadb9b37c13886708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Wed, 5 May 2021 17:20:26 +0200 Subject: [PATCH 12/21] Start to move resampling docs to one function, and collect docs --- docs/make.jl | 44 +- docs/src/binning/bin.md | 45 -- .../resampling/resampling_uncertain_values.md | 13 - docs/src/resampling_and_binning.md | 30 ++ src/interpolation_and_binning/binning.jl | 16 +- src/resampling/Resampling.jl | 12 + .../resample_certainvalues.jl | 1 - .../uncertain_values/resample_measurements.jl | 12 - .../resample_uncertainvalues.jl | 100 ++++ .../resample_uncertainvalues_distributions.jl | 52 -- .../resample_uncertainvalues_kde.jl | 20 +- ...ple_uncertainvalues_kde_withconstraints.jl | 454 +++++++++--------- .../UncertainIndexDataset.jl | 23 - .../UncertainValueDataset.jl | 36 +- 14 files changed, 440 insertions(+), 418 deletions(-) create mode 100644 docs/src/resampling_and_binning.md create mode 100644 src/resampling/uncertain_values/resample_uncertainvalues.jl diff --git a/docs/make.jl b/docs/make.jl index fd57edf0..19f51af3 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -50,29 +50,27 @@ PAGES = [ ], ], - "Binning" => [ - "binning/bin.md" - ], - "Resampling" => [ - "resampling/resampling_overview.md", - "resampling/resampling_uncertain_values.md", - "resampling/resampling_uncertain_datasets.md", - "resampling/resampling_uncertain_indexvalue_datasets.md", - - "resampling/sequential/resampling_uncertaindatasets_sequential.md", - "resampling/sequential/resampling_indexvalue_sequential.md", - "resampling/sequential/strictly_increasing.md", - "resampling/sequential/strictly_decreasing.md", - - "resampling/interpolation/interpolation.md", - "resampling/interpolation/gridded.md", - "resampling/resampling_schemes/resampling_schemes_uncertain_value_collections.md", - 
"resampling/resampling_schemes/resampling_schemes_uncertain_indexvalue_collections.md", - "resampling/resampling_schemes/resampling_with_schemes_uncertain_value_collections.md", - "resampling/resampling_schemes/resampling_with_schemes_uncertain_indexvalue_collections.md", - "resampling/resampling_inplace.md" - #"resampling/models/resampling_with_models.md" - ], + "resampling_and_binning.md", + # "Resampling" => [ + # "resampling/resampling_overview.md", + # "resampling/resampling_uncertain_values.md", + # "resampling/resampling_uncertain_datasets.md", + # "resampling/resampling_uncertain_indexvalue_datasets.md", + + # "resampling/sequential/resampling_uncertaindatasets_sequential.md", + # "resampling/sequential/resampling_indexvalue_sequential.md", + # "resampling/sequential/strictly_increasing.md", + # "resampling/sequential/strictly_decreasing.md", + + # "resampling/interpolation/interpolation.md", + # "resampling/interpolation/gridded.md", + # "resampling/resampling_schemes/resampling_schemes_uncertain_value_collections.md", + # "resampling/resampling_schemes/resampling_schemes_uncertain_indexvalue_collections.md", + # "resampling/resampling_schemes/resampling_with_schemes_uncertain_value_collections.md", + # "resampling/resampling_schemes/resampling_with_schemes_uncertain_indexvalue_collections.md", + # "resampling/resampling_inplace.md" + # #"resampling/models/resampling_with_models.md" + # ], "Propagation of errors" => [ "propagation_of_errors/propagation_of_errors.md" diff --git a/docs/src/binning/bin.md b/docs/src/binning/bin.md index e071dc6c..e69de29b 100644 --- a/docs/src/binning/bin.md +++ b/docs/src/binning/bin.md @@ -1,45 +0,0 @@ -# Binning scalar values - -## Bin values - -```@docs -bin(left_bin_edges::AbstractRange, xs, ys) -``` - -```@docs -bin!(bins::Vector{AbstractVector{T}}, ::AbstractRange{T}, xs, ys) where T -``` - -## Bin summaries - -```@docs -bin(f::Function, left_bin_edges::AbstractRange, xs, ys) -``` - -## Fast bin summaries - -```@docs 
-bin_mean -``` - -# Binning uncertain data - -## Bin values - -```@docs -bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling{RawValues}) -``` - -```@docs -bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResampling{RawValues}) -``` - -## Bin summaries - -```@docs -bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling) -``` - -```@docs -bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResampling) -``` diff --git a/docs/src/resampling/resampling_uncertain_values.md b/docs/src/resampling/resampling_uncertain_values.md index 5d1a4e72..30ec1003 100644 --- a/docs/src/resampling/resampling_uncertain_values.md +++ b/docs/src/resampling/resampling_uncertain_values.md @@ -1,18 +1,5 @@ # Resampling uncertain values -Uncertain values may be resampled by drawing random number from the distributions -furnishing them. - -## Documentation - -```@docs -resample(uv::AbstractUncertainValue) -``` - -```@docs -resample(uv::AbstractUncertainValue, n::Int) -``` - ## Examples ``` julia tab="Resample once" diff --git a/docs/src/resampling_and_binning.md b/docs/src/resampling_and_binning.md new file mode 100644 index 00000000..90c7480c --- /dev/null +++ b/docs/src/resampling_and_binning.md @@ -0,0 +1,30 @@ +# Resampling and binning + +## Uncertain values + +Uncertain values may be resampled by drawing random numbers from the distributions +furnishing them. Optionally, sampling constraints can be applied. 
+ + +```@docs +resample(uv::AbstractUncertainValue) +resample(uv::AbstractUncertainValue, n::Int) +``` + +## Uncertain datasets + +### Binning + +```@docs +bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling{RawValues}) +bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResampling{RawValues}) +bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling) +bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResampling) +``` + +```@docs +bin(left_bin_edges::AbstractRange, xs, ys) +bin!(bins::Vector{AbstractVector{T}}, ::AbstractRange{T}, xs, ys) where T +bin(f::Function, left_bin_edges::AbstractRange, xs, ys) +bin_mean +``` diff --git a/src/interpolation_and_binning/binning.jl b/src/interpolation_and_binning/binning.jl index b71fd628..981016d9 100644 --- a/src/interpolation_and_binning/binning.jl +++ b/src/interpolation_and_binning/binning.jl @@ -14,17 +14,25 @@ Returns `N - 1` bin vectors. ## Examples -### Getting the values in each bin: - -```julia +```jldoctest xs = [1.2, 1.7, 2.2, 3.3, 4.5, 4.6, 7.1] ys = [4.2, 5.1, 6.5, 4.2, 3.2, 3.1, 2.5] left_bin_edges = 0.0:1.0:6.0 bin(left_bin_edges, xs, ys) + +# output +6-element Array{Array{Float64,1},1}: + [] + [4.2, 5.1] + [6.5] + [4.2] + [3.2, 3.1] + [] ``` +Some example data with unevenly spaced time indices: + ```julia -# Some example data with unevenly spaced time indices npts = 300 time, vals = sort(rand(1:1000, npts)), rand(npts) diff --git a/src/resampling/Resampling.jl b/src/resampling/Resampling.jl index 2e82de72..cce811ef 100644 --- a/src/resampling/Resampling.jl +++ b/src/resampling/Resampling.jl @@ -14,6 +14,15 @@ using Reexport UncertainValue, AbstractUncertainValue + """ + resample(x::AbstractUncertainValue) + resample(x::AbstractUncertainValue, n::Int) + + Draw a single sample, or `n` samples, from the uncertain value `x`. + + + When `n` is given, the `n` draws are returned as a vector. 
+ """ function resample end ################################### @@ -36,6 +45,9 @@ using Reexport ################################### # Resampling uncertain values ################################### + + include("uncertain_values/resample_uncertainvalues.jl") + # Uncertain values based on distributions include("uncertain_values/resample_uncertainvalues_distributions.jl") diff --git a/src/resampling/uncertain_values/resample_certainvalues.jl b/src/resampling/uncertain_values/resample_certainvalues.jl index 69579cce..0f2754bc 100644 --- a/src/resampling/uncertain_values/resample_certainvalues.jl +++ b/src/resampling/uncertain_values/resample_certainvalues.jl @@ -20,7 +20,6 @@ constraints = [ :(TruncateStd) ] - for constraint in constraints funcs = quote resample(x::CertainScalar, constraint::$(constraint)) = x.value diff --git a/src/resampling/uncertain_values/resample_measurements.jl b/src/resampling/uncertain_values/resample_measurements.jl index 5c58eae0..e69de29b 100644 --- a/src/resampling/uncertain_values/resample_measurements.jl +++ b/src/resampling/uncertain_values/resample_measurements.jl @@ -1,12 +0,0 @@ - -import Measurements: Measurement -import ..UncertainValues: UncertainValue -import Distributions: Normal - -resample(m::Measurement{T}) where T = resample(UncertainValue(Normal, m.val, m.err)) - -function resample(m::Measurement{T}, n::Int) where T - uval = UncertainValue(Normal, m.val, m.err) - - [resample(uval) for i = 1:n] -end \ No newline at end of file diff --git a/src/resampling/uncertain_values/resample_uncertainvalues.jl b/src/resampling/uncertain_values/resample_uncertainvalues.jl new file mode 100644 index 00000000..66e44ae8 --- /dev/null +++ b/src/resampling/uncertain_values/resample_uncertainvalues.jl @@ -0,0 +1,100 @@ +import ..SamplingConstraints: SamplingConstraint +import ..UncertainValues: CertainScalar +import Measurements: Measurement +import ..UncertainValues: UncertainValue +import Distributions: Normal + + 
+################################# +# Values without uncertainties +################################# + +resample(x::Number) = x +resample(v::CertainScalar) = v.value +resample(v::CertainScalar, n::Int) = [v.value for i = 1:n] +resample(v::CertainScalar, s::SamplingConstraint) = v.value +resample(v::CertainScalar, s::SamplingConstraint, n::Int) = [v.value for i = 1:n] + +# constraints = [ +# :(NoConstraint), +# :(TruncateLowerQuantile), +# :(TruncateUpperQuantile), +# :(TruncateQuantiles), +# :(TruncateMaximum), +# :(TruncateMinimum), +# :(TruncateRange), +# :(TruncateStd) +# ] + +# for constraint in constraints +# funcs = quote +# resample(x::CertainScalar, constraint::$(constraint)) = x.value +# resample(x::CertainScalar, constraint::$(constraint), n::Int) = [x.value for i = 1:n] +# end +# eval(funcs) +# end + +################################# +# Measurements +################################# + +resample(m::Measurement{T}) where T = resample(UncertainValue(Normal, m.val, m.err)) +function resample(m::Measurement{T}, n::Int) where T + uval = UncertainValue(Normal, m.val, m.err) + + [resample(uval) for i = 1:n] +end + +################################# +# Theoretical distributions +################################# +import ..UncertainValues: + TheoreticalDistributionScalarValue, + AbstractUncertainTwoParameterScalarValue, + AbstractUncertainThreeParameterScalarValue, + UncertainScalarTheoreticalTwoParameter, + UncertainScalarTheoreticalThreeParameter, + UncertainScalarNormallyDistributed, + UncertainScalarUniformlyDistributed, + UncertainScalarBetaDistributed, + UncertainScalarBetaPrimeDistributed, + UncertainScalarBetaBinomialDistributed, + UncertainScalarBinomialDistributed, + UncertainScalarGammaDistributed, + UncertainScalarFrechetDistributed +import Distributions + +# Generic resampling: draw from the `distribution` field of any theoretical-distribution value +resample(uv::TheoreticalDistributionScalarValue) = rand(uv.distribution) +resample(uv::TheoreticalDistributionScalarValue, n::Int) = rand(uv.distribution, n) + + +# Custom 
resample methods for each type of uncertain scalars based on +# distributions (in case we want to implement custom sampling for some of them) +# resample(uv::UncertainScalarTheoreticalThreeParameter) = rand(uv.distribution) +# resample(uv::UncertainScalarTheoreticalTwoParameter) = rand(uv.distribution) +# resample(uv::UncertainScalarNormallyDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarUniformlyDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarBetaDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarBetaPrimeDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarBetaBinomialDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarGammaDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarFrechetDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarBinomialDistributed) = rand(uv.distribution) + + +# resample(uv::UncertainScalarTheoreticalThreeParameter, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarTheoreticalTwoParameter, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarNormallyDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarUniformlyDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarBetaDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarBetaPrimeDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarBetaBinomialDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarGammaDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarFrechetDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarBinomialDistributed, n::Int) = rand(uv.distribution, n) + +resample(x::Distributions.Truncated) = rand(x) +resample(x::Distributions.Truncated, n::Int) = rand(x, n) +resample(x::Distributions.Distribution) = rand(x) +resample(x::Distributions.Distribution, 
n::Int) = rand(x, n) \ No newline at end of file diff --git a/src/resampling/uncertain_values/resample_uncertainvalues_distributions.jl b/src/resampling/uncertain_values/resample_uncertainvalues_distributions.jl index 12849b9e..e69de29b 100644 --- a/src/resampling/uncertain_values/resample_uncertainvalues_distributions.jl +++ b/src/resampling/uncertain_values/resample_uncertainvalues_distributions.jl @@ -1,52 +0,0 @@ -import ..UncertainValues: - TheoreticalDistributionScalarValue, - AbstractUncertainTwoParameterScalarValue, - AbstractUncertainThreeParameterScalarValue, - UncertainScalarTheoreticalTwoParameter, - UncertainScalarTheoreticalThreeParameter, - UncertainScalarNormallyDistributed, - UncertainScalarUniformlyDistributed, - UncertainScalarBetaDistributed, - UncertainScalarBetaPrimeDistributed, - UncertainScalarBetaBinomialDistributed, - UncertainScalarBinomialDistributed, - UncertainScalarGammaDistributed, - UncertainScalarFrechetDistributed -import Distributions - -# Resample for generic -resample(uv::TheoreticalDistributionScalarValue) = rand(uv.distribution) -resample(uv::TheoreticalDistributionScalarValue, n::Int) = rand(uv.distribution, n) - - -# Custom resample methods for each type of uncertain scalars based on -# distributions (in case we want to implement custom sampling for some of them) -resample(uv::UncertainScalarTheoreticalThreeParameter) = rand(uv.distribution) -resample(uv::UncertainScalarTheoreticalTwoParameter) = rand(uv.distribution) -resample(uv::UncertainScalarNormallyDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarUniformlyDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarBetaDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarBetaPrimeDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarBetaBinomialDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarGammaDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarFrechetDistributed) = rand(uv.distribution) 
-resample(uv::UncertainScalarBinomialDistributed) = rand(uv.distribution) - - -resample(uv::UncertainScalarTheoreticalThreeParameter, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarTheoreticalTwoParameter, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarNormallyDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarUniformlyDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarBetaDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarBetaPrimeDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarBetaBinomialDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarGammaDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarFrechetDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarBinomialDistributed, n::Int) = rand(uv.distribution, n) - - -resample(x::Distributions.Truncated) = rand(x) -resample(x::Distributions.Truncated, n::Int) = rand(x, n) - -resample(x::Distributions.Distribution) = rand(x) -resample(x::Distributions.Distribution, n::Int) = rand(x, n) \ No newline at end of file diff --git a/src/resampling/uncertain_values/resample_uncertainvalues_kde.jl b/src/resampling/uncertain_values/resample_uncertainvalues_kde.jl index f60605cf..3e9155be 100644 --- a/src/resampling/uncertain_values/resample_uncertainvalues_kde.jl +++ b/src/resampling/uncertain_values/resample_uncertainvalues_kde.jl @@ -1,17 +1,17 @@ import ..UncertainValues.AbstractUncertainScalarKDE -""" - resample(uv::UncertainScalarKDE) +# """ +# resample(uv::UncertainScalarKDE) -Resample an uncertain value whose distribution is approximated using a -kernel density estimate once. -""" +# Resample an uncertain value whose distribution is approximated using a +# kernel density estimate once. 
+# """ resample(uv::AbstractUncertainScalarKDE) = rand(uv) -""" - resample(uv::AbstractUncertainScalarKDE) +# """ +# resample(uv::AbstractUncertainScalarKDE) -Resample an uncertain value whose distribution is approximated using a -kernel density estimate `n` times. -""" +# Resample an uncertain value whose distribution is approximated using a +# kernel density estimate `n` times. +# """ resample(uv::AbstractUncertainScalarKDE, n::Int) = rand(uv, n) diff --git a/src/resampling/uncertain_values/resample_uncertainvalues_kde_withconstraints.jl b/src/resampling/uncertain_values/resample_uncertainvalues_kde_withconstraints.jl index 357cb876..d826a94f 100644 --- a/src/resampling/uncertain_values/resample_uncertainvalues_kde_withconstraints.jl +++ b/src/resampling/uncertain_values/resample_uncertainvalues_kde_withconstraints.jl @@ -22,69 +22,69 @@ import ..SamplingConstraints: fallback -""" - resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint) -Resample without contraints (use the full distribution representing the value) +# Resample without contraints (use the full distribution representing the value) -## Example +# ## Example -```julia -some_sample = rand(Normal(), 1000) +# ```julia +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -# Resample the uncertain value by resampling the full distribution once. -resample(uncertainval, NoConstraint()) -``` -""" +# # Resample the uncertain value by resampling the full distribution once. 
+# resample(uncertainval, NoConstraint()) +# ``` +# """ resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint) = resample(uv) -""" - resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint, n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint, n::Int) -Resample without contraints (use the full distribution representing the value) +# Resample without contraints (use the full distribution representing the value) -## Example +# ## Example -```julia -some_sample = rand(Normal(), 1000) +# ```julia +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -# Resample the uncertain value by resampling the full distribution n times -resample(uncertainval, NoConstraint(), n) -``` -""" +# # Resample the uncertain value by resampling the full distribution n times +# resample(uncertainval, NoConstraint(), n) +# ``` +# """ resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint, n::Int) = resample(uv, n) -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile) -Resample `uv` by first truncating below the kernel density estimate of the -distribution furnishing the value at some lower quantile, then resampling -it once. +# Resample `uv` by first truncating below the kernel density estimate of the +# distribution furnishing the value at some lower quantile, then resampling +# it once. 
-## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateLowerQuantile(0.16) +# constraint = TruncateLowerQuantile(0.16) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution once. -resample(uncertainval, constraint) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution once. +# resample(uncertainval, constraint) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile) # Find the index of the kernel density estimated distribution # corresponding to the lower quantile at which we want to truncate. @@ -104,31 +104,31 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuant rand(Uniform(sampled_val, sampled_val + δ)) end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile, - n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile, +# n::Int) -Resample `uv` by first truncating below the kernel density estimate of the -distribution furnishing the value at some lower quantile, then resampling -it `n` times. +# Resample `uv` by first truncating below the kernel density estimate of the +# distribution furnishing the value at some lower quantile, then resampling +# it `n` times. 
-## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateLowerQuantile(0.16) +# constraint = TruncateLowerQuantile(0.16) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 500 times. -resample(uncertainval, constraint, 500) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 500 times. +# resample(uncertainval, constraint, 500) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile, n::Int) # Find the index of the kernel density estimated distribution @@ -151,30 +151,30 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuant end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) -Resample `uv` by first truncating above the kernel density estimate of the -distribution furnishing the value at some upper quantile, then resampling -it once. +# Resample `uv` by first truncating above the kernel density estimate of the +# distribution furnishing the value at some upper quantile, then resampling +# it once. 
-## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateUpperQuantile(0.78) +# constraint = TruncateUpperQuantile(0.78) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution once. -resample(uncertainval, constraint) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution once. +# resample(uncertainval, constraint) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) # Find the index of the kernel density estimated distribution # corresponding to the lower quantile at which we want to truncate. @@ -194,31 +194,31 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuant rand(Uniform(sampled_val, sampled_val + δ)) end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile, - n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile, +# n::Int) -Resample `uv` by first truncating above the kernel density estimate of the -distribution furnishing the value at some upper quantile, then resampling -it `n` times. +# Resample `uv` by first truncating above the kernel density estimate of the +# distribution furnishing the value at some upper quantile, then resampling +# it `n` times. 
-## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateLowerQuantile(0.16) +# constraint = TruncateLowerQuantile(0.16) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 500 times. -resample(uncertainval, constraint, 500) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 500 times. +# resample(uncertainval, constraint, 500) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile, n::Int) # Find the index of the kernel density estimated distribution @@ -242,30 +242,30 @@ end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value both above and below at some quantile range, -then resampling it once. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value both above and below at some quantile range, +# then resampling it once. 
-## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateQuantiles(0.1, 0.9) +# constraint = TruncateQuantiles(0.1, 0.9) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution once. -resample(uncertainval, constraint) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution once. +# resample(uncertainval, constraint) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateQuantiles) # Find the index of the kernel density estimated distribution # corresponding to the lower quantile at which we want to truncate. @@ -288,31 +288,31 @@ end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateQuantiles, - n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateQuantiles, +# n::Int) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value both above and below at some quantile range, -then resampling it `n` times. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value both above and below at some quantile range, +# then resampling it `n` times. 
-## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateQuantiles(0.1, 0.9) +# constraint = TruncateQuantiles(0.1, 0.9) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 500 times. -resample(uncertainval, constraint, 500) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 500 times. +# resample(uncertainval, constraint, 500) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateQuantiles, n::Int) # Find the index of the kernel density estimated distribution @@ -337,27 +337,27 @@ end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some maximum value, -then resampling it once. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some maximum value, +# then resampling it once. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. 
+# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -constraint = TruncateMaximum(0.8) # accept no values larger than 1.1 +# constraint = TruncateMaximum(0.8) # accept no values larger than 1.1 -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 700 times. -resample(uncertainval, constraint, 700) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 700 times. +# resample(uncertainval, constraint, 700) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum) # Box width δ = step(uv.range) @@ -378,28 +378,28 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum) rand(Uniform(sampled_val, sampled_val + δ)) end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum, n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum, n::Int) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum value, -then resampling it `n` times. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum value, +# then resampling it `n` times. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -constraint = TruncateMaximum(0.8) # accept no values larger than 1.1 +# constraint = TruncateMaximum(0.8) # accept no values larger than 1.1 -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 700 times. 
-resample(uncertainval, constraint, 700) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 700 times. +# resample(uncertainval, constraint, 700) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum, n::Int) # Box width δ = step(uv.range) @@ -422,27 +422,27 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum, n end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum value, -then resampling it once. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum value, +# then resampling it once. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -constraint = TruncateMinimum(0.2) # accept no values smaller than 0.2 +# constraint = TruncateMinimum(0.2) # accept no values smaller than 0.2 -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 700 times. -resample(uncertainval, constraint, 700) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 700 times. 
+# resample(uncertainval, constraint, 700) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum) # Box width δ = step(uv.range) @@ -465,27 +465,27 @@ end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum, n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum, n::Int) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum value, -then resampling it `n` times. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum value, +# then resampling it `n` times. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -constraint = TruncateMinimum(0.2) # accept no values smaller than 0.2 +# constraint = TruncateMinimum(0.2) # accept no values smaller than 0.2 -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 700 times. -resample(uncertainval, constraint, 700) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 700 times. 
+# resample(uncertainval, constraint, 700) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum, n::Int) # Box width δ = step(uv.range) @@ -508,28 +508,28 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum, n end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum and maximum values, -then resampling it once. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum and maximum values, +# then resampling it once. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -# Only accept values in the range [-0.9, 1.2] -constraint = TruncateRange(-0.9, 1.2) +# # Only accept values in the range [-0.9, 1.2] +# constraint = TruncateRange(-0.9, 1.2) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 300 times. -resample(uncertainval, constraint, 300) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 300 times. 
+# resample(uncertainval, constraint, 300) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange) # Box width δ = step(uv.range) @@ -552,28 +552,28 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange) end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange, n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange, n::Int) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum and maximum values, -then resampling it `n` times. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum and maximum values, +# then resampling it `n` times. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -# Only accept values in the range [-0.9, 1.2] -constraint = TruncateRange(-0.9, 1.2) +# # Only accept values in the range [-0.9, 1.2] +# constraint = TruncateRange(-0.9, 1.2) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 300 times. -resample(uncertainval, constraint, 300) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 300 times. 
+# resample(uncertainval, constraint, 300) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange, n::Int) # Box width δ = step(uv.range) diff --git a/src/uncertain_datasets/UncertainIndexDataset.jl b/src/uncertain_datasets/UncertainIndexDataset.jl index 483537bb..9ab82e7b 100644 --- a/src/uncertain_datasets/UncertainIndexDataset.jl +++ b/src/uncertain_datasets/UncertainIndexDataset.jl @@ -22,29 +22,6 @@ inds = UncertainIndexDataset(uvals) 33rd to 67th percentile range for the indices. plot(inds, [0.33, 0.67]) ``` - -`UncertainValueDataset`s can also be comprised of uncertain values of different -types (see also [`UncertainValue`](@ref)). - -```julia -o1 = UncertainValue(Normal, 0, 0.5) -o2 = UncertainValue(Normal, 2.0, 0.1) -o3 = UncertainValue(Uniform, 0, 4) -o4 = UncertainValue(Uniform, rand(100)) -o5 = UncertainValue(Beta, 4, 5) -o6 = UncertainValue(Gamma, 4, 5) -o7 = UncertainValue(Frechet, 1, 2) -o8 = UncertainValue(BetaPrime, 1, 2) -o9 = UncertainValue(BetaBinomial, 10, 3, 2) -o10 = UncertainValue(Binomial, 10, 0.3) - -uvals = [o1, o2, o3, o4, o5, o6, o7, o8, o9, o10] -d = UncertainValueDataset(uvals) - -# Plot the 20th to 80th percentile range error bars. -plot(d, [0.2, 0.8]) -``` - """ struct UncertainIndexDataset <: AbstractUncertainIndexDataset indices::AbstractVector{<:AbstractUncertainValue} diff --git a/src/uncertain_datasets/UncertainValueDataset.jl b/src/uncertain_datasets/UncertainValueDataset.jl index 2ea03e3f..3ba97b81 100644 --- a/src/uncertain_datasets/UncertainValueDataset.jl +++ b/src/uncertain_datasets/UncertainValueDataset.jl @@ -2,14 +2,34 @@ """ UncertainValueDataset(values) -A dataset of uncertain values which have no explicit index associated with its uncertain values. Use this type when you want to be explicit -about the values representing data values, as opposed to [`UncertainIndexDataset`](@ref)s. - - -## Fields - -- **`values::AbstractVector{<:AbstractUncertainValue}`**: The uncertain values. 
Each value is - represented by an `AbstractUncertainValue`. +A dataset of uncertain values which have no explicit index associated with its uncertain values. +Use this type when you want to be explicit about the values representing data values, as +opposed to [`UncertainIndexDataset`](@ref)s. + +`UncertainValueDataset`s can be comprised of uncertain values of any type compatible with +this package (see also [`UncertainValue`](@ref)). + +## Example + +```julia +using UncertainData +o1 = UncertainValue(Normal, 0, 0.5) +o2 = UncertainValue(Normal, 2.0, 0.1) +o3 = UncertainValue(Uniform, 0, 4) +o4 = UncertainValue(Uniform, rand(100)) +o5 = UncertainValue(Beta, 4, 5) +o6 = UncertainValue(Gamma, 4, 5) +o7 = UncertainValue(Frechet, 1, 2) +o8 = UncertainValue(BetaPrime, 1, 2) +o9 = UncertainValue(BetaBinomial, 10, 3, 2) +o10 = UncertainValue(Binomial, 10, 0.3) + +uvals = [o1, o2, o3, o4, o5, o6, o7, o8, o9, o10] +d = UncertainValueDataset(uvals) + +# Plot the 20th to 80th percentile range error bars. +plot(d, [0.2, 0.8]) +``` """ struct UncertainValueDataset <: AbstractUncertainValueDataset values::AbstractVector{<:AbstractUncertainValue} From 8b4380ee0b49abeaf04f1aaffc94a7114aa458a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 7 May 2021 10:02:36 +0200 Subject: [PATCH 13/21] Simplify docs for `UncertainIndexValueDataset` --- .../UncertainIndexValueDataset.jl | 35 +++++-------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/src/uncertain_datasets/UncertainIndexValueDataset.jl b/src/uncertain_datasets/UncertainIndexValueDataset.jl index c55904bb..95a323b6 100644 --- a/src/uncertain_datasets/UncertainIndexValueDataset.jl +++ b/src/uncertain_datasets/UncertainIndexValueDataset.jl @@ -13,33 +13,16 @@ The i-th index is assumed to correspond to the i-th value. For example, if ## Example -```julia -# Simulate some data values measured a specific times. 
-times = 1:100 -values = sin.(0.0:0.1:100.0) - -# Assume the data were measured by a device with normally distributed -# measurement uncertainties with fluctuating standard deviations -σ_range = (0.1, 0.7) - -uncertain_values = [UncertainValue(Normal, val, rand(Uniform(σ_range...))) - for val in values] - -# Assume the clock used to record the times is uncertain, but with uniformly -# distributed noise that doesn't change through time. -uncertain_times = [UncertainValue(Uniform, t-0.1, t+0.1) for t in times] +Here, we simulate data which were measured with some uncertainty, with some timing error. +The data were measured by a device with normally distributed measurement uncertainties, +with fluctuating standard deviations restricted to the interval ``[0.1, 0.7]``. The clock +used to record the times is uncertain, with uniformly distributed noise whose magnitude +is at most ``0.1`` and does not change through time. -# Pair the time-value data. If vectors are provided to the constructor, -# the first will be interpreted as the indices and the second as the values. 
-uidxs = UncertainIndexDataset(uncertain_times) -uvals = UncertainValueDataset(uncertain_values) - -data = UncertainIndexValueDataset(uidxs, uvals) +```julia +v = [UncertainValue(Normal, x, rand(Uniform(0.1, 0.7))) for x in sin.(0.0:0.1:100.0)] +t = [UncertainValue(Uniform, x-0.1, x+0.1) for x in 1:100] +data = UncertainIndexValueDataset(t, v) ``` """ struct UncertainIndexValueDataset{IDXTYP <: AbstractUncertainIndexDataset, VALSTYP <: AbstractUncertainValueDataset} <: AbstractUncertainIndexValueDataset From 93125e4056b02c4656539bbb7e4b3afbe881bd99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 7 May 2021 11:14:23 +0200 Subject: [PATCH 14/21] Add missing import --- docs/src/uncertain_datasets/datasets.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/src/uncertain_datasets/datasets.md b/docs/src/uncertain_datasets/datasets.md index ed0210fe..3ad8885d 100644 --- a/docs/src/uncertain_datasets/datasets.md +++ b/docs/src/uncertain_datasets/datasets.md @@ -54,7 +54,7 @@ vals = [r1; r2; r3; r4; r5] # These are our time indices inds = [UncertainValue(Normal, i, rand(Uniform(0, 1))) for i = 1:length(vals)] -# Combine +# Combine indices and values x = UncertainIndexValueDataset(inds, vals) # Plot 90th percentile range both for indices and values. @@ -71,7 +71,7 @@ with increasing standard deviation through time. We also have some uncertain val that are associated with the indices. 
```@example uivd2 -using UncertainData, Plots +using UncertainData, Plots, Distributions # Time indices time_inds = 1:13 @@ -82,9 +82,9 @@ inds = UncertainIndexDataset(uvals) u1 = UncertainValue(Gamma, rand(Gamma(), 500)) u2 = UncertainValue(rand(MixtureModel([Normal(1, 0.3), Normal(0.1, 0.1)]), 500)) uvals3 = [UncertainValue(Normal, rand(), rand()) for i = 1:11] -measurements = UncertainValueDataset([u1; u2; uvals3]) +measurements = [u1; u2; uvals3] -# Combine +# Combine indices and values x = UncertainIndexValueDataset(inds, measurements) # Plot the dataset with error bars in both directions, using the 20th to 80th percentile From 843aa9c22a0688cc97b8b1c74de9e706640c65ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 7 May 2021 11:14:29 +0200 Subject: [PATCH 15/21] Remove duplicate docs --- docs/src/uncertain_datasets/uncertain_value_dataset.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/src/uncertain_datasets/uncertain_value_dataset.md b/docs/src/uncertain_datasets/uncertain_value_dataset.md index 93c47230..d4927e61 100644 --- a/docs/src/uncertain_datasets/uncertain_value_dataset.md +++ b/docs/src/uncertain_datasets/uncertain_value_dataset.md @@ -1,11 +1,5 @@ # Uncertain value datasets -## Documentation - -```@docs -UncertainValueDataset -``` - ## Description `UncertainValueDataset`s is an uncertain dataset type that has no explicit index From dfa740d7ab8caf9d1333ea09dd88e3cc19b7a5e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 7 May 2021 14:38:10 +0200 Subject: [PATCH 16/21] Mathematical operations --- docs/make.jl | 1 - docs/src/mathematics/elementary_operations.md | 97 -------------- .../constrain_uncertain_values.md | 1 - .../sampling_constraints.md | 16 ++- docs/src/uncertain_values/uncertain_values.md | 39 ++++++ .../uncertainvalues/add_uncertainvalues.jl | 118 +++++------------- .../uncertainvalues/divide_uncertainvalues.jl | 83 +++--------- 7 files 
changed, 101 insertions(+), 254 deletions(-) delete mode 100644 docs/src/mathematics/elementary_operations.md delete mode 100644 docs/src/sampling_constraints/constrain_uncertain_values.md diff --git a/docs/make.jl b/docs/make.jl index 19f51af3..f55b8707 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -77,7 +77,6 @@ PAGES = [ ], "Mathematics" => [ - "mathematics/elementary_operations.md", "mathematics/trig_functions.md" ], diff --git a/docs/src/mathematics/elementary_operations.md b/docs/src/mathematics/elementary_operations.md deleted file mode 100644 index 641a55a6..00000000 --- a/docs/src/mathematics/elementary_operations.md +++ /dev/null @@ -1,97 +0,0 @@ -# Elementary mathematical operations - -Elementary mathematical operations (`+`, `-`, `*`, and `/`) between arbitrary -uncertain values of different types and scalars are supported. - -## Syntax - -Resampling is used to perform the mathematical operations. All mathematical -operations return a vector containing the results of repeated element-wise operations -(where each element is a resampled draw from the furnishing distribution(s) of the -uncertain value(s)). - -The default number of realizations is set to `10000`. This allows calling `uval1 + uval2` -for two uncertain values `uval1` and `uval2`. If you need to tune the number of resample -draws to `n`, use the `+(uval1, uval2, n)` syntax. - -## Future improvements - -In the future, elementary operations might be improved for certain combinations of uncertain -values where exact expressions for error propagation are now, for example using the -machinery in `Measurements.jl` for normally distributed values. 
- -## Supported operations - -## Addition - -```@docs -Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue) -Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:+(a::Real, b::AbstractUncertainValue) -Base.:+(a::Real, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:+(a::AbstractUncertainValue, b::Real) -Base.:+(a::AbstractUncertainValue, b::Real, n::Int) -``` - -## Subtraction - -```@docs -Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue) -Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:-(a::Real, b::AbstractUncertainValue) -Base.:-(a::Real, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:-(a::AbstractUncertainValue, b::Real) -Base.:-(a::AbstractUncertainValue, b::Real, n::Int) -``` - -## Multiplication - -```@docs -Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue) -Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:*(a::Real, b::AbstractUncertainValue) -Base.:*(a::Real, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:*(a::AbstractUncertainValue, b::Real) -Base.:*(a::AbstractUncertainValue, b::Real, n::Int) -``` - -## Division - -```@docs -Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue) -Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:/(a::Real, b::AbstractUncertainValue) -Base.:/(a::Real, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:/(a::AbstractUncertainValue, b::Real) -Base.:/(a::AbstractUncertainValue, b::Real, n::Int) -``` - -## Special cases - -### `CertainScalar`s - -Performing elementary operations with `CertainScalar`s behaves as for scalars. 
diff --git a/docs/src/sampling_constraints/constrain_uncertain_values.md b/docs/src/sampling_constraints/constrain_uncertain_values.md deleted file mode 100644 index 8b137891..00000000 --- a/docs/src/sampling_constraints/constrain_uncertain_values.md +++ /dev/null @@ -1 +0,0 @@ - diff --git a/docs/src/sampling_constraints/sampling_constraints.md b/docs/src/sampling_constraints/sampling_constraints.md index 705d20be..6c128ec3 100644 --- a/docs/src/sampling_constraints/sampling_constraints.md +++ b/docs/src/sampling_constraints/sampling_constraints.md @@ -7,8 +7,7 @@ constrain(uv::AbstractUncertainValue, constraint::SamplingConstraint) ## Element-wise constraints - -The following sampling constraintsa are aimed to be used element-wise on uncertain values. +The following sampling constraints are aimed to be used element-wise on uncertain values. ```@docs TruncateStd @@ -51,8 +50,7 @@ plot(p_min, p_max, p_range, ) ``` - -## Sequential constraints +## Dataset (sequential) constraints Sequential constraints are used when sampling [`UncertainIndexDataset`](@ref)s or [`UncertainIndexValueDataset`](@ref)s. @@ -60,6 +58,16 @@ Sequential constraints are used when sampling [`UncertainIndexDataset`](@ref)s o ```@docs StrictlyIncreasing StrictlyDecreasing +``` + +### Sampling algorithms + +```@docs StartToEnd +``` + +### Utils + +```@docs sequence_exists ``` diff --git a/docs/src/uncertain_values/uncertain_values.md b/docs/src/uncertain_values/uncertain_values.md index 79a7a52d..9766f715 100644 --- a/docs/src/uncertain_values/uncertain_values.md +++ b/docs/src/uncertain_values/uncertain_values.md @@ -402,3 +402,42 @@ savefig("figs/combine_ex.png") #hide ``` ![](figs/combine_ex.png) + + +## Mathematical operations + +# Elementary mathematical operations + +Elementary mathematical operations (`+`, `-`, `*`, and `/`) between arbitrary +uncertain values of different types and scalars are supported. + +## Syntax + +Resampling is used to perform the mathematical operations. 
All mathematical +operations return an uncertain value: a kernel density estimate to the results of repeated +element-wise operations (where each operand is a resampled draw from the furnishing +distribution(s) of the uncertain value(s)). + +The default number of realizations is set to `30000`. This allows calling `uval1 + uval2` +for two uncertain values `uval1` and `uval2`. If you need to tune the number of resample +draws to `n`, use the `+(uval1, uval2, n)` syntax. + +## Future improvements + +In the future, elementary operations might be improved for certain combinations of uncertain +values where exact expressions for error propagation are known, for example using the +machinery in `Measurements.jl` for normally distributed values. + +## Supported operations + +Performing elementary operations with `CertainScalar`s behaves as for scalars. + +## Addition + +```@docs +Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue) +Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue) +Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue) +Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue) +``` + diff --git a/src/mathematics/uncertainvalues/add_uncertainvalues.jl b/src/mathematics/uncertainvalues/add_uncertainvalues.jl index 3f2925f4..a3102f92 100644 --- a/src/mathematics/uncertainvalues/add_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/add_uncertainvalues.jl @@ -4,89 +4,59 @@ ########## """ - Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue) -> UncertainValue + Base.:+(a::AbstractUncertainValue, b::Real; n::Int = 30000) -> UncertainScalarKDE + Base.:+(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE + Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Addition operator for pairs of uncertain values. +Addition operator. Add `a` and `b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise addition on the draws. 
+A kernel density estimate to the distribution of sums is returned. -Computes the element-wise sum between for a default of `n = 10000` realizations of `a` and -`b`, then returns an uncertain value based on a kernel density estimate to the distribution -of the element-wise sums. +Use the `+(a, b, n)` syntax to tune the number of draws. -Use the `+(a, b, n)` syntax to tune the number (`n`) of draws. +## Example + +```julia +using UncertainData +x = UncertainValue(Normal, 0, 1) +y = UncertainValue([1, 2, -15, -20], [0.2, 0.3, 0.2, 0.3]) +x + y # uses the default number of draws (n = 30000) ++(x, y, 100000) # use more samples +``` """ function Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) UncertainValue(resample(a, n) .+ resample(b, n)) end -""" - Base.:+(a::Real, b::AbstractUncertainValue) -> UncertainValue - -Addition operator for between scalars and uncertain values. - -Computes the element-wise sum between `a` and `b` for a default of `n = 10000` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. - -Use the `+(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:+(a::Real, b::AbstractUncertainValue; n::Int = 30000) = UncertainValue(a .+ resample(b, n)) - -""" - Base.:+(a::AbstractUncertainValue, b::Real) -> UncertainValue - -Addition operator for between uncertain values and scalars. - -Computes the element-wise sum between `a` and `b` for a default of `n = 10000` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. - -Use the `+(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:+(a::AbstractUncertainValue, b::Real; n::Int = 30000) = UncertainValue(resample(a, n) .+ b) - -""" - Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Addition operator for pairs of uncertain values. 
- -Computes the element-wise sum between `a` and `b` for `n` realizations -of `a` and `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. - -Call this function using the `+(a, b, n)` syntax. -""" -function Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) +Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) = UncertainValue(resample(a, n) .+ resample(b, n)) -end -""" - Base.:+(a::Real, b::AbstractUncertainValue, n::Int) -> UncertainValue +Base.:+(a::Real, b::AbstractUncertainValue, n::Int) = UncertainValue(a .+ resample(b, n)) +Base.:+(a::AbstractUncertainValue, b::Real, n::Int) = UncertainValue(resample(a, n) .+ b) -Addition operator for scalar-uncertain value pairs. -Computes the element-wise sum between `a` and `b` for `n` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. +##################################################################################### +# Special cases +##################################################################################### -Call this function using the `+(a, b, n)` syntax. -""" -Base.:+(a::Real, b::AbstractUncertainValue, n::Int) = - UncertainValue(a .+ resample(b, n)) +import ..UncertainValues: CertainScalar +################## +# `CertainScalar`s +################# """ - Base.:+(a::AbstractUncertainValue, b::Real, n::Int) -> UncertainValue - -Addition operator for scalar-uncertain value pairs. - -Computes the element-wise sum between `a` and `b` for `n` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. + Base.:+(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Call this function using the `+(a, b, n)` syntax. 
+Addition of certain values with themselves or scalars acts as regular addition, but +returns the result wrapped in a `CertainScalar` instance. """ -Base.:+(a::AbstractUncertainValue, b::Real, n::Int) = - UncertainValue(resample(a, n) .+ b) +Base.:+(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) +Base.:+(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value + b.value) +Base.:+(a::CertainScalar, b::Real) = CertainScalar(a.value + b) +Base.:+(a::Real, b::CertainScalar) = CertainScalar(a + b.value) @@ -124,25 +94,3 @@ function Base.:+(a::AbstractUncertainValue, [+(a, b[i], n) for i = 1:length(b)] end - -##################################################################################### -# Special cases -##################################################################################### - -import ..UncertainValues: CertainScalar - -################## -# `CertainScalar`s -################# -""" - Base.:+(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) - -Addition of certain values with themselves or scalars acts as regular addition, but -returns the result wrapped in a `CertainScalar` instance. 
-""" -Base.:+(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) - -Base.:+(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value + b.value) -Base.:+(a::CertainScalar, b::Real) = CertainScalar(a.value + b) -Base.:+(a::Real, b::CertainScalar) = CertainScalar(a + b.value) - diff --git a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl index f347c5e7..81239f7f 100644 --- a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl @@ -3,87 +3,38 @@ ################ """ - Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue) -> UncertainValue + Base.:/(a::AbstractUncertainValue, b::Real; n::Int = 30000) -> UncertainScalarKDE + Base.:/(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE + Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Division operator for pairs of uncertain values. +Right-division operator. Divide `a` by `b`, by drawing `n` realizations of the uncertain value(s), +then performing element-wise right-division on the draws. +A kernel density estimate to the distribution of quotients is returned. -Computes the element-wise quotients between for a default of `n = 10000` realizations of `a` and -`b`, then returns an uncertain value based on a kernel density estimate to the distribution -of the element-wise quotients. +Use the `/(a, b, n)` syntax to tune the number of draws. -Use the `/(a, b, n)` syntax to tune the number (`n`) of draws. 
+## Example + +```julia +using UncertainData +x = UncertainValue(Normal, 0, 1) +y = UncertainValue([1, 2, -15, -20], [0.2, 0.3, 0.2, 0.3]) +x / y # uses the default number of draws (n = 30000) +/(x, y, 100000) # use more samples +``` """ function Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) UncertainValue(resample(a, n) ./ resample(b, n)) end -""" - Base.:/(a::Real, b::AbstractUncertainValue) -> UncertainValue - -Division operator for between scalars and uncertain values. - -Computes the element-wise quotients between `a` and `b` for a default of `n = 10000` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Use the `/(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:/(a::Real, b::AbstractUncertainValue; n::Int = 30000) = UncertainValue(a ./ resample(b, n)) - -""" - Base.:/(a::AbstractUncertainValue, b::Real) -> UncertainValue - -Division operator for between uncertain values and scalars. - -Computes the element-wise quotients between `a` and `b` for a default of `n = 10000` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Use the `/(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:/(a::AbstractUncertainValue, b::Real; n::Int = 30000) = UncertainValue(resample(a, n) ./ b) - -""" - Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Division operator for pairs of uncertain values. - -Computes the element-wise quotients between `a` and `b` for `n` realizations -of `a` and `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Call this function using the `/(a, b, n)` syntax. 
-""" -function Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) +Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) = UncertainValue(resample(a, n) ./ resample(b, n)) -end -""" - Base.:/(a::Real, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Division operator for scalar-uncertain value pairs. - -Computes the element-wise quotients between `a` and `b` for `n` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Call this function using the `/(a, b, n)` syntax. -""" Base.:/(a::Real, b::AbstractUncertainValue, n::Int) = UncertainValue(a ./ resample(b, n)) - -""" - Base.:/(a::AbstractUncertainValue, b::Real, n::Int) -> UncertainValue - -Division operator for scalar-uncertain value pairs. - -Computes the element-wise quotients between `a` and `b` for `n` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Call this function using the `/(a, b, n)` syntax. -""" Base.:/(a::AbstractUncertainValue, b::Real, n::Int) = UncertainValue(resample(a, n) ./ b) From bf31afc76c96ef3f4e7a6004f2bc8fdfbe358414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 7 May 2021 14:38:15 +0200 Subject: [PATCH 17/21] Remove old file --- .../sequential_constraints.md | 28 ------------------- 1 file changed, 28 deletions(-) delete mode 100644 docs/src/sampling_constraints/sequential_constraints.md diff --git a/docs/src/sampling_constraints/sequential_constraints.md b/docs/src/sampling_constraints/sequential_constraints.md deleted file mode 100644 index 54862cc2..00000000 --- a/docs/src/sampling_constraints/sequential_constraints.md +++ /dev/null @@ -1,28 +0,0 @@ -# Increasing/decreasing - - -The following constraints may be used to impose sequential constraints when sampling a -collection of uncertain values element-wise. 
- -## StrictlyIncreasing - -```@docs -StrictlyIncreasing -``` - -## StrictlyDecreasing - -```@docs -StrictlyDecreasing -``` - -## Existence of sequences - -`sequence_exists` will check whether a valid sequence through your collection of -uncertain values exists, so that you can know beforehand whether a particular -sequential sampling constraint is possible to apply to your data. - -```@docs -sequence_exists -``` - From f934e0a98f5b01a71b881ce21f83c48467d06077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 7 May 2021 14:42:26 +0200 Subject: [PATCH 18/21] Elementary mathematical operations --- .../uncertainvalues/add_uncertainvalues.jl | 7 +- .../uncertainvalues/divide_uncertainvalues.jl | 10 +- .../multiply_uncertainvalues.jl | 91 ++++--------------- .../subtract_uncertainvalues.jl | 90 ++++-------------- 4 files changed, 42 insertions(+), 156 deletions(-) diff --git a/src/mathematics/uncertainvalues/add_uncertainvalues.jl b/src/mathematics/uncertainvalues/add_uncertainvalues.jl index a3102f92..0d88fab8 100644 --- a/src/mathematics/uncertainvalues/add_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/add_uncertainvalues.jl @@ -47,12 +47,9 @@ import ..UncertainValues: CertainScalar ################## # `CertainScalar`s ################# -""" - Base.:+(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Addition of certain values with themselves or scalars acts as regular addition, but -returns the result wrapped in a `CertainScalar` instance. -""" +# Addition of certain values with themselves or scalars acts as regular addition, but +# returns the result wrapped in a `CertainScalar` instance. 
Base.:+(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Base.:+(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value + b.value) Base.:+(a::CertainScalar, b::Real) = CertainScalar(a.value + b) diff --git a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl index 81239f7f..12171c6d 100644 --- a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl @@ -49,14 +49,8 @@ import ..UncertainValues: CertainScalar ################## # `CertainScalar`s ################# -""" - Base.:/(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) - -Division of certain values with themselves or scalars acts as regular division, but -returns the result wrapped in a `CertainScalar` instance. -""" -Base.:/(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) - +# Division of certain values with themselves or scalars acts as regular division, but +# returns the result wrapped in a `CertainScalar` instance. 
Base.:/(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value / b.value) Base.:/(a::CertainScalar, b::Real) = CertainScalar(a.value / b) Base.:/(a::Real, b::CertainScalar) = CertainScalar(a / b.value) diff --git a/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl b/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl index 8e3a6ebc..60932d5d 100644 --- a/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl @@ -4,87 +4,38 @@ ################ """ - Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue) -> UncertainValue + Base.:*(a::AbstractUncertainValue, b::Real; n::Int = 30000) -> UncertainScalarKDE + Base.:*(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE + Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Multiplication operator for pairs of uncertain values. +Multiplication operator. Multiply `a` by `b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise multiplication on the draws. +A kernel density estimate to the distribution of products is returned. -Computes the element-wise products between for a default of `n = 10000` realizations of `a` and -`b`, then returns an uncertain value based on a kernel density estimate to the distribution -of the element-wise products. +Use the `*(a, b, n)` syntax to tune the number of draws. -Use the `*(a, b, n)` syntax to tune the number (`n`) of draws. 
+## Example + +```julia +using UncertainData +x = UncertainValue(Normal, 0, 1) +y = UncertainValue([1, 2, -15, -20], [0.2, 0.3, 0.2, 0.3]) +x * y # uses the default number of draws (n = 30000) +*(x, y, 100000) # use more samples +``` """ function Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) UncertainValue(resample(a, n) .* resample(b, n)) end -""" - Base.:*(a::Real, b::AbstractUncertainValue) -> UncertainValue - -Multiplication operator for between scalars and uncertain values. - -Computes the element-wise products between `a` and `b` for a default of `n = 10000` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Use the `*(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:*(a::Real, b::AbstractUncertainValue; n::Int = 30000) = UncertainValue(a .* resample(b, n)) - -""" - Base.:*(a::AbstractUncertainValue, b::Real) -> UncertainValue - -Multiplication operator for between uncertain values and scalars. - -Computes the element-wise products between `a` and `b` for a default of `n = 10000` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Use the `*(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:*(a::AbstractUncertainValue, b::Real; n::Int = 30000) = UncertainValue(resample(a, n) .* b) - -""" - Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Multiplication operator for pairs of uncertain values. - -Computes the element-wise products between `a` and `b` for `n` realizations -of `a` and `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Call this function using the `*(a, b, n)` syntax. 
-""" -function Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) +Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) = UncertainValue(resample(a, n) .* resample(b, n)) -end -""" - Base.:*(a::Real, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Multiplication operator for scalar-uncertain value pairs. - -Computes the element-wise products between `a` and `b` for `n` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Call this function using the `*(a, b, n)` syntax. -""" Base.:*(a::Real, b::AbstractUncertainValue, n::Int) = UncertainValue(a .* resample(b, n)) - -""" - Base.:*(a::AbstractUncertainValue, b::Real, n::Int) -> UncertainValue - -Multiplication operator for scalar-uncertain value pairs. - -Computes the element-wise products between `a` and `b` for `n` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Call this function using the `*(a, b, n)` syntax. -""" Base.:*(a::AbstractUncertainValue, b::Real, n::Int) = UncertainValue(resample(a, n) .* b) @@ -99,14 +50,10 @@ import ..UncertainValues: CertainScalar ################## # `CertainScalar`s ################# -""" - Base.:*(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Multiplication of certain values with themselves or scalars acts as regular multiplication, -but returns the result wrapped in a `CertainScalar` instance. -""" +#Multiplication of certain values with themselves or scalars acts as regular multiplication, +#but returns the result wrapped in a `CertainScalar` instance. 
Base.:*(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) - Base.:*(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value * b.value) Base.:*(a::CertainScalar, b::Real) = CertainScalar(a.value * b) Base.:*(a::Real, b::CertainScalar) = CertainScalar(a * b.value) diff --git a/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl b/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl index 7c66828e..e2463906 100644 --- a/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl @@ -8,87 +8,38 @@ import ..Resampling: ############# """ - Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue) -> UncertainValue + Base.:-(a::AbstractUncertainValue, b::Real; n::Int = 30000) -> UncertainScalarKDE + Base.:-(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE + Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Subtraction operator for pairs of uncertain values. +Subtraction operator. Subtract `b` from `a` by drawing `n` realizations of the uncertain value(s), +then performing element-wise subtraction on the draws. +A kernel density estimate to the distribution of sums is returned. -Computes the element-wise differences between for a default of `n = 30000` realizations of `a` and -`b`, then returns an uncertain value based on a kernel density estimate to the distribution -of the element-wise differences. +Use the `-(a, b, n)` syntax to tune the number of draws. -Use the `-(a, b, n)` syntax to tune the number (`n`) of draws. 
+## Example + +```julia +using UncertainData +x = UncertainValue(Normal, 0, 1) +y = UncertainValue([1, 2, -15, -20], [0.2, 0.3, 0.2, 0.3]) +x - y # uses the default number of draws (n = 30000) +-(x, y, 100000) # use more samples +``` """ function Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) UncertainValue(resample(a, n) .- resample(b, n)) end -""" - Base.:-(a::Real, b::AbstractUncertainValue) -> UncertainValue - -Subtraction operator for between scalars and uncertain values. - -Computes the element-wise differences between `a` and `b` for a default of `n = 30000` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Use the `-(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:-(a::Real, b::AbstractUncertainValue; n::Int = 30000) = UncertainValue(a .- resample(b, n)) - -""" - Base.:-(a::AbstractUncertainValue, b::Real) -> UncertainValue - -Subtraction operator for between uncertain values and scalars. - -Computes the element-wise differences between `a` and `b` for a default of `n = 30000` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Use the `-(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:-(a::AbstractUncertainValue, b::Real; n::Int = 30000) = UncertainValue(resample(a, n) .- b) - -""" - Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Subtraction operator for pairs of uncertain values. - -Computes the element-wise differences between `a` and `b` for `n` realizations -of `a` and `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Call this function using the `-(a, b, n)` syntax. 
-""" -function Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) +Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) = UncertainValue(resample(a, n) .- resample(b, n)) -end -""" - Base.:-(a::Real, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Subtraction operator for scalar-uncertain value pairs. - -Computes the element-wise differences between `a` and `b` for `n` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Call this function using the `-(a, b, n)` syntax. -""" Base.:-(a::Real, b::AbstractUncertainValue, n::Int) = UncertainValue(a .- resample(b, n)) - -""" - Base.:-(a::AbstractUncertainValue, b::Real, n::Int) -> UncertainValue - -Subtraction operator for scalar-uncertain value pairs. - -Computes the element-wise differences between `a` and `b` for `n` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Call this function using the `-(a, b, n)` syntax. -""" Base.:-(a::AbstractUncertainValue, b::Real, n::Int) = UncertainValue(resample(a, n) .- b) @@ -103,12 +54,9 @@ import ..UncertainValues: CertainScalar ################## # `CertainScalar`s ################# -""" - Base.:-(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Subtraction of certain values with themselves or scalars acts as regular subtraction, -but returns the result wrapped in a `CertainScalar` instance. -""" +#Subtraction of certain values with themselves or scalars acts as regular subtraction, +#but returns the result wrapped in a `CertainScalar` instance. 
Base.:-(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Base.:-(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value - b.value) From 233da2ccadef30cd0502438f2814e52edc4f3e57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 7 May 2021 14:48:51 +0200 Subject: [PATCH 19/21] Terminology --- src/mathematics/uncertainvalues/add_uncertainvalues.jl | 5 ++--- src/mathematics/uncertainvalues/divide_uncertainvalues.jl | 8 +++----- .../uncertainvalues/multiply_uncertainvalues.jl | 7 +++---- .../uncertainvalues/subtract_uncertainvalues.jl | 8 +++----- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/mathematics/uncertainvalues/add_uncertainvalues.jl b/src/mathematics/uncertainvalues/add_uncertainvalues.jl index 0d88fab8..47e6609e 100644 --- a/src/mathematics/uncertainvalues/add_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/add_uncertainvalues.jl @@ -8,11 +8,10 @@ Base.:+(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Addition operator. Add `a` and `b` by drawing `n` realizations of the uncertain value(s), -then performing element-wise addition on the draws. +Addition operator. Perform the operation `a + b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise addition on the draws. Use the `+(a, b, n)` syntax to tune the number of draws. A kernel density estimate to the distribution of sums is returned. -Use the `+(a, b, n)` syntax to tune the number of draws. 
## Example diff --git a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl index 12171c6d..2f537129 100644 --- a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl @@ -7,11 +7,9 @@ Base.:/(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Right-division operator. Divide `a` by `b`, by drawing `n` realizations of the uncertain value(s), -then performing element-wise right-division on the draws. -A kernel density estimate to the distribution of sums is returned. - -Use the `/(a, b, n)` syntax to tune the number of draws. +Right-division operator. Perform the operation `a / b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise right-division on the draws. Use the `/(a, b, n)` syntax to tune the number of draws. +A kernel density estimate to the distribution of pairwise divisions is returned. ## Example diff --git a/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl b/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl index 60932d5d..a5f14453 100644 --- a/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl @@ -8,11 +8,10 @@ Base.:*(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Multiplication operator. Multiply `a` by `b` by drawing `n` realizations of the uncertain value(s), -then performing element-wise multiplication on the draws. -A kernel density estimate to the distribution of sums is returned. +Multiplication operator. 
Perform the operation `a * b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise multiplication on the draws. Use the `*(a, b, n)` syntax to tune the number of draws. +A kernel density estimate to the distribution of products is returned. -Use the `*(a, b, n)` syntax to tune the number of draws. ## Example diff --git a/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl b/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl index e2463906..73eca4c9 100644 --- a/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl @@ -12,12 +12,10 @@ import ..Resampling: Base.:-(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Subtraction operator. Subtract `b` from `a` by drawing `n` realizations of the uncertain value(s), -then performing element-wise subtraction on the draws. -A kernel density estimate to the distribution of sums is returned. +Subtraction operator. Perform the operation `a - b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise subtraction on the draws. Use the `-(a, b, n)` syntax to tune the number of draws. +A kernel density estimate to the distribution of differences is returned. -Use the `-(a, b, n)` syntax to tune the number of draws. 
- ## Example ```julia From f8ec257e732357fda7f855a2116f55edb8a9fd6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Fri, 7 May 2021 14:48:58 +0200 Subject: [PATCH 20/21] Simplify --- docs/src/uncertain_values/uncertain_values.md | 29 +++---------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/docs/src/uncertain_values/uncertain_values.md b/docs/src/uncertain_values/uncertain_values.md index 9766f715..6fe8b0ef 100644 --- a/docs/src/uncertain_values/uncertain_values.md +++ b/docs/src/uncertain_values/uncertain_values.md @@ -406,33 +406,9 @@ savefig("figs/combine_ex.png") #hide ## Mathematical operations -# Elementary mathematical operations - Elementary mathematical operations (`+`, `-`, `*`, and `/`) between arbitrary uncertain values of different types and scalars are supported. - -## Syntax - -Resampling is used to perform the mathematical operations. All mathematical -operations return a vector containing the results of repeated element-wise operations -(where each element is a resampled draw from the furnishing distribution(s) of the -uncertain value(s)). - -The default number of realizations is set to `10000`. This allows calling `uval1 + uval2` -for two uncertain values `uval1` and `uval2`. If you need to tune the number of resample -draws to `n`, use the `+(uval1, uval2, n)` syntax. - -## Future improvements - -In the future, elementary operations might be improved for certain combinations of uncertain -values where exact expressions for error propagation are now, for example using the -machinery in `Measurements.jl` for normally distributed values. - -## Supported operations - -Performing elementary operations with `CertainScalar`s behaves as for scalars. - -## Addition +Elementary operations with `CertainScalar`s behave as for scalars.
```@docs Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue) @@ -441,3 +417,6 @@ Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue) Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue) ``` +In the future, elementary operations might be improved for certain combinations of uncertain +values where exact expressions for error propagation are known, for example using the +machinery in `Measurements.jl` for normally distributed values. From 0ca350805e833b37d0f342a6aa883cf458f26f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Agas=C3=B8ster=20Haaga?= Date: Wed, 17 Nov 2021 20:35:26 +0100 Subject: [PATCH 21/21] Update DynamicalSystemsBase version --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index a89283b5..e03edb4c 100644 --- a/Project.toml +++ b/Project.toml @@ -2,7 +2,7 @@ name = "UncertainData" uuid = "dcd9ba68-c27b-5cea-ae21-829cd07325bf" authors = ["Kristian Agasøster Haaga "] repo = "https://github.com/kahaaga/UncertainData.jl.git" -version = "0.14.1" +version = "0.15.0" [deps] Bootstrap = "e28b5b4c-05e8-5b66-bc03-6f0c0a0a06e0" @@ -30,7 +30,7 @@ Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" Bootstrap = "^2.2" Combinatorics = "^0.7.0, ^1" Distributions = "0.21, 1, 0.23, 0.24" -DynamicalSystemsBase = "^1.3" +DynamicalSystemsBase = "^2.0" HypothesisTests = "0.8, 1, 0.10" Interpolations = "^0.12, ^1, 0.13" IntervalArithmetic = "^0.16, ^1, 0.17, 0.18"