diff --git a/CHANGELOG.md b/CHANGELOG.md index ce3e635c..bddac130 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,32 @@ # UncertainData changelog -## v.0.14 +## v0.15.0 + +### Breaking changes + +- `CertainValue` is renamed to `CertainScalar`. +- Some abstract types are no longer exported. + +### Features + +- More flexible inputs to `UncertainValue` constructor. + +### Documentation + +- Shortened and improved documentation. +- Use regular Documenter.jl style, not mkdocs. + +## v0.14.1 + +### Features + +- Implement sequential resampling with chunks. + +### Misc + +- Make some methods more generic (non-breaking). + +## v0.14.0 ### Breaking changes diff --git a/Project.toml b/Project.toml index ac22a231..e03edb4c 100644 --- a/Project.toml +++ b/Project.toml @@ -2,32 +2,35 @@ name = "UncertainData" uuid = "dcd9ba68-c27b-5cea-ae21-829cd07325bf" authors = ["Kristian Agasøster Haaga "] repo = "https://github.com/kahaaga/UncertainData.jl.git" -version = "0.14.0" - +version = "0.15.0" [deps] Bootstrap = "e28b5b4c-05e8-5b66-bc03-6f0c0a0a06e0" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4" HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" IntervalArithmetic = "d1acc4aa-44c8-5952-acd4-ba5d80a2a253" KernelDensity = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" [compat] Bootstrap = "^2.2" Combinatorics = "^0.7.0, ^1" Distributions = "0.21, 1, 0.23, 0.24" -DynamicalSystemsBase = "^1.3" +DynamicalSystemsBase = "^2.0" HypothesisTests = "0.8, 1, 0.10" Interpolations = "^0.12, ^1, 0.13" IntervalArithmetic = "^0.16, ^1, 0.17, 0.18" diff --git a/docs/Project.toml b/docs/Project.toml index 243ed875..719cc50e 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,9 +1,11 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" -DocumenterMarkdown = "997ab1e6-3595-5248-9280-8efb232c3433" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" KernelDensity = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" + diff --git a/docs/make.jl b/docs/make.jl index 0a695fa9..f55b8707 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -4,9 +4,16 @@ CI = get(ENV, "CI", nothing) == "true" || get(ENV, "GITHUB_TOKEN", nothing) !== CI && Pkg.activate(@__DIR__) CI && Pkg.instantiate() CI && (ENV["GKSwstype"] = "100") + +using Plots using Documenter -using DocumenterTools -using DocumenterMarkdown +using DocumenterTools: Themes + +# %% Theme stuff? 
+ +# %% Build docs +cd(@__DIR__) +ENV["JULIA_DEBUG"] = "Documenter" using UncertainData using Distributions @@ -17,24 +24,9 @@ using Interpolations PAGES = [ "index.md", - "Uncertain values" => [ - "uncertain_values/uncertainvalues_overview.md", - "uncertain_values/uncertainvalues_theoreticaldistributions.md", - "uncertain_values/uncertainvalues_kde.md", - "uncertain_values/uncertainvalues_fitted.md", - "uncertain_values/uncertainvalues_certainvalue.md", - "uncertain_values/uncertainvalues_populations.md", - "uncertain_values/uncertainvalues_Measurements.md", - "uncertain_values/merging.md", - "uncertain_values/uncertainvalues_examples.md", - ], - "Uncertain datasets" => [ - "uncertain_datasets/uncertain_datasets_overview.md", - "uncertain_datasets/uncertain_index_dataset.md", - "uncertain_datasets/uncertain_value_dataset.md", - "uncertain_datasets/uncertain_indexvalue_dataset.md", - "uncertain_datasets/uncertain_dataset.md", - ], + "uncertain_values/uncertain_values.md", + "uncertain_datasets/datasets.md", + "sampling_constraints/sampling_constraints.md", "Uncertain statistics" => [ "Core statistics" => [ "uncertain_statistics/core_stats/core_statistics.md", @@ -57,42 +49,34 @@ PAGES = [ "uncertain_statistics/hypothesistests/anderson_darling_test.md" ], ], - "Sampling constraints" => [ - "sampling_constraints/available_constraints.md", - "sampling_constraints/constrain_uncertain_values.md", - "sampling_constraints/sequential_constraints.md" - ], - "Binning" => [ - "binning/bin.md" - ], - "Resampling" => [ - "resampling/resampling_overview.md", - "resampling/resampling_uncertain_values.md", - "resampling/resampling_uncertain_datasets.md", - "resampling/resampling_uncertain_indexvalue_datasets.md", - - "resampling/sequential/resampling_uncertaindatasets_sequential.md", - "resampling/sequential/resampling_indexvalue_sequential.md", - "resampling/sequential/strictly_increasing.md", - "resampling/sequential/strictly_decreasing.md", - - "resampling/interpolation/interpolation.md", - "resampling/interpolation/gridded.md", - "resampling/resampling_schemes/resampling_schemes_uncertain_value_collections.md", - "resampling/resampling_schemes/resampling_schemes_uncertain_indexvalue_collections.md", - "resampling/resampling_schemes/resampling_with_schemes_uncertain_value_collections.md", - "resampling/resampling_schemes/resampling_with_schemes_uncertain_indexvalue_collections.md", - "resampling/resampling_inplace.md" - #"resampling/models/resampling_with_models.md" - ], + "resampling_and_binning.md", + # "Resampling" => [ + # "resampling/resampling_overview.md", + # "resampling/resampling_uncertain_values.md", + # "resampling/resampling_uncertain_datasets.md", + # "resampling/resampling_uncertain_indexvalue_datasets.md", + + # "resampling/sequential/resampling_uncertaindatasets_sequential.md", + # "resampling/sequential/resampling_indexvalue_sequential.md", + # "resampling/sequential/strictly_increasing.md", + # "resampling/sequential/strictly_decreasing.md", + + # "resampling/interpolation/interpolation.md", + # "resampling/interpolation/gridded.md", + # "resampling/resampling_schemes/resampling_schemes_uncertain_value_collections.md", + # "resampling/resampling_schemes/resampling_schemes_uncertain_indexvalue_collections.md", + # "resampling/resampling_schemes/resampling_with_schemes_uncertain_value_collections.md", + # "resampling/resampling_schemes/resampling_with_schemes_uncertain_indexvalue_collections.md", + # "resampling/resampling_inplace.md" + # #"resampling/models/resampling_with_models.md" 
+ # ], "Propagation of errors" => [ "propagation_of_errors/propagation_of_errors.md" ], "Mathematics" => [ - "mathematics/elementary_operations.md", "mathematics/trig_functions.md" ], @@ -119,14 +103,14 @@ ENV["JULIA_DEBUG"] = "Documenter" makedocs( modules = [UncertainData], - sitename = "UncertainData.jl documentation", - format = format = Documenter.HTML( + format = Documenter.HTML( prettyurls = CI, ), + sitename = "UncertainData.jl", + authors = "Kristian Agasøster Haaga", pages = PAGES ) - if CI deploydocs( repo = "github.com/kahaaga/UncertainData.jl.git", diff --git a/docs/mkdocs.yml b/docs/mkdocs similarity index 100% rename from docs/mkdocs.yml rename to docs/mkdocs diff --git a/docs/src/binning/bin.md b/docs/src/binning/bin.md index e071dc6c..e69de29b 100644 --- a/docs/src/binning/bin.md +++ b/docs/src/binning/bin.md @@ -1,45 +0,0 @@ -# Binning scalar values - -## Bin values - -```@docs -bin(left_bin_edges::AbstractRange, xs, ys) -``` - -```@docs -bin!(bins::Vector{AbstractVector{T}}, ::AbstractRange{T}, xs, ys) where T -``` - -## Bin summaries - -```@docs -bin(f::Function, left_bin_edges::AbstractRange, xs, ys) -``` - -## Fast bin summaries - -```@docs -bin_mean -``` - -# Binning uncertain data - -## Bin values - -```@docs -bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling{RawValues}) -``` - -```@docs -bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResampling{RawValues}) -``` - -## Bin summaries - -```@docs -bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling) -``` - -```@docs -bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResampling) -``` diff --git a/docs/src/mathematics/elementary_operations.md b/docs/src/mathematics/elementary_operations.md deleted file mode 100644 index 94de2a6d..00000000 --- a/docs/src/mathematics/elementary_operations.md +++ /dev/null @@ -1,97 +0,0 @@ -# Elementary mathematical operations - -Elementary mathematical operations (`+`, `-`, `*`, and `/`) between arbitrary -uncertain values of different types and scalars are supported. - -## Syntax - -Resampling is used to perform the mathematical operations. All mathematical -operations return a vector containing the results of repeated element-wise operations -(where each element is a resampled draw from the furnishing distribution(s) of the -uncertain value(s)). - -The default number of realizations is set to `10000`. This allows calling `uval1 + uval2` -for two uncertain values `uval1` and `uval2`. If you need to tune the number of resample -draws to `n`, use the `+(uval1, uval2, n)` syntax. - -## Future improvements - -In the future, elementary operations might be improved for certain combinations of uncertain -values where exact expressions for error propagation are now, for example using the -machinery in `Measurements.jl` for normally distributed values. 
- -## Supported operations - -## Addition - -```@docs -Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue) -Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:+(a::Real, b::AbstractUncertainValue) -Base.:+(a::Real, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:+(a::AbstractUncertainValue, b::Real) -Base.:+(a::AbstractUncertainValue, b::Real, n::Int) -``` - -## Subtraction - -```@docs -Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue) -Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:-(a::Real, b::AbstractUncertainValue) -Base.:-(a::Real, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:-(a::AbstractUncertainValue, b::Real) -Base.:-(a::AbstractUncertainValue, b::Real, n::Int) -``` - -## Multiplication - -```@docs -Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue) -Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:*(a::Real, b::AbstractUncertainValue) -Base.:*(a::Real, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:*(a::AbstractUncertainValue, b::Real) -Base.:*(a::AbstractUncertainValue, b::Real, n::Int) -``` - -## Division - -```@docs -Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue) -Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:/(a::Real, b::AbstractUncertainValue) -Base.:/(a::Real, b::AbstractUncertainValue, n::Int) -``` - -```@docs -Base.:/(a::AbstractUncertainValue, b::Real) -Base.:/(a::AbstractUncertainValue, b::Real, n::Int) -``` - -## Special cases - -### `CertainValue`s - -Performing elementary operations with `CertainValue`s behaves as for scalars. diff --git a/docs/src/resampling/resampling_uncertain_values.md b/docs/src/resampling/resampling_uncertain_values.md index 5d1a4e72..30ec1003 100644 --- a/docs/src/resampling/resampling_uncertain_values.md +++ b/docs/src/resampling/resampling_uncertain_values.md @@ -1,18 +1,5 @@ # Resampling uncertain values -Uncertain values may be resampled by drawing random number from the distributions -furnishing them. 
- -## Documentation - -```@docs -resample(uv::AbstractUncertainValue) -``` - -```@docs -resample(uv::AbstractUncertainValue, n::Int) -``` - ## Examples ``` julia tab="Resample once" diff --git a/docs/src/resampling/sequential/strictly_decreasing.md b/docs/src/resampling/sequential/strictly_decreasing.md index beb97d12..6dfa2c38 100644 --- a/docs/src/resampling/sequential/strictly_decreasing.md +++ b/docs/src/resampling/sequential/strictly_decreasing.md @@ -6,15 +6,12 @@ The default constructor for a strictly decreasing sequential sampling constraint ## Documentation ```@docs -resample(udata::AbstractUncertainValueDataset, - constraint::Union{SamplingConstraint, Vector{SamplingConstraint}}, - sequential_constraint::StrictlyDecreasing{OrderedSamplingAlgorithm}; - quantiles = [0.0001, 0.9999]) +resample(udata::AbstractUncertainValueDataset, sequential_constraint::StrictlyDecreasing{OrderedSamplingAlgorithm}, + constraint::Union{SamplingConstraint, Vector{SamplingConstraint}}) ``` ```@docs -resample(udata::DT, sequential_constraint::StrictlyDecreasing{T}; - quantiles = [0.0001, 0.9999]) where {DT <: AbstractUncertainValueDataset, T <: StartToEnd} +resample(udata::DT, sequential_constraint::StrictlyDecreasing{T}) where {DT <: AbstractUncertainValueDataset, T <: StartToEnd} ``` ## Compatible ordering algorithms diff --git a/docs/src/resampling/sequential/strictly_increasing.md b/docs/src/resampling/sequential/strictly_increasing.md index dc98d20f..ffb3db52 100644 --- a/docs/src/resampling/sequential/strictly_increasing.md +++ b/docs/src/resampling/sequential/strictly_increasing.md @@ -13,15 +13,13 @@ The default constructor for a strictly increasing sequential sampling constraint ```@docs resample(udata::AbstractUncertainValueDataset, - constraint::Union{SamplingConstraint, Vector{SamplingConstraint}}, - sequential_constraint::StrictlyIncreasing{OrderedSamplingAlgorithm}; - quantiles = [0.0001, 0.9999]) + sequential_constraint::StrictlyIncreasing{OrderedSamplingAlgorithm}, + constraint::Union{SamplingConstraint, Vector{SamplingConstraint}}) ``` ```@docs -resample(udata::DT, sequential_constraint::StrictlyIncreasing{T}; - quantiles = [0.0001, 0.9999]) where {DT <: AbstractUncertainValueDataset, T <: StartToEnd} +resample(udata::DT, sequential_constraint::StrictlyIncreasing{T}) where {DT <: AbstractUncertainValueDataset, T <: StartToEnd} ``` ## Examples diff --git a/docs/src/resampling_and_binning.md b/docs/src/resampling_and_binning.md new file mode 100644 index 00000000..90c7480c --- /dev/null +++ b/docs/src/resampling_and_binning.md @@ -0,0 +1,30 @@ +# Resampling and binning + +## Uncertain values + +Uncertain values may be resampled by drawing random number from the distributions +furnishing them. Optionally, sampling constraints can be applied. 
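+For example, a minimal sketch of resampling a single uncertain value (assuming `Distributions` is loaded alongside `UncertainData`, as in the other examples in these docs):
+
+```julia
+using UncertainData, Distributions
+
+# An uncertain value furnished by a normal distribution
+x = UncertainValue(Normal, 2.1, 0.3)
+
+resample(x)      # a single random draw from the furnishing distribution
+resample(x, 100) # a vector of 100 random draws
+```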
+ + +```@docs +resample(uv::AbstractUncertainValue) +resample(uv::AbstractUncertainValue, n::Int) +``` + +## Uncertain datasets + +### Binning + +```@docs +bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling{RawValues}) +bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResampling{RawValues}) +bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling) +bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResampling) +``` + +```@docs +bin(left_bin_edges::AbstractRange, xs, ys) +bin!(bins::Vector{AbstractVector{T}}, ::AbstractRange{T}, xs, ys) where T +bin(f::Function, left_bin_edges::AbstractRange, xs, ys) +bin_mean +``` diff --git a/docs/src/sampling_constraints/available_constraints.md b/docs/src/sampling_constraints/available_constraints.md deleted file mode 100644 index 773d9f00..00000000 --- a/docs/src/sampling_constraints/available_constraints.md +++ /dev/null @@ -1,46 +0,0 @@ - -# Available sampling constraints - -The following sampling constraints are available. These constraints may be used in any resampling setting. - -## Standard deviation - -```@docs -TruncateStd -``` - -## Minimum value - -```@docs -TruncateMinimum -``` - -## Maximum value - -```@docs -TruncateMaximum -``` - -## Value range - -```@docs -TruncateRange -``` - -## Lower quantile - -```@docs -TruncateLowerQuantile -``` - -## Upper quantile - -```@docs -TruncateUpperQuantile -``` - -## Quantile range - -```@docs -TruncateQuantiles -``` diff --git a/docs/src/sampling_constraints/constrain_uncertain_values.md b/docs/src/sampling_constraints/old.txt similarity index 92% rename from docs/src/sampling_constraints/constrain_uncertain_values.md rename to docs/src/sampling_constraints/old.txt index 4f254d4a..f3aed51a 100644 --- a/docs/src/sampling_constraints/constrain_uncertain_values.md +++ b/docs/src/sampling_constraints/old.txt @@ -1,30 +1,4 @@ -# Documentation - -```@docs -constrain(uv::AbstractUncertainValue, constraint::SamplingConstraint) -``` - -# Examples: constraining uncertain values - -## Theoretical distributions - -``` julia tab="Theoretical distribution" -using UncertainData, Distributions - -# Define an uncertain value furnished by a theoretical distribution -uv = UncertainValue(Normal, 1, 0.5) - -# Constrain the support of the furnishing distribution using various -# constraints -uvc_lq = constrain(uv, TruncateLowerQuantile(0.2)) -uvc_uq = constrain(uv, TruncateUpperQuantile(0.8)) -uvc_q = constrain(uv, TruncateQuantiles(0.2, 0.8)) -uvc_min = constrain(uv, TruncateMinimum(0.5)) -uvc_max = constrain(uv, TruncateMaximum(1.5)) -uvc_range = constrain(uv, TruncateRange(0.5, 1.5)) -``` - ## Theoretical distributions with fitted parameters ``` julia tab="Theoretical distribution with fitted parameters" diff --git a/docs/src/sampling_constraints/sampling_constraints.md b/docs/src/sampling_constraints/sampling_constraints.md new file mode 100644 index 00000000..6c128ec3 --- /dev/null +++ b/docs/src/sampling_constraints/sampling_constraints.md @@ -0,0 +1,73 @@ + +# Sampling constraints + +```@docs +constrain(uv::AbstractUncertainValue, constraint::SamplingConstraint) +``` + +## Element-wise constraints + +The following sampling constraints are aimed to be used element-wise on uncertain values. 
+ +```@docs +TruncateStd +TruncateMinimum +TruncateMaximum +TruncateRange +TruncateLowerQuantile +TruncateUpperQuantile +TruncateQuantiles +``` + + +### Examples + +```@example constraint_theoretical +using UncertainData, Distributions, Plots + +# Define an uncertain value furnished by a theoretical distribution +x = UncertainValue(Normal, 1, 0.5) + +# Constrain the support of the furnishing distribution using various +# constraints +xc_lq = constrain(x, TruncateLowerQuantile(0.2)) +xc_uq = constrain(x, TruncateUpperQuantile(0.8)) +xc_q = constrain(x, TruncateQuantiles(0.2, 0.8)) +xc_min = constrain(x, TruncateMinimum(0.5)) +xc_max = constrain(x, TruncateMaximum(1.5)) +xc_range = constrain(x, TruncateRange(0.5, 1.5)) + +p_lq = plot(x, label = ""); plot!(xc_lq, label = "TruncateLowerQuantile(0.2)") +p_uq = plot(x, label = ""); plot!(xc_uq, label = "TruncateLowerQuantile(0.8)") +p_q = plot(x, label = ""); plot!(xc_q, label = "TruncateQuantiles(0.2, 0.8)") +p_min = plot(x, label = ""); plot!(xc_min, label = "TruncateMinimum(0.5)") +p_max = plot(x, label = ""); plot!(xc_max, label = "TruncateMaximum(1.5)") +p_range = plot(x, label = ""); plot!(xc_range, label = "TruncateRange(0.5, 1.5)") +plot(p_min, p_max, p_range, + p_q, p_lq, p_uq, + size = (750, 500), legendfont = font(7), xlabel = "Value", ylabel = "Density", + legend = :topright, fg_legend = :transparent, bg_legend = :transparent +) +``` + +## Dataset (sequential) constraints + +Sequential constraints are used when sampling [`UncertainIndexDataset`](@ref)s or +[`UncertainIndexValueDataset`](@ref)s. + +```@docs +StrictlyIncreasing +StrictlyDecreasing +``` + +### Sampling algorithms + +```@docs +StartToEnd +``` + +### Utils + +```@docs +sequence_exists +``` diff --git a/docs/src/sampling_constraints/sequential_constraints.md b/docs/src/sampling_constraints/sequential_constraints.md deleted file mode 100644 index 54862cc2..00000000 --- a/docs/src/sampling_constraints/sequential_constraints.md +++ /dev/null @@ -1,28 +0,0 @@ -# Increasing/decreasing - - -The following constraints may be used to impose sequential constraints when sampling a -collection of uncertain values element-wise. - -## StrictlyIncreasing - -```@docs -StrictlyIncreasing -``` - -## StrictlyDecreasing - -```@docs -StrictlyDecreasing -``` - -## Existence of sequences - -`sequence_exists` will check whether a valid sequence through your collection of -uncertain values exists, so that you can know beforehand whether a particular -sequential sampling constraint is possible to apply to your data. - -```@docs -sequence_exists -``` - diff --git a/docs/src/tutorials/tutorial_transforming_data_to_regular_grid.md b/docs/src/tutorials/tutorial_transforming_data_to_regular_grid.md index 6a34acd5..5f0a4fa3 100644 --- a/docs/src/tutorials/tutorial_transforming_data_to_regular_grid.md +++ b/docs/src/tutorials/tutorial_transforming_data_to_regular_grid.md @@ -110,7 +110,7 @@ X_binned = resample(X, resampling) ``` `X_binned` is still a `UncertainIndexValueDataset`, but the indices have been reduced -to `CertainValue` instances placed at the bin midpoints. The values, however, are kept +to `CertainScalar` instances placed at the bin midpoints. The values, however, are kept as uncertain values. 
Plotting the result: diff --git a/docs/src/uncertain_datasets/datasets.md b/docs/src/uncertain_datasets/datasets.md new file mode 100644 index 00000000..3ad8885d --- /dev/null +++ b/docs/src/uncertain_datasets/datasets.md @@ -0,0 +1,96 @@ +# Uncertain datasets + +## `UncertainValueDataset` + +```@docs +UncertainValueDataset +``` + +## `UncertainIndexDataset` + +```@docs +UncertainIndexDataset +``` + + +## `UncertainIndexValueDataset` + +```@docs +UncertainIndexValueDataset +``` + +## Examples + +### Example 1: `UncertainIndexValueDataset` + + +`UncertainIndexValueDataset`s have uncertainties associated with both the +indices (e.g. time, depth, etc) and the values of the data points. + + +Let's consider some measurements with associated uncertainties, which are of different types, +because they are taken from different sources and/or were measured used different devices. +The values were measures at some time indices by an inaccurate clock, so that the times +of measuring are normally distributed values with fluctuating standard deviations. We'll +represent all of these measurements in an [`UncertainIndexValueDataset`](@ref). + +Built-in plot recipes make it easy to visualize such datasets with error bars. +By default, plotting the dataset plots the median value of the index and the measurement +(only for scatter plots), along with the 33rd to 67th percentile range error bars in both +directions. You can also tune the error bars explicitly, by specifying +quantiles, like below: + +```@example uivd1 +using UncertainData, Plots + +# These are our measurements +r1 = [UncertainValue(Normal, rand(), rand()) for i = 1:10] +r2 = UncertainValue(rand(10000)) +r3 = UncertainValue(Uniform, rand(10000)) +r4 = UncertainValue(Normal, -0.1, 0.5) +r5 = UncertainValue(Gamma, 0.4, 0.8) +vals = [r1; r2; r3; r4; r5] + +# These are our time indices +inds = [UncertainValue(Normal, i, rand(Uniform(0, 1))) for i = 1:length(vals)] + +# Combine indices and values +x = UncertainIndexValueDataset(inds, vals) + +# Plot 90th percentile range both for indices and values. +plot(x, [0.05, 0.95], [0.05, 0.95], xlabel = "Time", ylabel = "Value") +savefig("uncertainindexvaluedataset_ex1.png") # hide +``` + +![](uncertainindexvaluedataset_ex1.png) + +### Example 2: `UncertainIndexValueDataset` + +Say we had a dataset of 20 values for which the uncertainties are normally distributed +with increasing standard deviation through time. We also have some uncertain values +that are associated with the indices. + +```@example uivd2 +using UncertainData, Plots, Distributions + +# Time indices +time_inds = 1:13 +uvals = [UncertainValue(Normal, ind, rand(Uniform()) + (ind / 6)) for ind in time_inds] +inds = UncertainIndexDataset(uvals) + +# Measurements +u1 = UncertainValue(Gamma, rand(Gamma(), 500)) +u2 = UncertainValue(rand(MixtureModel([Normal(1, 0.3), Normal(0.1, 0.1)]), 500)) +uvals3 = [UncertainValue(Normal, rand(), rand()) for i = 1:11] +measurements = [u1; u2; uvals3] + +# Combine indices and values +x = UncertainIndexValueDataset(inds, measurements) + +# Plot the dataset with error bars in both directions, using the 20th to 80th percentile +# range for the indices and the 33rd to 67th percentile range for the data values. 
+plot(x, [0.2, 0.8], [0.33, 0.67], xlabel = "Time", ylabel = "Value") +savefig("uncertainindexvaluedataset_ex2.png") # hide +``` + +![](uncertainindexvaluedataset_ex2.png) diff --git a/docs/src/uncertain_datasets/uncertain_index_dataset.md b/docs/src/uncertain_datasets/uncertain_index_dataset.md index 71de557d..8b137891 100644 --- a/docs/src/uncertain_datasets/uncertain_index_dataset.md +++ b/docs/src/uncertain_datasets/uncertain_index_dataset.md @@ -1,38 +1 @@ -# Uncertain index datasets -## Documentation - -```@docs -UncertainIndexDataset -``` - -## Description - -`UncertainIndexDataset`s is an uncertain dataset type that represents the indices -corresponding to an [UncertainValueDataset](uncertain_value_dataset.md). - -It is meant to be used for the `indices` field in -[UncertainIndexValueDataset](uncertain_indexvalue_dataset.md)s instances. - -## Defining uncertain index datasets - -### Example 1: increasing index uncertainty through time - -#### Defining the indices - -Say we had a dataset of 20 values for which the uncertainties are normally distributed -with increasing standard deviation through time. - -```julia -time_inds = 1:13 -uvals = [UncertainValue(Normal, ind, rand(Uniform()) + (ind / 6)) for ind in time_inds] -inds = UncertainIndexDataset(uvals) -``` - -That's it. We can also plot the 33rd to 67th percentile range for the indices. - -```plot -plot(inds, [0.33, 0.67]) -``` - -![](uncertain_indexvalue_dataset_indices.svg) diff --git a/docs/src/uncertain_datasets/uncertain_indexvalue_dataset.md b/docs/src/uncertain_datasets/uncertain_indexvalue_dataset.md index 59a92350..e69de29b 100644 --- a/docs/src/uncertain_datasets/uncertain_indexvalue_dataset.md +++ b/docs/src/uncertain_datasets/uncertain_indexvalue_dataset.md @@ -1,128 +0,0 @@ -# Uncertain index-value datasets - -## Documentation - -```@docs -UncertainIndexValueDataset -``` - -## Description - -`UncertainIndexValueDataset`s have uncertainties associated with both the -indices (e.g. time, depth, etc) and the values of the data points. - -## Defining an uncertain index-value dataset - -### Example 1 - -#### Defining the values - -Let's start by defining the uncertain data values and collecting them in -an `UncertainValueDataset`. - -```julia -using UncertainData, Plots -gr() -r1 = [UncertainValue(Normal, rand(), rand()) for i = 1:10] -r2 = UncertainValue(rand(10000)) -r3 = UncertainValue(Uniform, rand(10000)) -r4 = UncertainValue(Normal, -0.1, 0.5) -r5 = UncertainValue(Gamma, 0.4, 0.8) - -u_values = [r1; r2; r3; r4; r5] -udata = UncertainValueDataset(u_values); -``` - -#### Defining the indices - -The values were measures at some time indices by an inaccurate clock, so that the times -of measuring are normally distributed values with fluctuating standard deviations. - -```julia -u_timeindices = [UncertainValue(Normal, i, rand(Uniform(0, 1))) - for i = 1:length(udata)] -uindices = UncertainIndexDataset(u_timeindices); -``` - -#### Combinining the indices and values - -Now, combine the uncertain time indices and measurements into an -`UncertainIndexValueDataset`. - -```julia -x = UncertainIndexValueDataset(uindices, udata) -``` - -The built-in plot recipes make it easy to visualize the dataset. -By default, plotting the dataset plots the median value of the index and the measurement -(only for scatter plots), along with the 33rd to 67th percentile range error bars in both -directions. 
- -```julia -plot(x) -``` - -![](uncertain_indexvalue_dataset_plot_defaulterrorbars.svg) - -You can also tune the error bars by calling -`plot(udata::UncertainIndexValueDataset, idx_quantiles, val_quantiles)`, explicitly -specifying the quantiles in each direction, like so: - -```julia -plot(x, [0.05, 0.95], [0.05, 0.95]) -``` - -![](uncertain_indexvalue_dataset_plot_customerrorbars.svg) - -### Example 2 - -#### Defining the indices - -Say we had a dataset of 20 values for which the uncertainties are normally distributed -with increasing standard deviation through time. - -```julia -time_inds = 1:13 -uvals = [UncertainValue(Normal, ind, rand(Uniform()) + (ind / 6)) for ind in time_inds] -inds = UncertainIndexDataset(uvals) -``` - -That's it. We can also plot the 33rd to 67th percentile range for the indices. - -```plot -plot(inds, [0.33, 0.67]) -``` - -![](uncertain_indexvalue_dataset_indices.svg) - -#### Defining the values - -Let's define some uncertain values that are associated with the indices. - -```julia -u1 = UncertainValue(Gamma, rand(Gamma(), 500)) -u2 = UncertainValue(rand(MixtureModel([Normal(1, 0.3), Normal(0.1, 0.1)]), 500)) -uvals3 = [UncertainValue(Normal, rand(), rand()) for i = 1:11] - -measurements = [u1; u2; uvals3] -datavals = UncertainValueDataset(measurements) -``` - -![](uncertain_indexvalue_dataset_vals.svg) - -#### Combinining the indices and values - -Now, we combine the indices and the corresponding data. - -```julia -d = UncertainIndexValueDataset(inds, datavals) -``` - -Plot the dataset with error bars in both directions, using the 20th to 80th percentile -range for the indices and the 33rd to 67th percentile range for the data values. - -```julia -plot(d, [0.2, 0.8], [0.33, 0.67]) -``` - -![](uncertain_indexvalue_dataset_indices_and_vals.svg) diff --git a/docs/src/uncertain_datasets/uncertain_value_dataset.md b/docs/src/uncertain_datasets/uncertain_value_dataset.md index 93c47230..d4927e61 100644 --- a/docs/src/uncertain_datasets/uncertain_value_dataset.md +++ b/docs/src/uncertain_datasets/uncertain_value_dataset.md @@ -1,11 +1,5 @@ # Uncertain value datasets -## Documentation - -```@docs -UncertainValueDataset -``` - ## Description `UncertainValueDataset`s is an uncertain dataset type that has no explicit index diff --git a/docs/src/uncertain_values/merging.md b/docs/src/uncertain_values/merging.md deleted file mode 100644 index aab721ed..00000000 --- a/docs/src/uncertain_values/merging.md +++ /dev/null @@ -1,279 +0,0 @@ -Because all uncertainties are handled using a resampling approach, it is trivial to -[`combine`](@ref) or merge uncertain values of different types into a single uncertain value. - -# Nomenclature - -Depending on your data, you may want to choose of one the following ways of -representing multiple uncertain values as one: - -- [Combining](@ref uncertainvalue_combine). An ensemble of uncertain - values is represented as a weighted population. This approach is nice if you want - to impose expert-opinion on the relative sampling probabilities of uncertain - values in the ensemble, but still sample from the entire supports of each of the - furnishing values. This introduces no additional approximations besides what - is already present at the moment you define your uncertain values. -- [Merging](@ref uncertainvalue_merge). Multiple uncertain values are merged using - a kernel density estimate to the overall distribution. This approach introduces - approximations *beyond* what is present in the uncertain values when you define them. 
- -# [Combining uncertain values: the population approach](@id uncertainvalue_combine) - -**Combining** uncertain values is done by representing them as a weighted population -of uncertain values, which is illustrated in the following example: - -```julia -# Assume we have done some analysis and have three points whose uncertainties -# significantly overlap. -v1 = UncertainValue(Normal(0.13, 0.52)) -v2 = UncertainValue(Normal(0.27, 0.42)) -v3 = UncertainValue(Normal(0.21, 0.61)) - -# Give each value equal sampling probabilities and represent as a population -pop = UncertainValue([v1, v2, v3], [1, 1, 1]) - -# Let the values v1, v2 and v3 be sampled with probability ratios 1-2-3 -pop = UncertainValue([v1, v2, v3], [1, 2, 3]) -``` - -![](figs/combining_uncertain_values.svg) - -This is not restricted to normal distributions! We can combine any type of -value in our population, even populations! - -```julia -# Consider a population of normal distributions, and a gamma distribution -v1 = UncertainValue(Normal(0.265, 0.52)) -v2 = UncertainValue(Normal(0.311, 0.15)) -v3 = UncertainValue([v1, v2], [2, 1]) -v4 = UncertainValue(Gamma(0.5, -1)) -pts = [v1, v4] -wts = [2, 1] - -# New population is a nested population with unequal weights -pop = UncertainValue(pts, wts) - -d1 = density(resample(pop, 20000), label = "population") - -d2 = plot() -density!(d2, resample(pop[1], 20000), label = "v1") -density!(d2, resample(pop[2], 20000), label = "v2") - -plot(d1, d2, layout = (2, 1), xlabel = "Value", ylabel = "Density", link = :x, xlims = (-2.5, 2.5)) -``` - -![](figs/combining_uncertain_values_ex2.svg) - -This makes it possible treat an ensemble of uncertain values as a single uncertain value. - -With equal weights, this introduces no bias beyond what is present in the data, -because resampling is done from the full supports of each of the furnishing values. -Additional information on relative sampling probabilities, however, be it informed by -expert opinion or quantative estimates, is easily incorporated by adjusting -the sampling weights. - -# [Merging uncertain values: the kernel density estimation (KDE) approach](@id uncertainvalue_merge) - -**Merging** multiple uncertain values could be done by fitting a model distribution to -the values. Using any specific theoretical distribution as a model for the combined -uncertainty, however, is in general not possible, because the values may have -different types of uncertainties. - -Thus, in this package, kernel kernel density estimation is used to merge multiple uncertain values. -This has the advantage that you only have to deal with a single estimate to the combined -distribution, but introduces bias because the distribution is *estimated* and the -shape of the distribution depends on the parameters of the KDE procedure. - -## Without weights - -When no weights are provided, the combined value is computed -by resampling each of the `N` uncertain values `n/N` times, -then combining using kernel density estimation. - -```@docs -combine(uvals::Vector{AbstractUncertainValue}; n = 1000*length(uvals), - bw::Union{Nothing, Real} = nothing) -``` - -Weights dictating the relative contribution of each -uncertain value into the combined value can also be provided. `combine` works -with `ProbabilityWeights`, `AnalyticWeights`, -`FrequencyWeights` and the generic `Weights`. 
- -Below shows an example of combining - -```julia -v1 = UncertainValue(rand(1000)) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Normal, 3.7, 0.8) -uvals = [v1, v2, v3, v4] - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1", ls = :dash) -plot!(v2, label = L"v_2", ls = :dot) -vline!(v3.values, label = L"v_3") # plot each possible state as vline -plot!(v4, label = L"v_4") - -pcombined = plot(combine(uvals), title = L"merge(v_1, v_2, v_3, v_4)", lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), link = :x, ylabel = "Density") -``` - -![](figs/combine_example_noweights.png) - -## With weights - -`Weights`, `ProbabilityWeights` and `AnalyticWeights` are functionally the same. Either -may be used depending on whether the weights are assigned subjectively or quantitatively. -With `FrequencyWeights`, it is possible to control the exact number of draws from each -uncertain value that goes into the draw pool before performing KDE. - -### ProbabilityWeights - -```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::ProbabilityWeights; - n = 1000*length(uvals)) -``` - -For example: - -```julia -v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.02) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Gamma, 8, 0.4) -uvals = [v1, v2, v3, v4]; - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1: KDE \, over \, empirical \, distribution", ls = :dash) -plot!(v2, label = L"v_2: Normal(0.8, 0.4)", ls = :dot) -# plot each possible state as vline -vline!(v3.values, - label = L"v_3: \, Discrete \, population\, [1,2,3], w/ \, weights \, [0.3, 0.4, 0.4]") -plot!(v4, label = L"v_4: \, Gamma(8, 0.4)") - -pcombined = plot( - combine(uvals, ProbabilityWeights([0.1, 0.3, 0.02, 0.5]), n = 100000, bw = 0.05), - title = L"combine([v_1, v_2, v_3, v_4], ProbabilityWeights([0.1, 0.3, 0.02, 0.5])", - lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), size = (800, 600), - link = :x, - ylabel = "Density", - tickfont = font(12), - legendfont = font(8), fg_legend = :transparent, bg_legend = :transparent) -``` - -![](figs/combine_example_pweights.png) - -### AnalyticWeights - -```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::AnalyticWeights; - n = 1000*length(uvals)) -``` - -For example: - -```julia -v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.02) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Gamma, 8, 0.4) -uvals = [v1, v2, v3, v4]; - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1: KDE \, over \, empirical \, distribution", ls = :dash) -plot!(v2, label = L"v_2: Normal(0.8, 0.4)", ls = :dot) -vline!(v3.values, label = L"v_3: \, Discrete \, population\, [1,2,3], w/ \, weights \, [0.3, 0.4, 0.4]") # plot each possible state as vline -plot!(v4, label = L"v_4: \, Gamma(8, 0.4)") - -pcombined = plot(combine(uvals, AnalyticWeights([0.1, 0.3, 0.02, 0.5]), n = 100000, bw = 0.05), - title = L"combine([v_1, v_2, v_3, v_4], AnalyticWeights([0.1, 0.3, 0.02, 0.5])", lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), size = (800, 600), - link = :x, - ylabel = "Density", - tickfont = font(12), - legendfont = font(8), fg_legend = :transparent, bg_legend = :transparent) -``` - 
-![](figs/combine_example_aweights.png) - -### Generic Weights - -```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::Weights; - n = 1000*length(uvals)) -``` - -For example: - -```julia -v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.01) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Gamma, 8, 0.4) -uvals = [v1, v2, v3, v4]; - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1: KDE \, over \, empirical \, distribution", ls = :dash) -plot!(v2, label = L"v_2: Normal(0.8, 0.4)", ls = :dot) -# plot each possible state as vline -vline!(v3.values, - label = L"v_3: \, Discrete \, population\, [1,2,3], w/ \, weights \, [0.3, 0.4, 0.4]") -plot!(v4, label = L"v_4: \, Gamma(8, 0.4)") - -pcombined = plot(combine(uvals, Weights([0.1, 0.15, 0.1, 0.1]), n = 100000, bw = 0.02), - title = L"combine([v_1, v_2, v_3, v_4], Weights([0.1, 0.15, 0.1, 0.1]))", - lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), size = (800, 600), - link = :x, - ylabel = "Density", - tickfont = font(12), - legendfont = font(8), fg_legend = :transparent, bg_legend = :transparent) -``` - -![](figs/combine_example_generic_weights.png) - -### FrequencyWeights - -Using `FrequencyWeights`, one may specify the number of times each of the uncertain values -should be sampled to form the pooled resampled draws on which the final kernel density -estimate is performed. - -```@docs -combine(uvals::Vector{AbstractUncertainValue}, weights::FrequencyWeights; - n = 1000*length(uvals)) -``` - -For example: - -```julia -v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.01) -v2 = UncertainValue(Normal, 0.8, 0.4) -v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) -v4 = UncertainValue(Gamma, 8, 0.4) -uvals = [v1, v2, v3, v4]; - -p = plot(title = L"distributions \,\, with \,\, overlapping \,\, supports") -plot!(v1, label = L"v_1: KDE \, over \, empirical \, distribution", ls = :dash) -plot!(v2, label = L"v_2: Normal(0.8, 0.4)", ls = :dot) -# plot each possible state as vline -vline!(v3.values, - label = L"v_3: \, Discrete \, population\, [1,2,3], w/ \, weights \, [0.3, 0.4, 0.4]") -plot!(v4, label = L"v_4: \, Gamma(8, 0.4)") - -pcombined = plot(combine(uvals, FrequencyWeights([10000, 20000, 3000, 5000]), bw = 0.05), - title = L"combine([v_1, v_2, v_3, v_4], FrequencyWeights([10000, 20000, 3000, 5000])", - lc = :black, lw = 2) - -plot(p, pcombined, layout = (2, 1), size = (800, 600), - link = :x, - ylabel = "Density", - tickfont = font(12), - legendfont = font(8), fg_legend = :transparent, bg_legend = :transparent) -``` - -![](figs/combine_example_fweights.png) diff --git a/docs/src/uncertain_values/uncertain_values.md b/docs/src/uncertain_values/uncertain_values.md new file mode 100644 index 00000000..6fe8b0ef --- /dev/null +++ b/docs/src/uncertain_values/uncertain_values.md @@ -0,0 +1,422 @@ +# Uncertain values + +## `UncertainValue` constructors + +The following convenience constructors are used to defined uncertain values. + +```@docs +UncertainValue +``` + +## Uncertain data types + +### [Theoretical distributions](@id uncertain_value_theoretical_distribution) + +It is common in the scientific literature to encounter uncertain data values +which are reported as following a specific distribution. For example, an author +report the mean and standard deviation of a value stated to follow a +normal distribution. 
`UncertainData.jl` makes it easy to represent such values! + +```@docs +UncertainScalarBetaDistributed +UncertainScalarBetaBinomialDistributed +UncertainScalarBetaPrimeDistributed +UncertainScalarBinomialDistributed +UncertainScalarFrechetDistributed +UncertainScalarGammaDistributed +UncertainScalarNormallyDistributed +UncertainScalarUniformlyDistributed +``` + +### [Fitted theoretical distributions](@id uncertain_value_fitted_theoretical_distribution) + +For data values with histograms close to some known distribution, the user +may choose to represent the data by fitting a theoretical distribution to the +values. This will only work well if the histogram closely resembles a +theoretical distribution. + +```@docs +UncertainScalarTheoreticalFit +``` + +### [Kernel density estimates (KDE)](@id uncertain_value_kde) + +When your data have an empirical distribution that doesn't follow any obvious +theoretical distribution, the data may be represented by a kernel density +estimate to the underlying distribution. + +```@docs +UncertainScalarKDE +``` + +#### Extended example + +Let's create a bimodal distribution, then sample 10000 values from it. + +```@example kde1 +using UncertainData, Distributions, Plots, StatsPlots +# Draw 1000 points from a three-component mixture model to create a multimodal distribution. +n1 = Normal(-3.0, 1.2) +n2 = Normal(8.0, 1.2) +n3 = Normal(0.0, 2.5) +M = MixtureModel([n1, n2, n3]) +s = rand(M, 1000); +histogram(s, nbins = 80) +ylabel!("Frequency"); xlabel!("Value") +savefig("figs/bimodal_empirical.svg") #hide +``` + +![](figs/bimodal_empirical.svg) + +It is not obvious which distribution to fit to such data. +A kernel density estimate, however, will always be a decent representation +of the data, because it doesn't follow a specific distribution and adapts to +the data values. + +To create a kernel density estimate, simply call the +`UncertainValue` constructor with a vector containing the sample. This will trigger +kernel density estimation. + +```@example kde1 +x = UncertainValue(s) +``` + +The plot below compares the empirical histogram (here represented as a density +plot) with our kernel density estimate. + +```@example kde1 +x = UncertainValue(s) +density(s, label = "10000 mixture model (M) samples") +density!(rand(x, 50000), + label = "50000 samples from KDE estimate to M") +xlabel!("data value") +ylabel!("probability density") +savefig("figs/KDEUncertainValue.svg") #hide +``` + +![](figs/KDEUncertainValue.svg) + + +### [Populations](@id uncertain_value_population) + +The `UncertainScalarPopulation` type allows representation of an uncertain scalar +represented by a population of values who will be sampled according to a set of +explicitly provided probabilities. See [examples](@ref uncertainvalue_combine). + +```@docs +UncertainScalarPopulation +``` + +### Certain values + +The `CertainScalar` allows representation of values with no uncertainty. It behaves +just as a scalar, but can be mixed with uncertain values when performing +[mathematical operations](../mathematics/elementary_operations.md) and +[resampling](../resampling/resampling_overview.md). + +```@docs +CertainScalar +``` + +### Compatibility with Measurements.jl + +`Measurement` instances from the Measurements.jl package[^1] are in UncertainData.jl represented as normal distributions. If exact error propagation is a requirement and your data is exclusively normally distributed, use Measurements.jl. 
If your data is not necessarily +normally distributed and contain errors of different types, and +a resampling approach to error propagation is desired, use UncertainData.jl. + +See the [`UncertainValue`](@ref) constructor for instructions on how to +convert `Measurement`s to uncertain values compatible with this package. + +[^1]: + M. Giordano, 2016, "Uncertainty propagation with functionally correlated quantities", arXiv:1610.08716 (Bibcode: 2016arXiv161008716G). + + +## Examples + +First, load the necessary packages: + +```julia +using UncertainData, Distributions, KernelDensity, Plots +``` + +### Theoretical distributions + +#### A uniformly distributed uncertain value + +Consider the following contrived example. We've measure a data value with a poor instrument +that tells us that the value lies between `-2` and `3`. However, we but that we know nothing +more about how the value is distributed on that interval. Then it may be reasonable to +represent that value as a uniform distribution on `[-2, 3]`. + +To construct an uncertain value following a uniform distribution, we use the constructor +for theoretical distributions with known parameters +(`UncertainValue(distribution, params...)`). + +The uniform distribution is defined by its lower and upper bounds, so we'll provide +these bounds as the parameters. + +```julia +u = UncertainValue(Uniform, 1, 2) + +# Plot the estimated density +bar(u, label = "", xlabel = "value", ylabel = "probability density") +``` + +![](figs/uncertainvalue_theoretical_uniform.svg) + +#### A normally distributed uncertain value + +A situation commonly encountered is to want to use someone else's data from a publication. +Usually, these values are reported as the mean or median, with some associated uncertainty. +Say we want to use an uncertain value which is normally distributed with mean `2.1` and +standard deviation `0.3`. + +Normal distributions also have two parameters, so we'll use the two-parameter constructor +as we did above. + +```julia +u = UncertainValue(Normal, 2.1, 0.3) + +# Plot the estimated density +bar(u, label = "", xlabel = "value", ylabel = "probability density") +``` + +![](figs/uncertainvalue_theoretical_normal.svg) + +### Kernel density estimated distributions + +One may also be given a a distribution of numbers that's not quite normally distributed. +How to represent this uncertainty? Easy: we use a kernel density estimate to the distribution. + +Let's define a complicated distribution which is a mixture of two different normal +distributions, then draw a sample of numbers from it. + +```julia +M = MixtureModel([Normal(-5, 0.5), Normal(0.2)]) +some_sample = rand(M, 250) +``` + +Now, pretend that `some_sample` is a list of measurements we got from somewhere. +KDE estimates to the distribution can be defined implicitly or explicitly as follows: + +```julia +# If the only argument to `UncertainValue()` is a vector of number, KDE will be triggered. +u = UncertainValue(rand(M, 250)) + +# You may also tell the constructor explicitly that you want KDE. +u = UncertainValue(UnivariateKDE, rand(M, 250)) +``` + +Now, let's plot the resulting distribution. _Note: this is not the original mixture of +Gaussians we started out with, it's the kernel density estimate to that mixture!_ + +```julia +# Plot the estimated distribution. 
+plot(u, xlabel = "Value", ylabel = "Probability density") +``` + +![](figs/uncertainvalue_kde_bimodal.svg) + +### Theoretical distributions fitted to empirical data + +One may also be given a dataset whose histogram looks a lot like a theoretical +distribution. We may then select a theoretical distribution and fit its +parameters to the empirical data. + +Say our data was a sample that looks like it obeys Gamma distribution. + +```julia +# Draw a 2000-point sample from a Gamma distribution with parameters α = 1.7 and θ = 5.5 +some_sample = rand(Gamma(1.7, 5.5), 2000) +``` + +To perform a parameter estimation, simply provide the distribution as the first +argument and the sample as the second argument to the `UncertainValue` constructor. + +```julia +# Take a sample from a Gamma distribution with parameters α = 1.7 and θ = 5.5 and +# create a histogram of the sample. +some_sample = rand(Gamma(1.7, 5.5), 2000) + +p1 = histogram(some_sample, normalize = true, + fc = :black, lc = :black, + label = "", xlabel = "value", ylabel = "density") + +# For the uncertain value representation, fit a gamma distribution to the sample. +# Then, compare the histogram obtained from the original distribution to that obtained +# when resampling the fitted distribution +uv = UncertainValue(Gamma, some_sample) + +# Resample the fitted theoretical distribution +p2 = histogram(resample(uv, 10000), normalize = true, + fc = :blue, lc = :blue, + label = "", xlabel = "value", ylabel = "density") + +plot(p1, p2, layout = (2, 1), link = :x) +``` + +As expected, the histograms closely match (but are not exact because we estimated +the distribution using a limited sample). + +![](figs/uncertainvalue_theoretical_fitted_gamma.svg) + +### Populations + +See [examples for combining multiple values](@ref uncertainvalue_combine). + +## Combining/merging + +Because all uncertainties are handled using a resampling approach, it is trivial to +[`combine`](@ref) or merge uncertain values of different types into a single uncertain value. + +Depending on your data, you may want to choose of one the following ways of +representing multiple uncertain values as one. + +### [Exact approach: populations](@id uncertainvalue_combine) + +Combining uncertain values is done by representing them as a weighted population +of uncertain values. This approach exactly preserves the uncertainties of +the multiple uncertain values during resampling. Adding weights makes it possible to +impose expert-opinion on the relative sampling probabilities of uncertain +values but still sample from the entire supports of the furnishing distributions. + +With equal weights, combining uncertain values introduces no bias beyond what is present in the data, +because resampling is done from the full supports of each of the furnishing values. +Additional information on relative sampling probabilities, however, be it informed by +expert opinion or quantative estimates, is easily incorporated by adjusting +the sampling weights. + +```@example +using UncertainData, Plots, StatsPlots + +# Assume we have done some analysis and have three points whose uncertainties +# significantly overlap. We want to combine these into one uncertain value. 
+v1 = UncertainValue(Normal(-0.1, 0.52)) +v2 = UncertainValue(Normal(0.27, 0.42)) +v3 = UncertainValue(Normal(0.5, 0.61)) + +# Let the values v1, v2 and v3 be sampled with equal importance +pop = UncertainValue([v1, v2, v3], [1, 1, 1]) + +# Let the values v1, v2 and v3 be sampled with relative importance 1-2-3 +pop = UncertainValue([v1, v2, v3], [1, 2, 3]) + +d1 = plot() +density!(d1, resample(pop, 20000), label = "Overall population") +d2 = plot() +density!(d2, resample(v1, 20000), label = "v1") +density!(d2, resample(v2, 20000), label = "v2") +density!(d2, resample(v3, 20000), label = "v3") +plot(d1, d2, layout = (2, 1), xlabel = "Value", ylabel = "Density", + link = :x, xlims = (-2.5, 2.5), + legend = :topleft, fg_legend = :transparent, bg_legend = :transparent) +savefig("figs/population_ex1.png") #hide +``` + +![](figs/population_ex1.png) + +This is not restricted to normal distributions! We can combine any type of +value in our population, even populations! + +```@example +using UncertainData, Plots, StatsPlots +v1 = UncertainValue(Normal, 0.265, 0.52) +v2 = UncertainValue(Normal, 0.311, 0.15) +v3 = UncertainValue(Beta, 0.7, 0.8) +v4 = UncertainValue(Gamma, 0.5, 1.0) + +# Define two sub-populations. Members of each sub-population are sampled +# with equal importance. +p1, p2 = UncertainValue([v1, v4], [1, 1]), UncertainValue([v2, v3], [1, 1]) + +# In the overall population, sub-populations are sampled with relative importance 2 to 1. +pop = UncertainValue([p1, p2], [2, 1]) + +d1 = plot() +density!(d1, resample(pop, 20000), label = "Overall population") +d2 = plot() +density!(d2, resample(pop[1], 20000), label = "Subpopulation p1 (v1 and v4)") +density!(d2, resample(pop[2], 20000), label = "Subpopulation p2 (v2 and v3)") + +d3 = plot() +density!(d3, resample(v1, 20000), label = "v1") +density!(d3, resample(v2, 20000), label = "v2") +density!(d3, resample(v3, 20000), label = "v3") +density!(d3, resample(v4, 20000), label = "v4") + +plot(d1, d2, d3, layout = (3, 1), xlabel = "Value", ylabel = "Density", + link = :x, xlims = (-2.5, 2.5), + legend = :topleft, fg_legend = :transparent, bg_legend = :transparent) +savefig("figs/population_ex2.png") #hide +``` + +![](figs/population_ex2.png) + +### [Approximate approach: KDE](@id uncertainvalue_merge) + +Merging multiple uncertain values could be done by fitting a model distribution to +the values. Using any specific theoretical distribution as a model for the combined +uncertainty, however, is in general not possible, because the values may have +different types of uncertainties. + +The `combine` function instead uses kernel density estimation is used to merge +multiple uncertain values. This has the advantage that you only +have to deal with a single estimate to the combined distribution, but +introduces bias because the distribution is *estimated*. Additionally, the +shape of the distribution depends on the parameters of the KDE procedure. 
+ +```@docs +combine(uvals::Vector{AbstractUncertainValue}) +``` + +#### Example + +```@example +using UncertainData, Plots, StatsPlots +v1 = UncertainValue(UnivariateKDE, rand(4:0.25:6, 1000), bandwidth = 0.02) +v2 = UncertainValue(Normal, 0.8, 0.4) +v3 = UncertainValue([rand() for i = 1:3], [0.3, 0.3, 0.4]) +v4 = UncertainValue(Gamma, 8, 0.4) +uvals = [v1, v2, v3, v4]; + +p = plot() +plot!(v1, label = "v1", ls = :dashdot) +plot!(v2, label = "v2", ls = :dot) +vline!(v3, label = "v3", ls = :dash) # plot each possible state as vline +plot!(v4, label = "v4") + +pcombined = plot(lc = :black, + combine(uvals, n = 100000), + title = "combine([v1, v2, v3, v4])") +pcombined_pw = plot(lc = :black, + combine(uvals, ProbabilityWeights([0.1, 0.3, 0.02, 0.5]), n = 100000, bw = 0.05), + title = "combine([v1, v2, v3, v4], ProbabilityWeights([0.1, 0.3, 0.02, 0.5])") +pcombined_fw = plot(lc = :black, + combine(uvals, FrequencyWeights([10000, 20000, 3000, 5000]), bw = 0.05), + title = "combine([v1, v2, v3, v4], FrequencyWeights([10000, 20000, 3000, 5000])") + +plot(p, pcombined, pcombined_pw, pcombined_fw, + layout = (4, 1), link = :x, ylabel = "Density", lw = 1, + titlefont = font(8), guidefont = font(9), size = (700, 600)) +savefig("figs/combine_ex.png") #hide +``` + +![](figs/combine_ex.png) + + +## Mathematical operations + +Elementary mathematical operations (`+`, `-`, `*`, and `/`) between arbitrary +uncertain values of different types and scalars are supported. +Elementary operations with `CertainScalar`s behaves as for scalars. + +```@docs +Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue) +Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue) +Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue) +Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue) +``` + +In the future, elementary operations might be improved for certain combinations of uncertain +values where exact expressions for error propagation are now, for example using the +machinery in `Measurements.jl` for normally distributed values. diff --git a/docs/src/uncertain_values/uncertainvalues_Measurements.md b/docs/src/uncertain_values/uncertainvalues_Measurements.md deleted file mode 100644 index 72875fba..00000000 --- a/docs/src/uncertain_values/uncertainvalues_Measurements.md +++ /dev/null @@ -1,13 +0,0 @@ -`Measurement` instances from [Measurements.jl](https://github.com/JuliaPhysics/Measurements.jl)[^1] are -treated as normal distributions with known means. *Note: once you convert a measurement, you lose the -functionality provided by Measurements.jl, such as exact error propagation*. - -# Generic constructor - -If `x = measurement(2.2, 0.21)` is a measurement, then `UncertainValue(x)` will return an -`UncertainScalarNormallyDistributed` instance. - -# References - -[^1]: - M. Giordano, 2016, "Uncertainty propagation with functionally correlated quantities", arXiv:1610.08716 (Bibcode: 2016arXiv161008716G). \ No newline at end of file diff --git a/docs/src/uncertain_values/uncertainvalues_certainvalue.md b/docs/src/uncertain_values/uncertainvalues_certainvalue.md deleted file mode 100644 index 40c28117..00000000 --- a/docs/src/uncertain_values/uncertainvalues_certainvalue.md +++ /dev/null @@ -1,16 +0,0 @@ -The `CertainValue` allows representation of values with no uncertainty. It behaves -just as a scalar, but can be mixed with uncertain values when performing -[mathematical operations](../mathematics/elementary_operations.md) and -[resampling](../resampling/resampling_overview.md). 
- -# Generic constructor - -```@docs -UncertainValue(::Real) -``` - -# Type documentation - -```@docs -CertainValue -``` diff --git a/docs/src/uncertain_values/uncertainvalues_examples.md b/docs/src/uncertain_values/uncertainvalues_examples.md deleted file mode 100644 index cb15f3db..00000000 --- a/docs/src/uncertain_values/uncertainvalues_examples.md +++ /dev/null @@ -1,132 +0,0 @@ - -First, load the necessary packages: - -```julia -using UncertainData, Distributions, KernelDensity, Plots -``` - -# Example 1: Uncertain values defined by theoretical distributions - -## A uniformly distributed uncertain value - -Consider the following contrived example. We've measure a data value with a poor instrument -that tells us that the value lies between `-2` and `3`. However, we but that we know nothing -more about how the value is distributed on that interval. Then it may be reasonable to -represent that value as a uniform distribution on `[-2, 3]`. - -To construct an uncertain value following a uniform distribution, we use the constructor -for theoretical distributions with known parameters -(`UncertainValue(distribution, params...)`). - -The uniform distribution is defined by its lower and upper bounds, so we'll provide -these bounds as the parameters. - -```julia -u = UncertainValue(Uniform, 1, 2) - -# Plot the estimated density -bar(u, label = "", xlabel = "value", ylabel = "probability density") -``` - -![](figs/uncertainvalue_theoretical_uniform.svg) - -## A normally distributed uncertain value - -A situation commonly encountered is to want to use someone else's data from a publication. -Usually, these values are reported as the mean or median, with some associated uncertainty. -Say we want to use an uncertain value which is normally distributed with mean `2.1` and -standard deviation `0.3`. - -Normal distributions also have two parameters, so we'll use the two-parameter constructor -as we did above. - -```julia -u = UncertainValue(Normal, 2.1, 0.3) - -# Plot the estimated density -bar(u, label = "", xlabel = "value", ylabel = "probability density") -``` - -![](figs/uncertainvalue_theoretical_normal.svg) - -## Other distributions - -You may define uncertain values following any of the -[supported distributions](uncertainvalues_theoreticaldistributions.md). - -# Example 2: Uncertain values defined by kernel density estimated distributions - -One may also be given a a distribution of numbers that's not quite normally distributed. -How to represent this uncertainty? Easy: we use a kernel density estimate to the distribution. - -Let's define a complicated distribution which is a mixture of two different normal -distributions, then draw a sample of numbers from it. - -```julia -M = MixtureModel([Normal(-5, 0.5), Normal(0.2)]) -some_sample = rand(M, 250) -``` - -Now, pretend that `some_sample` is a list of measurements we got from somewhere. -KDE estimates to the distribution can be defined implicitly or explicitly as follows: - -```julia -# If the only argument to `UncertainValue()` is a vector of number, KDE will be triggered. -u = UncertainValue(rand(M, 250)) - -# You may also tell the constructor explicitly that you want KDE. -u = UncertainValue(UnivariateKDE, rand(M, 250)) -``` - -Now, let's plot the resulting distribution. _Note: this is not the original mixture of -Gaussians we started out with, it's the kernel density estimate to that mixture!_ - -```julia -# Plot the estimated distribution. 
-plot(u, xlabel = "Value", ylabel = "Probability density") -``` - -![](figs/uncertainvalue_kde_bimodal.svg) - -# Example 3: Uncertain values defined by theoretical distributions fitted to empirical data - -One may also be given a dataset whose histogram looks a lot like a theoretical -distribution. We may then select a theoretical distribution and fit its -parameters to the empirical data. - -Say our data was a sample that looks like it obeys Gamma distribution. - -```julia -# Draw a 2000-point sample from a Gamma distribution with parameters α = 1.7 and θ = 5.5 -some_sample = rand(Gamma(1.7, 5.5), 2000) -``` - -To perform a parameter estimation, simply provide the distribution as the first -argument and the sample as the second argument to the `UncertainValue` constructor. - -```julia -# Take a sample from a Gamma distribution with parameters α = 1.7 and θ = 5.5 and -# create a histogram of the sample. -some_sample = rand(Gamma(1.7, 5.5), 2000) - -p1 = histogram(some_sample, normalize = true, - fc = :black, lc = :black, - label = "", xlabel = "value", ylabel = "density") - -# For the uncertain value representation, fit a gamma distribution to the sample. -# Then, compare the histogram obtained from the original distribution to that obtained -# when resampling the fitted distribution -uv = UncertainValue(Gamma, some_sample) - -# Resample the fitted theoretical distribution -p2 = histogram(resample(uv, 10000), normalize = true, - fc = :blue, lc = :blue, - label = "", xlabel = "value", ylabel = "density") - -plot(p1, p2, layout = (2, 1), link = :x) -``` - -As expected, the histograms closely match (but are not exact because we estimated -the distribution using a limited sample). - -![](figs/uncertainvalue_theoretical_fitted_gamma.svg) diff --git a/docs/src/uncertain_values/uncertainvalues_fitted.md b/docs/src/uncertain_values/uncertainvalues_fitted.md deleted file mode 100644 index 70749cee..00000000 --- a/docs/src/uncertain_values/uncertainvalues_fitted.md +++ /dev/null @@ -1,86 +0,0 @@ -# [Fitted theoretical distributions](@id uncertain_value_fitted_theoretical_distribution) - -For data values with histograms close to some known distribution, the user -may choose to represent the data by fitting a theoretical distribution to the -values. This will only work well if the histogram closely resembles a -theoretical distribution. - -## Generic constructor - -```@docs -UncertainValue(d::Type{D}, empiricaldata::Vector{T}) where {D<:Distribution, T} -``` - -## Type documentation - -```@docs -UncertainScalarTheoreticalFit -``` - -## Examples - -``` julia tab="Uniform" -using Distributions, UncertainData - -# Create a normal distribution -d = Uniform() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Define an uncertain value by fitting a uniform distribution to the sample. -uv = UncertainValue(Uniform, some_sample) -``` - -``` julia tab="Normal" -using Distributions, UncertainData - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Represent the uncertain value by a fitted normal distribution. -uv = UncertainValue(Normal, some_sample) -``` - -``` julia tab="Gamma" -using Distributions, UncertainData - -# Generate 1000 values from a gamma distribution with parameters α = 2.1, -# θ = 5.2. -some_sample = rand(Gamma(2.1, 5.2), 1000) - -# Represent the uncertain value by a fitted gamma distribution. 
-uv = UncertainValue(Gamma, some_sample) -``` -In these examples we're trying to fit the same distribution to our sample -as the distribution from which we draw the sample. Thus, we will get good fits. -In real applications, make sure to always visually investigate the histogram -of your data! - - -### Beware: fitting distributions may lead to nonsensical results! -In a less contrived example, we may try to fit a beta distribution to a sample -generated from a gamma distribution. - - -``` julia -using Distributions, UncertainData - -# Generate 1000 values from a gamma distribution with parameters α = 2.1, -# θ = 5.2. -some_sample = rand(Gamma(2.1, 5.2), 1000) - -# Represent the uncertain value by a fitted beta distribution. -uv = UncertainValue(Beta, some_sample) -``` - -This is obviously not a good idea. Always visualise your distribution before -deciding on which distribution to fit! You won't get any error messages if you -try to fit a distribution that does not match your data. - -If the data do not follow an obvious theoretical distribution, it is better to -use kernel density estimation to define the uncertain value. - diff --git a/docs/src/uncertain_values/uncertainvalues_kde.md b/docs/src/uncertain_values/uncertainvalues_kde.md deleted file mode 100644 index a6de554b..00000000 --- a/docs/src/uncertain_values/uncertainvalues_kde.md +++ /dev/null @@ -1,144 +0,0 @@ -# [Kernel density estimated distributions](@id uncertain_value_kde) - -When your data have an empirical distribution that doesn't follow any obvious -theoretical distribution, the data may be represented by a kernel density -estimate. - -# Generic constructor - -```@docs -UncertainValue(::AbstractVector{<:Real}) -``` - -# Type documentation - -```@docs -UncertainScalarKDE -``` - -# Examples - -``` julia tab="Implicit KDE constructor" -using Distributions, UncertainData - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Use the implicit KDE constructor to create the uncertain value -uv = UncertainValue(v::Vector) -``` - -``` julia tab="Explicit KDE constructor" -using Distributions, UncertainData, KernelDensity - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Use the explicit KDE constructor to create the uncertain value. -# This constructor follows the same convention as when fitting distributions -# to empirical data, so this is the recommended way to construct KDE estimates. -uv = UncertainValue(UnivariateKDE, v::Vector) -``` - -``` julia tab="Changing the kernel" -using Distributions, UncertainData, KernelDensity - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Use the explicit KDE constructor to create the uncertain value, specifying -# that we want to use normal distributions as the kernel. The kernel can be -# any valid kernel from Distributions.jl, and the default is to use normal -# distributions. -uv = UncertainValue(UnivariateKDE, v::Vector; kernel = Normal) -``` - -``` julia tab="Adjusting number of points" -using Distributions, UncertainData, KernelDensity - -# Create a normal distribution -d = Normal() - -# Draw a 1000-point sample from the distribution. -some_sample = rand(d, 1000) - -# Use the explicit KDE constructor to create the uncertain value, specifying -# the number of points we want to use for the kernel density estimate. 
Fast -# Fourier transforms are used behind the scenes, so the number of points -# should be a power of 2 (the default is 2048 points). -uv = UncertainValue(UnivariateKDE, v::Vector; npoints = 1024) -``` - -# Extended example - -Let's create a bimodal distribution, then sample 10000 values from it. - -```julia -using Distributions - -n1 = Normal(-3.0, 1.2) -n2 = Normal(8.0, 1.2) -n3 = Normal(0.0, 2.5) - -# Use a mixture model to create a bimodal distribution -M = MixtureModel([n1, n2, n3]) - -# Sample the mixture model. -samples_empirical = rand(M, Int(1e4)); -``` - -![](imgs/bimodal_empirical.svg) - -It is not obvious which distribution to fit to such data. - -A kernel density estimate, however, will always be a decent representation -of the data, because it doesn't follow a specific distribution and adapts to -the data values. - -To create a kernel density estimate, simply call the -`UncertainValue(v::Vector{Number})` constructor with a vector containing the -sample: - -```julia -uv = UncertainValue(samples_empirical) -``` - -The plot below compares the empirical histogram (here represented as a density -plot) with our kernel density estimate. - -```julia -using Plots, StatPlots, UncertainData -uv = UncertainValue(samples_empirical) -density(mvals, label = "10000 mixture model (M) samples") -density!(rand(uv, Int(1e4)), - label = "10000 samples from KDE estimate to M") -xlabel!("data value") -ylabel!("probability density") -``` - -![](imgs/KDEUncertainValue.svg) - -## Constructor - -```@docs -UncertainValue(data::Vector{T}; - kernel::Type{D} = Normal, - npoints::Int = 2048) where {D <: Distributions.Distribution, T} -``` - -### Additional keyword arguments and examples - -If the only argument to the `UncertainValue` constructor is a vector of values, -the default behaviour is to represent the distribution by a kernel density -estimate (KDE), i.e. `UncertainValue(data)`. Gaussian kernels are used by -default. The syntax `UncertainValue(UnivariateKDE, data)` will also work if -`KernelDensity.jl` is loaded. diff --git a/docs/src/uncertain_values/uncertainvalues_overview.md b/docs/src/uncertain_values/uncertainvalues_overview.md deleted file mode 100644 index 85b25936..00000000 --- a/docs/src/uncertain_values/uncertainvalues_overview.md +++ /dev/null @@ -1,125 +0,0 @@ -# [Uncertain value types](@id uncertain_value_types) - -The core concept of `UncertainData` is to replace an uncertain data value with a -probability distribution describing the point's uncertainty. - -The following types of uncertain values are currently implemented: - -- [Theoretical distributions with known parameters](uncertainvalues_theoreticaldistributions.md). -- [Theoretical distributions with parameters fitted to empirical data](uncertainvalues_fitted.md). -- [Kernel density estimated distributions estimated from empirical data](uncertainvalues_kde.md). -- [Weighted (nested) populations](uncertainvalues_populations.md) where the probability of - drawing values are already known, so you can skip kernel density estimation. Populations can be - nested, and may contain numerical values, uncertain values or both. -- [Values without uncertainty](uncertainvalues_certainvalue.md) have their own dedicated - [`CertainValue`](@ref) type, so that you can uncertain values with certain values. -- [`Measurement` instances](uncertainvalues_Measurements.md) from [Measurements.jl](https://github.com/JuliaPhysics/Measurements.jl) are treated as normal distributions with known mean and standard devation. 
- -## Some quick examples - -See also the [extended examples](uncertainvalues_examples.md)! - -### Kernel density estimation (KDE) - -If the data doesn't follow an obvious theoretical distribution, the recommended -course of action is to represent the uncertain value with a kernel density -estimate of the distribution. - -``` julia tab="Implicit KDE estimate" -using Distributions, UncertainData, KernelDensity - -# Generate some random data from a normal distribution, so that we get a -# histogram resembling a normal distribution. -some_sample = rand(Normal(), 1000) - -# Uncertain value represented by a kernel density estimate (it is inferred -# that KDE is wanted when no distribution is provided to the constructor). -uv = UncertainValue(some_sample) -``` - -``` julia tab="Explicit KDE estimate" -using Distributions, UncertainData - -# Generate some random data from a normal distribution, so that we get a -# histogram resembling a normal distribution. -some_sample = rand(Normal(), 1000) - - -# Specify that we want a kernel density estimate representation -uv = UncertainValue(UnivariateKDE, some_sample) -``` - -### Populations - -If you have a population of values where each value has a probability assigned to it, -you can construct an uncertain value by providing the values and uncertainties as -two equal-length vectors to the constructor. Weights are normalized by default. - -```julia -vals = rand(100) -weights = rand(100) -p = UncertainValue(vals, weights) -``` - -### Fitting a theoretical distribution - -If your data has a histogram closely resembling some theoretical distribution, -the uncertain value may be represented by fitting such a distribution to the data. - -``` julia tab="Example 1: fitting a normal distribution" -using Distributions, UncertainData - -# Generate some random data from a normal distribution, so that we get a -# histogram resembling a normal distribution. -some_sample = rand(Normal(), 1000) - -# Uncertain value represented by a theoretical normal distribution with -# parameters fitted to the data. -uv = UncertainValue(Normal, some_sample) -``` - -``` julia tab="Example 2: fitting a gamma distribution" -using Distributions, UncertainData - -# Generate some random data from a gamma distribution, so that we get a -# histogram resembling a gamma distribution. -some_sample = rand(Gamma(), 1000) - -# Uncertain value represented by a theoretical gamma distribution with -# parameters fitted to the data. -uv = UncertainValue(Gamma, some_sample) -``` - -### Theoretical distribution with known parameters - -It is common when working with uncertain data found in the scientific -literature that data value are stated to follow a distribution with given -parameters. For example, a data value may be given as normal distribution with -a given mean `μ = 2.2` and standard deviation `σ = 0.3`. 
- - -``` julia tab="Example 1: theoretical normal distribution" -# Uncertain value represented by a theoretical normal distribution with -# known parameters μ = 2.2 and σ = 0.3 -uv = UncertainValue(Normal, 2.2, 0.3) -``` - -``` julia tab="Example 2: theoretical gamma distribution" -# Uncertain value represented by a theoretical gamma distribution with -# known parameters α = 2.1 and θ = 3.1 -uv = UncertainValue(Gamma, 2.1, 3.1) -``` - -``` julia tab="Example 3: theoretical binomial distribution" -# Uncertain value represented by a theoretical binomial distribution with -# known parameters p = 32 and p = 0.13 -uv = UncertainValue(Binomial, 32, 0.13) -``` - -### Values with no uncertainty - -Scalars with no uncertainty can also be represented. - -```julia -c1, c2 = UncertainValue(2), UncertainValue(2.2) -``` diff --git a/docs/src/uncertain_values/uncertainvalues_populations.md b/docs/src/uncertain_values/uncertainvalues_populations.md deleted file mode 100644 index 783491d4..00000000 --- a/docs/src/uncertain_values/uncertainvalues_populations.md +++ /dev/null @@ -1,18 +0,0 @@ - -# [Weighted populations](@id uncertain_value_population) - -The `UncertainScalarPopulation` type allows representation of an uncertain scalar -represented by a population of values who will be sampled according to a vector of -explicitly provided probabilities. Think of it as an explicit kernel density estimate. - -# Generic constructor - -```@docs -UncertainValue(::Vector, ::Vector) -``` - -# Type documentation - -```@docs -UncertainScalarPopulation -``` diff --git a/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md b/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md deleted file mode 100644 index 4053a566..00000000 --- a/docs/src/uncertain_values/uncertainvalues_theoreticaldistributions.md +++ /dev/null @@ -1,112 +0,0 @@ -# [Theoretical distributions](@id uncertain_value_theoretical_distribution) - -It is common in the scientific literature to encounter uncertain data values -which are reported as following a specific distribution. For example, an author -report the mean and standard deviation of a value stated to follow a -normal distribution. `UncertainData` makes it easy to represent such values! - -# Generic constructors - -## From instances of distributions - -```@docs -UncertainValue(d::Distributions.Distribution) -``` - -## Defined from scratch - -Uncertain values represented by theoretical distributions may be constructed -using the two-parameter or three-parameter constructors -`UncertainValue(d::Type{D}, a<:Number, b<:Number)` or -`UncertainValue(d::Type{D}, a<:Number, b<:Number, c<:Number)` (see below). -Parameters are provided to the constructor in the same order as for constructing -the equivalent distributions in `Distributions.jl`. - -### Two-parameter distributions - -```@docs -UncertainValue(distribution::Type{D}, a::T1, b::T2; kwargs...) where {T1<:Number, T2 <: Number, D<:Distribution} -``` - -### Three-parameter distributions - -```@docs -UncertainValue(distribution::Type{D}, a::T1, b::T2, c::T3; kwargs...) 
where {T1<:Number, T2<:Number, T3<:Number, D<:Distribution} -``` - -# Type documentation - -```@docs -UncertainScalarBetaBinomialDistributed -UncertainScalarBetaDistributed -UncertainScalarBetaPrimeDistributed -UncertainScalarBinomialDistributed -UncertainScalarFrechetDistributed -UncertainScalarGammaDistributed -UncertainScalarNormallyDistributed -UncertainScalarUniformlyDistributed -``` - -# List of supported distributions - -Supported distributions are: - -- `Uniform` -- `Normal` -- `Gamma` -- `Beta` -- `BetaPrime` -- `Frechet` -- `Binomial` -- `BetaBinomial` - -More distributions will be added in the future!. - -# Examples - -``` julia tab="Uniform" -# Uncertain value generated by a uniform distribution on [-5.0, 5.1]. -uv = UncertainValue(Uniform, -5.0, 5.1) -``` - -``` julia tab="Normal" -# Uncertain value generated by a normal distribution with parameters μ = -2 and -# σ = 0.5. -uv = UncertainValue(Normal, -2, 0.5) -``` - -``` julia tab="Gamma" -# Uncertain value generated by a gamma distribution with parameters α = 2.2 -# and θ = 3. -uv = UncertainValue(Gamma, 2.2, 3) -``` - -``` julia tab="Beta" -# Uncertain value generated by a beta distribution with parameters α = 1.5 -# and β = 3.5 -uv = UncertainValue(Beta, 1.5, 3.5) -``` - -``` julia tab="BetaPrime" -# Uncertain value generated by a beta prime distribution with parameters α = 1.7 -# and β = 3.2 -uv = UncertainValue(Beta, 1.7, 3.2) -``` - -``` julia tab="Fréchet" -# Uncertain value generated by a Fréchet distribution with parameters α = 2.1 -# and θ = 4 -uv = UncertainValue(Beta, 2.1, 4) -``` - -``` julia tab="Binomial" -# Uncertain value generated by binomial distribution with n = 28 trials and -# probability p = 0.2 of success in individual trials. -uv = UncertainValue(Binomial, 28, 0.2) -``` - -``` julia tab="BetaBinomial" -# Creates an uncertain value generated by a beta-binomial distribution with -# n = 28 trials, and parameters α = 1.5 and β = 3.5. -uv = UncertainValue(BetaBinomial, 28, 3.3, 4.4) -``` diff --git a/src/interpolation_and_binning/binning.jl b/src/interpolation_and_binning/binning.jl index b71fd628..981016d9 100644 --- a/src/interpolation_and_binning/binning.jl +++ b/src/interpolation_and_binning/binning.jl @@ -14,17 +14,25 @@ Returns `N - 1` bin vectors. ## Examples -### Getting the values in each bin: - -```julia +```jldoctest xs = [1.2, 1.7, 2.2, 3.3, 4.5, 4.6, 7.1] ys = [4.2, 5.1, 6.5, 4.2, 3.2, 3.1, 2.5] left_bin_edges = 0.0:1.0:6.0 bin(left_bin_edges, xs, ys) + +# output +6-element Array{Array{Float64,1},1}: + [] + [4.2, 5.1] + [6.5] + [4.2] + [3.2, 3.1] + [] ``` +Some example data with unevenly spaced time indices: + ```julia -# Some example data with unevenly spaced time indices npts = 300 time, vals = sort(rand(1:1000, npts)), rand(npts) diff --git a/src/mathematics/uncertainvalues/add_uncertainvalues.jl b/src/mathematics/uncertainvalues/add_uncertainvalues.jl index c588a4c9..47e6609e 100644 --- a/src/mathematics/uncertainvalues/add_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/add_uncertainvalues.jl @@ -4,89 +4,55 @@ ########## """ - Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue) -> UncertainValue + Base.:+(a::AbstractUncertainValue, b::Real; n::Int = 30000) -> UncertainScalarKDE + Base.:+(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE + Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Addition operator for pairs of uncertain values. +Addition operator. 
Perform the operation `a + b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise addition on the draws. Use the `+(a, b, n)` syntax to tune the number of draws. +A kernel density estimate to the distribution of sums is returned. -Computes the element-wise sum between for a default of `n = 10000` realizations of `a` and -`b`, then returns an uncertain value based on a kernel density estimate to the distribution -of the element-wise sums. -Use the `+(a, b, n)` syntax to tune the number (`n`) of draws. +## Example + +```julia +using UncertainData +x = UncertainValue(Normal, 0, 1) +y = UncertainValue([1, 2, -15, -20], [0.2, 0.3, 0.2, 0.3]) +x + y # uses the default number of draws (n = 30000) ++(x, y, 100000) # use more samples +``` """ function Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) UncertainValue(resample(a, n) .+ resample(b, n)) end -""" - Base.:+(a::Real, b::AbstractUncertainValue) -> UncertainValue - -Addition operator for between scalars and uncertain values. - -Computes the element-wise sum between `a` and `b` for a default of `n = 10000` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. - -Use the `+(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:+(a::Real, b::AbstractUncertainValue; n::Int = 30000) = UncertainValue(a .+ resample(b, n)) - -""" - Base.:+(a::AbstractUncertainValue, b::Real) -> UncertainValue - -Addition operator for between uncertain values and scalars. - -Computes the element-wise sum between `a` and `b` for a default of `n = 10000` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. - -Use the `+(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:+(a::AbstractUncertainValue, b::Real; n::Int = 30000) = UncertainValue(resample(a, n) .+ b) - -""" - Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Addition operator for pairs of uncertain values. - -Computes the element-wise sum between `a` and `b` for `n` realizations -of `a` and `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. - -Call this function using the `+(a, b, n)` syntax. -""" -function Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) +Base.:+(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) = UncertainValue(resample(a, n) .+ resample(b, n)) -end -""" - Base.:+(a::Real, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Addition operator for scalar-uncertain value pairs. +Base.:+(a::Real, b::AbstractUncertainValue, n::Int) = UncertainValue(a .+ resample(b, n)) +Base.:+(a::AbstractUncertainValue, b::Real, n::Int) = UncertainValue(resample(a, n) .+ b) -Computes the element-wise sum between `a` and `b` for `n` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. -Call this function using the `+(a, b, n)` syntax. 
-""" -Base.:+(a::Real, b::AbstractUncertainValue, n::Int) = - UncertainValue(a .+ resample(b, n)) - -""" - Base.:+(a::AbstractUncertainValue, b::Real, n::Int) -> UncertainValue +##################################################################################### +# Special cases +##################################################################################### -Addition operator for scalar-uncertain value pairs. +import ..UncertainValues: CertainScalar -Computes the element-wise sum between `a` and `b` for `n` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise sums. +################## +# `CertainScalar`s +################# -Call this function using the `+(a, b, n)` syntax. -""" -Base.:+(a::AbstractUncertainValue, b::Real, n::Int) = - UncertainValue(resample(a, n) .+ b) +# Addition of certain values with themselves or scalars acts as regular addition, but +# returns the result wrapped in a `CertainScalar` instance. +Base.:+(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) +Base.:+(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value + b.value) +Base.:+(a::CertainScalar, b::Real) = CertainScalar(a.value + b) +Base.:+(a::Real, b::CertainScalar) = CertainScalar(a + b.value) @@ -124,25 +90,3 @@ function Base.:+(a::AbstractUncertainValue, [+(a, b[i], n) for i = 1:length(b)] end - -##################################################################################### -# Special cases -##################################################################################### - -import ..UncertainValues: CertainValue - -################## -# `CertainValue`s -################# -""" - Base.:+(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) - -Addition of certain values with themselves or scalars acts as regular addition, but -returns the result wrapped in a `CertainValue` instance. -""" -Base.:+(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) - -Base.:+(a::CertainValue, b::CertainValue) = CertainValue(a.value + b.value) -Base.:+(a::CertainValue, b::Real) = CertainValue(a.value + b) -Base.:+(a::Real, b::CertainValue) = CertainValue(a + b.value) - diff --git a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl index 381ff370..2f537129 100644 --- a/src/mathematics/uncertainvalues/divide_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/divide_uncertainvalues.jl @@ -3,87 +3,36 @@ ################ """ - Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue) -> UncertainValue + Base.:/(a::AbstractUncertainValue, b::Real; n::Int = 30000) -> UncertainScalarKDE + Base.:/(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE + Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Division operator for pairs of uncertain values. - -Computes the element-wise quotients between for a default of `n = 10000` realizations of `a` and -`b`, then returns an uncertain value based on a kernel density estimate to the distribution -of the element-wise quotients. +Right-division operator. Perform the operation `a / b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise right-division on the draws. Use the `/(a, b, n)` syntax to tune the number of draws. +A kernel density estimate to the distribution of pairwise divisions is returned. -Use the `/(a, b, n)` syntax to tune the number (`n`) of draws. 
+## Example + +```julia +using UncertainData +x = UncertainValue(Normal, 0, 1) +y = UncertainValue([1, 2, -15, -20], [0.2, 0.3, 0.2, 0.3]) +x / y # uses the default number of draws (n = 30000) +/(x, y, 100000) # use more samples +``` """ function Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) UncertainValue(resample(a, n) ./ resample(b, n)) end -""" - Base.:/(a::Real, b::AbstractUncertainValue) -> UncertainValue - -Division operator for between scalars and uncertain values. - -Computes the element-wise quotients between `a` and `b` for a default of `n = 10000` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Use the `/(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:/(a::Real, b::AbstractUncertainValue; n::Int = 30000) = UncertainValue(a ./ resample(b, n)) - -""" - Base.:/(a::AbstractUncertainValue, b::Real) -> UncertainValue - -Division operator for between uncertain values and scalars. - -Computes the element-wise quotients between `a` and `b` for a default of `n = 10000` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Use the `/(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:/(a::AbstractUncertainValue, b::Real; n::Int = 30000) = UncertainValue(resample(a, n) ./ b) - -""" - Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Division operator for pairs of uncertain values. - -Computes the element-wise quotients between `a` and `b` for `n` realizations -of `a` and `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Call this function using the `/(a, b, n)` syntax. -""" -function Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) +Base.:/(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) = UncertainValue(resample(a, n) ./ resample(b, n)) -end -""" - Base.:/(a::Real, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Division operator for scalar-uncertain value pairs. - -Computes the element-wise quotients between `a` and `b` for `n` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Call this function using the `/(a, b, n)` syntax. -""" Base.:/(a::Real, b::AbstractUncertainValue, n::Int) = UncertainValue(a ./ resample(b, n)) - -""" - Base.:/(a::AbstractUncertainValue, b::Real, n::Int) -> UncertainValue - -Division operator for scalar-uncertain value pairs. - -Computes the element-wise quotients between `a` and `b` for `n` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise quotients. - -Call this function using the `/(a, b, n)` syntax. 
-""" Base.:/(a::AbstractUncertainValue, b::Real, n::Int) = UncertainValue(resample(a, n) ./ b) @@ -93,20 +42,14 @@ Base.:/(a::AbstractUncertainValue, b::Real, n::Int) = # Special cases ##################################################################################### -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar ################## -# `CertainValue`s +# `CertainScalar`s ################# -""" - Base.:/(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) - -Division of certain values with themselves or scalars acts as regular division, but -returns the result wrapped in a `CertainValue` instance. -""" -Base.:/(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) - -Base.:/(a::CertainValue, b::CertainValue) = CertainValue(a.value / b.value) -Base.:/(a::CertainValue, b::Real) = CertainValue(a.value / b) -Base.:/(a::Real, b::CertainValue) = CertainValue(a / b.value) +# Division of certain values with themselves or scalars acts as regular division, but +# returns the result wrapped in a `CertainScalar` instance. +Base.:/(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value / b.value) +Base.:/(a::CertainScalar, b::Real) = CertainScalar(a.value / b) +Base.:/(a::Real, b::CertainScalar) = CertainScalar(a / b.value) diff --git a/src/mathematics/uncertainvalues/elementary_operations_uncertainvalues_special_cases.jl b/src/mathematics/uncertainvalues/elementary_operations_uncertainvalues_special_cases.jl index 6f29ef12..471ab7d2 100644 --- a/src/mathematics/uncertainvalues/elementary_operations_uncertainvalues_special_cases.jl +++ b/src/mathematics/uncertainvalues/elementary_operations_uncertainvalues_special_cases.jl @@ -6,50 +6,50 @@ for operator in operators funcs = quote """ - $($f)(a::CertainValue, b::AbstractUncertainValue; n::Int = 30000) + $($f)(a::CertainScalar, b::AbstractUncertainValue; n::Int = 30000) Compute `a $($operator) b`. Treats the certain value as a scalar and performs the operation element-wise on a default of `n = 30000` realizations of `b`. To tune the number of draws to `n`, use the `$($operator)(a, b, n::Int)` syntax. """ - function $(f)(a::CertainValue, b::AbstractUncertainValue; n::Int = 30000) + function $(f)(a::CertainScalar, b::AbstractUncertainValue; n::Int = 30000) $(elementwise_operator)(a.value, b, n) end """ - $($f)(a::AbstractUncertainValue, b::CertainValue; n::Int = 30000) + $($f)(a::AbstractUncertainValue, b::CertainScalar; n::Int = 30000) Compute `a $($operator) b`. Treats the certain value as a scalar and performs the operation element-wise on a default of `n = 30000` realizations of `a`. To tune the number of draws to `n`, use the `$($operator)(a, b, n::Int)` syntax. """ - function $(f)(a::AbstractUncertainValue, b::CertainValue; n::Int = 30000) + function $(f)(a::AbstractUncertainValue, b::CertainScalar; n::Int = 30000) $(elementwise_operator)(a, b.value, n) end """ - $($f)(a::AbstractUncertainValue, b::CertainValue; n::Int) + $($f)(a::AbstractUncertainValue, b::CertainScalar; n::Int) Compute `a $($operator) b`. Treats the certain value as a scalar and performs the operation element-wise on `n` realizations of `a`. This function is called with the `$($operator)(a, b, n::Int)` syntax. 
""" - function $(f)(a::AbstractUncertainValue, b::CertainValue, n::Int) + function $(f)(a::AbstractUncertainValue, b::CertainScalar, n::Int) $(elementwise_operator)(a, b.value, n) end """ - $($f)(a::CertainValue, b::AbstractUncertainValue, n::Int) + $($f)(a::CertainScalar, b::AbstractUncertainValue, n::Int) Compute `a $($operator) b`. Treats the certain value as a scalar and performs the operation element-wise on `n` realizations of `b`. This function is called with the `$($operator)(a, b, n::Int)` syntax. """ - function $(f)(a::CertainValue, b::AbstractUncertainValue, n::Int) + function $(f)(a::CertainScalar, b::AbstractUncertainValue, n::Int) $(elementwise_operator)(a.value, b, n) end end diff --git a/src/mathematics/uncertainvalues/exponentiation_uncertainvalues.jl b/src/mathematics/uncertainvalues/exponentiation_uncertainvalues.jl index 8270b703..b51c3aa1 100644 --- a/src/mathematics/uncertainvalues/exponentiation_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/exponentiation_uncertainvalues.jl @@ -105,20 +105,20 @@ Base.:^(a::AbstractUncertainValue, b::Real, n::Int) = # Special cases ##################################################################################### -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar ################## -# `CertainValue`s +# `CertainScalar`s ################# """ - Base.:^(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) + Base.:^(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Exponentiation of certain values with themselves or scalars acts as regular exponentiation, -but returns the result wrapped in a `CertainValue` instance. +but returns the result wrapped in a `CertainScalar` instance. """ -Base.:^(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) +Base.:^(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Base.:^(a::CertainValue, b::CertainValue) = CertainValue(a.value ^ b.value) -Base.:^(a::CertainValue, b::Real) = CertainValue(a.value ^ b) -Base.:^(a::Real, b::CertainValue) = CertainValue(a ^ b.value) +Base.:^(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value ^ b.value) +Base.:^(a::CertainScalar, b::Real) = CertainScalar(a.value ^ b) +Base.:^(a::Real, b::CertainScalar) = CertainScalar(a ^ b.value) diff --git a/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl b/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl index 06da479b..a5f14453 100644 --- a/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/multiply_uncertainvalues.jl @@ -4,87 +4,37 @@ ################ """ - Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue) -> UncertainValue + Base.:*(a::AbstractUncertainValue, b::Real; n::Int = 30000) -> UncertainScalarKDE + Base.:*(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE + Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Multiplication operator for pairs of uncertain values. +Multiplication operator. Perform the operation `a * b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise multiplication on the draws. Use the `*(a, b, n)` syntax to tune the number of draws. +A kernel density estimate to the distribution of products is returned. -Computes the element-wise products between for a default of `n = 10000` realizations of `a` and -`b`, then returns an uncertain value based on a kernel density estimate to the distribution -of the element-wise products. 
-Use the `*(a, b, n)` syntax to tune the number (`n`) of draws. +## Example + +```julia +using UncertainData +x = UncertainValue(Normal, 0, 1) +y = UncertainValue([1, 2, -15, -20], [0.2, 0.3, 0.2, 0.3]) +x * y # uses the default number of draws (n = 30000) +*(x, y, 100000) # use more samples +``` """ function Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) UncertainValue(resample(a, n) .* resample(b, n)) end -""" - Base.:*(a::Real, b::AbstractUncertainValue) -> UncertainValue - -Multiplication operator for between scalars and uncertain values. - -Computes the element-wise products between `a` and `b` for a default of `n = 10000` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Use the `*(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:*(a::Real, b::AbstractUncertainValue; n::Int = 30000) = UncertainValue(a .* resample(b, n)) - -""" - Base.:*(a::AbstractUncertainValue, b::Real) -> UncertainValue - -Multiplication operator for between uncertain values and scalars. - -Computes the element-wise products between `a` and `b` for a default of `n = 10000` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Use the `*(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:*(a::AbstractUncertainValue, b::Real; n::Int = 30000) = UncertainValue(resample(a, n) .* b) - -""" - Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Multiplication operator for pairs of uncertain values. - -Computes the element-wise products between `a` and `b` for `n` realizations -of `a` and `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Call this function using the `*(a, b, n)` syntax. -""" -function Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) +Base.:*(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) = UncertainValue(resample(a, n) .* resample(b, n)) -end -""" - Base.:*(a::Real, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Multiplication operator for scalar-uncertain value pairs. - -Computes the element-wise products between `a` and `b` for `n` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Call this function using the `*(a, b, n)` syntax. -""" Base.:*(a::Real, b::AbstractUncertainValue, n::Int) = UncertainValue(a .* resample(b, n)) - -""" - Base.:*(a::AbstractUncertainValue, b::Real, n::Int) -> UncertainValue - -Multiplication operator for scalar-uncertain value pairs. - -Computes the element-wise products between `a` and `b` for `n` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise products. - -Call this function using the `*(a, b, n)` syntax. 
-""" Base.:*(a::AbstractUncertainValue, b::Real, n::Int) = UncertainValue(resample(a, n) .* b) @@ -94,20 +44,16 @@ Base.:*(a::AbstractUncertainValue, b::Real, n::Int) = # Special cases ##################################################################################### -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar ################## -# `CertainValue`s +# `CertainScalar`s ################# -""" - Base.:*(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) - -Multiplication of certain values with themselves or scalars acts as regular multiplication, -but returns the result wrapped in a `CertainValue` instance. -""" -Base.:*(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) -Base.:*(a::CertainValue, b::CertainValue) = CertainValue(a.value * b.value) -Base.:*(a::CertainValue, b::Real) = CertainValue(a.value * b) -Base.:*(a::Real, b::CertainValue) = CertainValue(a * b.value) +#Multiplication of certain values with themselves or scalars acts as regular multiplication, +#but returns the result wrapped in a `CertainScalar` instance. +Base.:*(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) +Base.:*(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value * b.value) +Base.:*(a::CertainScalar, b::Real) = CertainScalar(a.value * b) +Base.:*(a::Real, b::CertainScalar) = CertainScalar(a * b.value) diff --git a/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl b/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl index 9b04b284..73eca4c9 100644 --- a/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/subtract_uncertainvalues.jl @@ -8,87 +8,36 @@ import ..Resampling: ############# """ - Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue) -> UncertainValue + Base.:-(a::AbstractUncertainValue, b::Real; n::Int = 30000) -> UncertainScalarKDE + Base.:-(a::Real, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE + Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) -> UncertainScalarKDE -Subtraction operator for pairs of uncertain values. +Subtraction operator. Perform the operation `a - b` by drawing `n` realizations of the uncertain value(s), +then performing element-wise subtraction on the draws. Use the `-(a, b, n)` syntax to tune the number of draws. +A kernel density estimate to the distribution of differences is returned. -Computes the element-wise differences between for a default of `n = 30000` realizations of `a` and -`b`, then returns an uncertain value based on a kernel density estimate to the distribution -of the element-wise differences. +## Example -Use the `-(a, b, n)` syntax to tune the number (`n`) of draws. +```julia +using UncertainData +x = UncertainValue(Normal, 0, 1) +y = UncertainValue([1, 2, -15, -20], [0.2, 0.3, 0.2, 0.3]) +x - y # uses the default number of draws (n = 30000) +-(x, y, 100000) # use more samples +``` """ function Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue; n::Int = 30000) UncertainValue(resample(a, n) .- resample(b, n)) end -""" - Base.:-(a::Real, b::AbstractUncertainValue) -> UncertainValue - -Subtraction operator for between scalars and uncertain values. - -Computes the element-wise differences between `a` and `b` for a default of `n = 30000` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Use the `-(a, b, n)` syntax to tune the number (`n`) of draws. 
-""" Base.:-(a::Real, b::AbstractUncertainValue; n::Int = 30000) = UncertainValue(a .- resample(b, n)) - -""" - Base.:-(a::AbstractUncertainValue, b::Real) -> UncertainValue - -Subtraction operator for between uncertain values and scalars. - -Computes the element-wise differences between `a` and `b` for a default of `n = 30000` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Use the `-(a, b, n)` syntax to tune the number (`n`) of draws. -""" Base.:-(a::AbstractUncertainValue, b::Real; n::Int = 30000) = UncertainValue(resample(a, n) .- b) - -""" - Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Subtraction operator for pairs of uncertain values. - -Computes the element-wise differences between `a` and `b` for `n` realizations -of `a` and `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Call this function using the `-(a, b, n)` syntax. -""" -function Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) +Base.:-(a::AbstractUncertainValue, b::AbstractUncertainValue, n::Int) = UncertainValue(resample(a, n) .- resample(b, n)) -end -""" - Base.:-(a::Real, b::AbstractUncertainValue, n::Int) -> UncertainValue - -Subtraction operator for scalar-uncertain value pairs. - -Computes the element-wise differences between `a` and `b` for `n` realizations -of `b`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Call this function using the `-(a, b, n)` syntax. -""" Base.:-(a::Real, b::AbstractUncertainValue, n::Int) = UncertainValue(a .- resample(b, n)) - -""" - Base.:-(a::AbstractUncertainValue, b::Real, n::Int) -> UncertainValue - -Subtraction operator for scalar-uncertain value pairs. - -Computes the element-wise differences between `a` and `b` for `n` realizations -of `a`, then returns an uncertain value based on a kernel density estimate to the -distribution of the element-wise differences. - -Call this function using the `-(a, b, n)` syntax. -""" Base.:-(a::AbstractUncertainValue, b::Real, n::Int) = UncertainValue(resample(a, n) .- b) @@ -98,20 +47,17 @@ Base.:-(a::AbstractUncertainValue, b::Real, n::Int) = # Special cases ##################################################################################### -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar ################## -# `CertainValue`s +# `CertainScalar`s ################# -""" - Base.:-(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) -Subtraction of certain values with themselves or scalars acts as regular subtraction, -but returns the result wrapped in a `CertainValue` instance. -""" -Base.:-(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) +#Subtraction of certain values with themselves or scalars acts as regular subtraction, +#but returns the result wrapped in a `CertainScalar` instance. 
+Base.:-(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) -Base.:-(a::CertainValue, b::CertainValue) = CertainValue(a.value - b.value) -Base.:-(a::CertainValue, b::Real) = CertainValue(a.value - b) -Base.:-(a::Real, b::CertainValue) = CertainValue(a - b.value) +Base.:-(a::CertainScalar, b::CertainScalar) = CertainScalar(a.value - b.value) +Base.:-(a::CertainScalar, b::Real) = CertainScalar(a.value - b) +Base.:-(a::Real, b::CertainScalar) = CertainScalar(a - b.value) diff --git a/src/mathematics/uncertainvalues/trig_functions_uncertainvalues.jl b/src/mathematics/uncertainvalues/trig_functions_uncertainvalues.jl index 71303c9b..bec81380 100644 --- a/src/mathematics/uncertainvalues/trig_functions_uncertainvalues.jl +++ b/src/mathematics/uncertainvalues/trig_functions_uncertainvalues.jl @@ -695,15 +695,15 @@ trigfuncs = [:(cos), :(cosd), :(cosh), :(sin), :(sind), :(sinh), :(tan), :(tand) :(secd), :(sech), :(cot), :(cotd), :(coth)] ################## -# `CertainValue`s +# `CertainScalar`s ################# -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar """ - Base.:-(a::Union{CertainValue, Real}, b::Union{CertainValue, Real}) + Base.:-(a::Union{CertainScalar, Real}, b::Union{CertainScalar, Real}) Subtraction of certain values with themselves or scalars acts as regular subtraction, -but returns the result wrapped in a `CertainValue` instance. +but returns the result wrapped in a `CertainScalar` instance. """ for trigfunc in trigfuncs @@ -711,21 +711,21 @@ for trigfunc in trigfuncs regular_func = quote """ - $($f)(x::CertainValue) + $($f)(x::CertainScalar) Compute `$($trigfunc)(x)`. """ - $(f)(x::CertainValue) = x.value + $(f)(x::CertainScalar) = x.value end n_reps_func = quote """ - $($f)(x::CertainValue, n::Int) + $($f)(x::CertainScalar, n::Int) Compute `$($trigfunc)(x)` `n` times and return the result(s) as a vector (just repeating the value `n` times). 
""" - $(f)(x::CertainValue, n::Int) = [x.value for i = 1:n] + $(f)(x::CertainScalar, n::Int) = [x.value for i = 1:n] end eval(regular_func) diff --git a/src/plot_recipes/recipes_certainvalues.jl b/src/plot_recipes/recipes_certainvalues.jl index 62f45057..b1cf78bf 100644 --- a/src/plot_recipes/recipes_certainvalues.jl +++ b/src/plot_recipes/recipes_certainvalues.jl @@ -1,16 +1,21 @@ -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar using RecipesBase -@recipe f(::Type{CertainValue{T}}, x::CertainValue{T}) where {T} = [x.value] +@recipe f(::Type{CertainScalar{T}}, x::CertainScalar{T}) where {T} = [x.value] -@recipe function f(certainvals::Vector{CertainValue}) +@recipe function f(certainvals::Vector{CertainScalar}) @series begin [val.value for val in certainvals] end end +@recipe function f(x::CertainScalar) + @series begin + x + end +end -@recipe function f(certainvals::Vararg{CertainValue,N}) where {N} +@recipe function f(certainvals::Vararg{CertainScalar,N}) where {N} @series begin [val.value for val in certainvals] end diff --git a/src/plot_recipes/recipes_uncertainvalues_kde.jl b/src/plot_recipes/recipes_uncertainvalues_kde.jl index 962fedb3..305f7855 100644 --- a/src/plot_recipes/recipes_uncertainvalues_kde.jl +++ b/src/plot_recipes/recipes_uncertainvalues_kde.jl @@ -6,10 +6,10 @@ import ..SamplingConstraints: @recipe function plot_uncertainvalueKDE(uv::AbstractUncertainScalarKDE) @series begin seriestype := :path - fα --> 0.5 - fc --> :green - xlabel --> "Value" - ylabel --> "Density" + fillalpha --> 0.5 + fillcolor --> :green + xguide --> "Value" + yguide --> "Density" label --> "" uv.distribution.x, uv.distribution.density ./ sum(uv.distribution.density) end @@ -22,10 +22,10 @@ end cuv = constrain(uv, constraint) @series begin seriestype := :path - fα --> 0.5 - fc --> :green - xlabel --> "Value" - ylabel --> "Density" + fillalpha --> 0.5 + fillcolor --> :green + xguide --> "Value" + yguide --> "Density" label --> "" cuv.distribution.x, cuv.distribution.density ./ sum(cuv.distribution.density) end diff --git a/src/plot_recipes/recipes_uncertainvalues_multiple.jl b/src/plot_recipes/recipes_uncertainvalues_multiple.jl index 0817b103..91bc04de 100644 --- a/src/plot_recipes/recipes_uncertainvalues_multiple.jl +++ b/src/plot_recipes/recipes_uncertainvalues_multiple.jl @@ -10,15 +10,15 @@ @series begin label --> "P1, $d1" seriestype := :bar - fα --> 0.4 - fc --> :green + fillalpha --> 0.4 + fillcolor --> :green fit(Histogram, resample(d1, n_samples), nbins = nbins) end @series begin label --> "P2, $d2" seriestype := :bar - fc --> :blue - fα --> 0.4 + fillcolor --> :blue + fillalpha --> 0.4 fit(Histogram, resample(d2, n_samples), nbins = nbins) end @@ -28,8 +28,8 @@ @series begin label --> "MixtureModel with uniform priors" seriestype := :bar - fα --> 0.6 - fc --> :black + fillalpha --> 0.6 + fillcolor --> :black fit(Histogram, rand(M, n_samples), nbins = nbins) end end diff --git a/src/resampling/Resampling.jl b/src/resampling/Resampling.jl index 2e82de72..cce811ef 100644 --- a/src/resampling/Resampling.jl +++ b/src/resampling/Resampling.jl @@ -14,6 +14,15 @@ using Reexport UncertainValue, AbstractUncertainValue + """ + resample(x::AbstractUncertainValue) + resample(x::AbstractUncertainValue, n::Int) + + Draw a single sample, or `n` samples, from the uncertain value `x`. + + + Draw `n` samples from the uncertain value `x`. 
+ """ function resample end ################################### @@ -36,6 +45,9 @@ using Reexport ################################### # Resampling uncertain values ################################### + + include("uncertain_values/resample_uncertainvalues.jl") + # Uncertain values based on distributions include("uncertain_values/resample_uncertainvalues_distributions.jl") diff --git a/src/resampling/binning/bin_BinnedResampling.jl b/src/resampling/binning/bin_BinnedResampling.jl index 8308bc78..a9ef8e24 100644 --- a/src/resampling/binning/bin_BinnedResampling.jl +++ b/src/resampling/binning/bin_BinnedResampling.jl @@ -11,7 +11,7 @@ distribute the values according to their indices, into the bins given by `binnin ## Returns Returns an `UncertainIndexValueDataset`. Indices are assumed to be uniformly distributed within each -bin, and are represented as `CertainValue`s at the bin centers. Values of the dataset have different +bin, and are represented as `CertainScalar`s at the bin centers. Values of the dataset have different representations depending on what `binning` is: - If `binning isa BinnedResampling{UncertainScalarKDE}`, then values in each bin are represented by a @@ -118,7 +118,7 @@ function bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling{Un bin_centers, binvecs = bin(x, BinnedResampling(RawValues, left_bin_edges, n)) # Estimate distributions in each bin by kernel density estimation - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarPopulation}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarPopulation}}(undef, n_bins) binvec_lengths = length.(binvecs) for i in 1:n_bins @@ -153,7 +153,7 @@ function bin(x::AbstractUncertainIndexValueDataset, binning::BinnedResampling{Un bin_centers, binvecs = bin(x, BinnedResampling(RawValues, left_bin_edges, n)) # Estimate distributions in each bin by kernel density estimation - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarKDE}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarKDE}}(undef, n_bins) binvec_lengths = length.(binvecs) for i in 1:n_bins diff --git a/src/resampling/binning/bin_BinnedWeightedResampling.jl b/src/resampling/binning/bin_BinnedWeightedResampling.jl index ccd64e92..4324dd4f 100644 --- a/src/resampling/binning/bin_BinnedWeightedResampling.jl +++ b/src/resampling/binning/bin_BinnedWeightedResampling.jl @@ -14,7 +14,7 @@ given by `binning.weights[i]` (probability weights are always normalised to 1). ## Returns Returns an `UncertainIndexValueDataset`. Indices are assumed to be uniformly distributed within each -bin, and are represented as `CertainValue`s at the bin centers. Values of the dataset have different +bin, and are represented as `CertainScalar`s at the bin centers. 
Values of the dataset have different representations depending on what `binning` is: - If `binning isa BinnedWeightedResampling{UncertainScalarKDE}`, then values in each bin are @@ -149,7 +149,7 @@ function bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResam # Estimate distributions in each bin by kernel density estimation n_bins = length(binning.left_bin_edges) - 1 - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarKDE}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarKDE}}(undef, n_bins) for i in 1:n_bins if length(binvecs[i]) > nan_threshold @@ -178,7 +178,7 @@ function bin(x::AbstractUncertainIndexValueDataset, binning::BinnedWeightedResam # Estimate distributions in each bin by kernel density estimation n_bins = length(binning.left_bin_edges) - 1 - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarPopulation}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarPopulation}}(undef, n_bins) for i in 1:n_bins if length(binvecs[i]) > nan_threshold diff --git a/src/resampling/resampling_with_schemes/resampling_schemes_binned.jl b/src/resampling/resampling_with_schemes/resampling_schemes_binned.jl index 1b5bdc6c..57333532 100644 --- a/src/resampling/resampling_with_schemes/resampling_schemes_binned.jl +++ b/src/resampling/resampling_with_schemes/resampling_schemes_binned.jl @@ -265,7 +265,7 @@ function resample(x::AbstractUncertainIndexValueDataset, end # Estimate distributions in each bin by kernel density estimation - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarKDE}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarKDE}}(undef, n_bins) for i in 1:n_bins if length(binvecs[i]) > nan_threshold @@ -327,7 +327,7 @@ function resample(x::AbstractUncertainIndexValueDataset, end # Estimate distributions in each bin by kernel density estimation - estimated_value_dists = Vector{Union{CertainValue, UncertainScalarPopulation}}(undef, n_bins) + estimated_value_dists = Vector{Union{CertainScalar, UncertainScalarPopulation}}(undef, n_bins) for i in 1:n_bins if length(binvecs[i]) > nan_threshold diff --git a/src/resampling/resampling_with_schemes/resampling_schemes_constrained.jl b/src/resampling/resampling_with_schemes/resampling_schemes_constrained.jl index 3359854e..6bde64cd 100644 --- a/src/resampling/resampling_with_schemes/resampling_schemes_constrained.jl +++ b/src/resampling/resampling_with_schemes/resampling_schemes_constrained.jl @@ -53,7 +53,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x = UncertainIndexDataset(time_uncertain) timeinds_y = UncertainIndexDataset(time_certain) diff --git a/src/resampling/resampling_with_schemes/resampling_schemes_sequential.jl b/src/resampling/resampling_with_schemes/resampling_schemes_sequential.jl index 6807c31c..b47bf7b7 100644 --- a/src/resampling/resampling_with_schemes/resampling_schemes_sequential.jl +++ b/src/resampling/resampling_with_schemes/resampling_schemes_sequential.jl @@ -20,7 +20,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x 
= UncertainIndexDataset(time_uncertain) timeinds_y = UncertainIndexDataset(time_certain) @@ -59,7 +59,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x = UncertainIndexDataset(time_uncertain) timeinds_y = UncertainIndexDataset(time_certain) diff --git a/src/resampling/uncertain_values/resample_certainvalues.jl b/src/resampling/uncertain_values/resample_certainvalues.jl index e2bb2cad..0f2754bc 100644 --- a/src/resampling/uncertain_values/resample_certainvalues.jl +++ b/src/resampling/uncertain_values/resample_certainvalues.jl @@ -1,13 +1,13 @@ import ..SamplingConstraints: SamplingConstraint -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar resample(x::Number) = x -resample(v::CertainValue) = v.value -resample(v::CertainValue, n::Int) = [v.value for i = 1:n] +resample(v::CertainScalar) = v.value +resample(v::CertainScalar, n::Int) = [v.value for i = 1:n] -resample(v::CertainValue, s::SamplingConstraint) = v.value -resample(v::CertainValue, s::SamplingConstraint, n::Int) = [v.value for i = 1:n] +resample(v::CertainScalar, s::SamplingConstraint) = v.value +resample(v::CertainScalar, s::SamplingConstraint, n::Int) = [v.value for i = 1:n] constraints = [ :(NoConstraint), @@ -20,11 +20,10 @@ constraints = [ :(TruncateStd) ] - for constraint in constraints funcs = quote - resample(x::CertainValue, constraint::$(constraint)) = x.value - resample(x::CertainValue, constraint::$(constraint), n::Int) = [x.value for i = 1:n] + resample(x::CertainScalar, constraint::$(constraint)) = x.value + resample(x::CertainScalar, constraint::$(constraint), n::Int) = [x.value for i = 1:n] end eval(funcs) end \ No newline at end of file diff --git a/src/resampling/uncertain_values/resample_measurements.jl b/src/resampling/uncertain_values/resample_measurements.jl index 5c58eae0..e69de29b 100644 --- a/src/resampling/uncertain_values/resample_measurements.jl +++ b/src/resampling/uncertain_values/resample_measurements.jl @@ -1,12 +0,0 @@ - -import Measurements: Measurement -import ..UncertainValues: UncertainValue -import Distributions: Normal - -resample(m::Measurement{T}) where T = resample(UncertainValue(Normal, m.val, m.err)) - -function resample(m::Measurement{T}, n::Int) where T - uval = UncertainValue(Normal, m.val, m.err) - - [resample(uval) for i = 1:n] -end \ No newline at end of file diff --git a/src/resampling/uncertain_values/resample_uncertainvalues.jl b/src/resampling/uncertain_values/resample_uncertainvalues.jl new file mode 100644 index 00000000..66e44ae8 --- /dev/null +++ b/src/resampling/uncertain_values/resample_uncertainvalues.jl @@ -0,0 +1,100 @@ +import ..SamplingConstraints: SamplingConstraint +import ..UncertainValues: CertainScalar +import Measurements: Measurement +import ..UncertainValues: UncertainValue +import Distributions: Normal + + +################################# +# Values without uncertainties +################################# + +resample(x::Number) = x +resample(v::CertainScalar) = v.value +resample(v::CertainScalar, n::Int) = [v.value for i = 1:n] +resample(v::CertainScalar, s::SamplingConstraint) = v.value +resample(v::CertainScalar, s::SamplingConstraint, n::Int) = [v.value for i = 1:n] + +# constraints = [ +# :(NoConstraint), +# :(TruncateLowerQuantile), +# :(TruncateUpperQuantile), +# :(TruncateQuantiles), +# 
:(TruncateMaximum), +# :(TruncateMinimum), +# :(TruncateRange), +# :(TruncateStd) +# ] + +# for constraint in constraints +# funcs = quote +# resample(x::CertainScalar, constraint::$(constraint)) = x.value +# resample(x::CertainScalar, constraint::$(constraint), n::Int) = [x.value for i = 1:n] +# end +# eval(funcs) +# end + +################################# +# Measurements +################################# + +resample(m::Measurement{T}) where T = resample(UncertainValue(Normal, m.val, m.err)) +function resample(m::Measurement{T}, n::Int) where T + uval = UncertainValue(Normal, m.val, m.err) + + [resample(uval) for i = 1:n] +end + +################################# +# Theoretical distributions +################################# +import ..UncertainValues: + TheoreticalDistributionScalarValue, + AbstractUncertainTwoParameterScalarValue, + AbstractUncertainThreeParameterScalarValue, + UncertainScalarTheoreticalTwoParameter, + UncertainScalarTheoreticalThreeParameter, + UncertainScalarNormallyDistributed, + UncertainScalarUniformlyDistributed, + UncertainScalarBetaDistributed, + UncertainScalarBetaPrimeDistributed, + UncertainScalarBetaBinomialDistributed, + UncertainScalarBinomialDistributed, + UncertainScalarGammaDistributed, + UncertainScalarFrechetDistributed +import Distributions + +# Resample for generic +resample(uv::TheoreticalDistributionScalarValue) = rand(uv.distribution) +resample(uv::TheoreticalDistributionScalarValue, n::Int) = rand(uv.distribution, n) + + +# Custom resample methods for each type of uncertain scalars based on +# distributions (in case we want to implement custom sampling for some of them) +# resample(uv::UncertainScalarTheoreticalThreeParameter) = rand(uv.distribution) +# resample(uv::UncertainScalarTheoreticalTwoParameter) = rand(uv.distribution) +# resample(uv::UncertainScalarNormallyDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarUniformlyDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarBetaDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarBetaPrimeDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarBetaBinomialDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarGammaDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarFrechetDistributed) = rand(uv.distribution) +# resample(uv::UncertainScalarBinomialDistributed) = rand(uv.distribution) + + +# resample(uv::UncertainScalarTheoreticalThreeParameter, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarTheoreticalTwoParameter, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarNormallyDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarUniformlyDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarBetaDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarBetaPrimeDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarBetaBinomialDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarGammaDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarFrechetDistributed, n::Int) = rand(uv.distribution, n) +# resample(uv::UncertainScalarBinomialDistributed, n::Int) = rand(uv.distribution, n) + +resample(x::Distributions.Truncated) = rand(x) +resample(x::Distributions.Truncated, n::Int) = rand(x, n) +resample(x::Distributions.Distribution) = rand(x) +resample(x::Distributions.Distribution, n::Int) = rand(x, n) \ No newline at end of file diff --git 
a/src/resampling/uncertain_values/resample_uncertainvalues_distributions.jl b/src/resampling/uncertain_values/resample_uncertainvalues_distributions.jl index 12849b9e..e69de29b 100644 --- a/src/resampling/uncertain_values/resample_uncertainvalues_distributions.jl +++ b/src/resampling/uncertain_values/resample_uncertainvalues_distributions.jl @@ -1,52 +0,0 @@ -import ..UncertainValues: - TheoreticalDistributionScalarValue, - AbstractUncertainTwoParameterScalarValue, - AbstractUncertainThreeParameterScalarValue, - UncertainScalarTheoreticalTwoParameter, - UncertainScalarTheoreticalThreeParameter, - UncertainScalarNormallyDistributed, - UncertainScalarUniformlyDistributed, - UncertainScalarBetaDistributed, - UncertainScalarBetaPrimeDistributed, - UncertainScalarBetaBinomialDistributed, - UncertainScalarBinomialDistributed, - UncertainScalarGammaDistributed, - UncertainScalarFrechetDistributed -import Distributions - -# Resample for generic -resample(uv::TheoreticalDistributionScalarValue) = rand(uv.distribution) -resample(uv::TheoreticalDistributionScalarValue, n::Int) = rand(uv.distribution, n) - - -# Custom resample methods for each type of uncertain scalars based on -# distributions (in case we want to implement custom sampling for some of them) -resample(uv::UncertainScalarTheoreticalThreeParameter) = rand(uv.distribution) -resample(uv::UncertainScalarTheoreticalTwoParameter) = rand(uv.distribution) -resample(uv::UncertainScalarNormallyDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarUniformlyDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarBetaDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarBetaPrimeDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarBetaBinomialDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarGammaDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarFrechetDistributed) = rand(uv.distribution) -resample(uv::UncertainScalarBinomialDistributed) = rand(uv.distribution) - - -resample(uv::UncertainScalarTheoreticalThreeParameter, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarTheoreticalTwoParameter, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarNormallyDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarUniformlyDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarBetaDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarBetaPrimeDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarBetaBinomialDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarGammaDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarFrechetDistributed, n::Int) = rand(uv.distribution, n) -resample(uv::UncertainScalarBinomialDistributed, n::Int) = rand(uv.distribution, n) - - -resample(x::Distributions.Truncated) = rand(x) -resample(x::Distributions.Truncated, n::Int) = rand(x, n) - -resample(x::Distributions.Distribution) = rand(x) -resample(x::Distributions.Distribution, n::Int) = rand(x, n) \ No newline at end of file diff --git a/src/resampling/uncertain_values/resample_uncertainvalues_kde.jl b/src/resampling/uncertain_values/resample_uncertainvalues_kde.jl index f60605cf..3e9155be 100644 --- a/src/resampling/uncertain_values/resample_uncertainvalues_kde.jl +++ b/src/resampling/uncertain_values/resample_uncertainvalues_kde.jl @@ -1,17 +1,17 @@ import ..UncertainValues.AbstractUncertainScalarKDE -""" - resample(uv::UncertainScalarKDE) 
+# """ +# resample(uv::UncertainScalarKDE) -Resample an uncertain value whose distribution is approximated using a -kernel density estimate once. -""" +# Resample an uncertain value whose distribution is approximated using a +# kernel density estimate once. +# """ resample(uv::AbstractUncertainScalarKDE) = rand(uv) -""" - resample(uv::AbstractUncertainScalarKDE) +# """ +# resample(uv::AbstractUncertainScalarKDE) -Resample an uncertain value whose distribution is approximated using a -kernel density estimate `n` times. -""" +# Resample an uncertain value whose distribution is approximated using a +# kernel density estimate `n` times. +# """ resample(uv::AbstractUncertainScalarKDE, n::Int) = rand(uv, n) diff --git a/src/resampling/uncertain_values/resample_uncertainvalues_kde_withconstraints.jl b/src/resampling/uncertain_values/resample_uncertainvalues_kde_withconstraints.jl index 357cb876..d826a94f 100644 --- a/src/resampling/uncertain_values/resample_uncertainvalues_kde_withconstraints.jl +++ b/src/resampling/uncertain_values/resample_uncertainvalues_kde_withconstraints.jl @@ -22,69 +22,69 @@ import ..SamplingConstraints: fallback -""" - resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint) -Resample without contraints (use the full distribution representing the value) +# Resample without contraints (use the full distribution representing the value) -## Example +# ## Example -```julia -some_sample = rand(Normal(), 1000) +# ```julia +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -# Resample the uncertain value by resampling the full distribution once. -resample(uncertainval, NoConstraint()) -``` -""" +# # Resample the uncertain value by resampling the full distribution once. 
+# resample(uncertainval, NoConstraint()) +# ``` +# """ resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint) = resample(uv) -""" - resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint, n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint, n::Int) -Resample without contraints (use the full distribution representing the value) +# Resample without contraints (use the full distribution representing the value) -## Example +# ## Example -```julia -some_sample = rand(Normal(), 1000) +# ```julia +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -# Resample the uncertain value by resampling the full distribution n times -resample(uncertainval, NoConstraint(), n) -``` -""" +# # Resample the uncertain value by resampling the full distribution n times +# resample(uncertainval, NoConstraint(), n) +# ``` +# """ resample(uv::AbstractUncertainScalarKDE, constraint::NoConstraint, n::Int) = resample(uv, n) -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile) -Resample `uv` by first truncating below the kernel density estimate of the -distribution furnishing the value at some lower quantile, then resampling -it once. +# Resample `uv` by first truncating below the kernel density estimate of the +# distribution furnishing the value at some lower quantile, then resampling +# it once. -## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateLowerQuantile(0.16) +# constraint = TruncateLowerQuantile(0.16) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution once. -resample(uncertainval, constraint) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution once. +# resample(uncertainval, constraint) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile) # Find the index of the kernel density estimated distribution # corresponding to the lower quantile at which we want to truncate. @@ -104,31 +104,31 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuant rand(Uniform(sampled_val, sampled_val + δ)) end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile, - n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile, +# n::Int) -Resample `uv` by first truncating below the kernel density estimate of the -distribution furnishing the value at some lower quantile, then resampling -it `n` times. +# Resample `uv` by first truncating below the kernel density estimate of the +# distribution furnishing the value at some lower quantile, then resampling +# it `n` times. 
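A usage sketch for the quantile-truncated KDE resampling implemented below, following the example that the surrounding (now commented-out) docstring describes:

```julia
using UncertainData, Distributions

# Calling UncertainValue with a vector of numbers triggers KDE estimation.
uv = UncertainValue(rand(Normal(), 1000))

# Truncate the kernel density estimate below the 16th percentile, then sample.
resample(uv, TruncateLowerQuantile(0.16))        # a single draw
resample(uv, TruncateLowerQuantile(0.16), 500)   # 500 draws
```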
-## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateLowerQuantile(0.16) +# constraint = TruncateLowerQuantile(0.16) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 500 times. -resample(uncertainval, constraint, 500) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 500 times. +# resample(uncertainval, constraint, 500) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuantile, n::Int) # Find the index of the kernel density estimated distribution @@ -151,30 +151,30 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateLowerQuant end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) -Resample `uv` by first truncating above the kernel density estimate of the -distribution furnishing the value at some upper quantile, then resampling -it once. +# Resample `uv` by first truncating above the kernel density estimate of the +# distribution furnishing the value at some upper quantile, then resampling +# it once. -## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateUpperQuantile(0.78) +# constraint = TruncateUpperQuantile(0.78) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution once. -resample(uncertainval, constraint) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution once. +# resample(uncertainval, constraint) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) # Find the index of the kernel density estimated distribution # corresponding to the lower quantile at which we want to truncate. @@ -194,31 +194,31 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuant rand(Uniform(sampled_val, sampled_val + δ)) end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile, - n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile, +# n::Int) -Resample `uv` by first truncating above the kernel density estimate of the -distribution furnishing the value at some upper quantile, then resampling -it `n` times. +# Resample `uv` by first truncating above the kernel density estimate of the +# distribution furnishing the value at some upper quantile, then resampling +# it `n` times. 
-## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateLowerQuantile(0.16) +# constraint = TruncateLowerQuantile(0.16) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 500 times. -resample(uncertainval, constraint, 500) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 500 times. +# resample(uncertainval, constraint, 500) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile, n::Int) # Find the index of the kernel density estimated distribution @@ -242,30 +242,30 @@ end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateUpperQuantile) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value both above and below at some quantile range, -then resampling it once. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value both above and below at some quantile range, +# then resampling it once. -## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateQuantiles(0.1, 0.9) +# constraint = TruncateQuantiles(0.1, 0.9) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution once. -resample(uncertainval, constraint) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution once. +# resample(uncertainval, constraint) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateQuantiles) # Find the index of the kernel density estimated distribution # corresponding to the lower quantile at which we want to truncate. @@ -288,31 +288,31 @@ end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateQuantiles, - n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateQuantiles, +# n::Int) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value both above and below at some quantile range, -then resampling it `n` times. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value both above and below at some quantile range, +# then resampling it `n` times. 
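Likewise for the two-sided quantile truncation handled below, a short sketch along the lines of the commented-out example:

```julia
using UncertainData, Distributions

uv = UncertainValue(rand(Normal(), 1000))   # KDE-estimated uncertain value
constraint = TruncateQuantiles(0.1, 0.9)    # keep the 10th-90th percentile range

resample(uv, constraint)        # one draw from the truncated estimate
resample(uv, constraint, 500)   # 500 draws
```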
-## Example +# ## Example -```julia -using UncertainData +# ```julia +# using UncertainData -some_sample = rand(Normal(), 1000) +# some_sample = rand(Normal(), 1000) -# Calling UncertainValue with a single vector of numbers triggers KDE estimation -uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE +# # Calling UncertainValue with a single vector of numbers triggers KDE estimation +# uncertainval = UncertainValue(some_sample) # -> UncertainScalarKDE -constraint = TruncateQuantiles(0.1, 0.9) +# constraint = TruncateQuantiles(0.1, 0.9) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 500 times. -resample(uncertainval, constraint, 500) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 500 times. +# resample(uncertainval, constraint, 500) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateQuantiles, n::Int) # Find the index of the kernel density estimated distribution @@ -337,27 +337,27 @@ end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some maximum value, -then resampling it once. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some maximum value, +# then resampling it once. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -constraint = TruncateMaximum(0.8) # accept no values larger than 1.1 +# constraint = TruncateMaximum(0.8) # accept no values larger than 1.1 -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 700 times. -resample(uncertainval, constraint, 700) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 700 times. +# resample(uncertainval, constraint, 700) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum) # Box width δ = step(uv.range) @@ -378,28 +378,28 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum) rand(Uniform(sampled_val, sampled_val + δ)) end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum, n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum, n::Int) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum value, -then resampling it `n` times. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum value, +# then resampling it `n` times. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. 
+# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -constraint = TruncateMaximum(0.8) # accept no values larger than 1.1 +# constraint = TruncateMaximum(0.8) # accept no values larger than 1.1 -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 700 times. -resample(uncertainval, constraint, 700) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 700 times. +# resample(uncertainval, constraint, 700) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum, n::Int) # Box width δ = step(uv.range) @@ -422,27 +422,27 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMaximum, n end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum value, -then resampling it once. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum value, +# then resampling it once. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -constraint = TruncateMinimum(0.2) # accept no values smaller than 0.2 +# constraint = TruncateMinimum(0.2) # accept no values smaller than 0.2 -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 700 times. -resample(uncertainval, constraint, 700) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 700 times. +# resample(uncertainval, constraint, 700) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum) # Box width δ = step(uv.range) @@ -465,27 +465,27 @@ end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum, n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum, n::Int) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum value, -then resampling it `n` times. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum value, +# then resampling it `n` times. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -constraint = TruncateMinimum(0.2) # accept no values smaller than 0.2 +# constraint = TruncateMinimum(0.2) # accept no values smaller than 0.2 -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 700 times. 
-resample(uncertainval, constraint, 700) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 700 times. +# resample(uncertainval, constraint, 700) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum, n::Int) # Box width δ = step(uv.range) @@ -508,28 +508,28 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateMinimum, n end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum and maximum values, -then resampling it once. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum and maximum values, +# then resampling it once. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -# Only accept values in the range [-0.9, 1.2] -constraint = TruncateRange(-0.9, 1.2) +# # Only accept values in the range [-0.9, 1.2] +# constraint = TruncateRange(-0.9, 1.2) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 300 times. -resample(uncertainval, constraint, 300) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 300 times. +# resample(uncertainval, constraint, 300) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange) # Box width δ = step(uv.range) @@ -552,28 +552,28 @@ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange) end -""" - resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange, n::Int) +# """ +# resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange, n::Int) -Resample `uv` by first truncating the kernel density estimate of the -distribution furnishing the value at some minimum and maximum values, -then resampling it `n` times. +# Resample `uv` by first truncating the kernel density estimate of the +# distribution furnishing the value at some minimum and maximum values, +# then resampling it `n` times. -## Example +# ## Example -```julia -# Uncertain value represented by a normal distribution with mean = 0 and -# standard deviation = 1. -uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) +# ```julia +# # Uncertain value represented by a normal distribution with mean = 0 and +# # standard deviation = 1. +# uncertainval = UncertainValue(rand(Normal(0, 1), 1000)) -# Only accept values in the range [-0.9, 1.2] -constraint = TruncateRange(-0.9, 1.2) +# # Only accept values in the range [-0.9, 1.2] +# constraint = TruncateRange(-0.9, 1.2) -# Resample the uncertain value by truncating the distribution furnishing it, -# then resampling the new distribution 300 times. -resample(uncertainval, constraint, 300) -``` -""" +# # Resample the uncertain value by truncating the distribution furnishing it, +# # then resampling the new distribution 300 times. 
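The minimum/maximum/range constraints below follow the same pattern; a combined sketch, again mirroring the commented-out examples:

```julia
using UncertainData, Distributions

uv = UncertainValue(rand(Normal(0, 1), 1000))

resample(uv, TruncateMaximum(0.8))             # no draws above 0.8
resample(uv, TruncateMinimum(0.2), 100)        # 100 draws, none below 0.2
resample(uv, TruncateRange(-0.9, 1.2), 300)    # 300 draws restricted to [-0.9, 1.2]
```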
+# resample(uncertainval, constraint, 300) +# ``` +# """ function resample(uv::AbstractUncertainScalarKDE, constraint::TruncateRange, n::Int) # Box width δ = step(uv.range) diff --git a/src/sampling_constraints/constrain_certain_value.jl b/src/sampling_constraints/constrain_certain_value.jl index b73fd4f9..1892c16f 100644 --- a/src/sampling_constraints/constrain_certain_value.jl +++ b/src/sampling_constraints/constrain_certain_value.jl @@ -1,12 +1,12 @@ -import ..UncertainValues: CertainValue +import ..UncertainValues: CertainScalar -constrain(v::CertainValue) = v -constrain(v::CertainValue, s::SamplingConstraint) = v -constrain(v::CertainValue, s::TruncateLowerQuantile) = v -constrain(v::CertainValue, s::TruncateUpperQuantile) = v -constrain(v::CertainValue, s::TruncateQuantiles) = v -constrain(v::CertainValue, s::TruncateStd) = v +constrain(v::CertainScalar) = v +constrain(v::CertainScalar, s::SamplingConstraint) = v +constrain(v::CertainScalar, s::TruncateLowerQuantile) = v +constrain(v::CertainScalar, s::TruncateUpperQuantile) = v +constrain(v::CertainScalar, s::TruncateQuantiles) = v +constrain(v::CertainScalar, s::TruncateStd) = v export constrain \ No newline at end of file diff --git a/src/sampling_constraints/constraint_definitions.jl b/src/sampling_constraints/constraint_definitions.jl index b3220d48..1ea5f859 100644 --- a/src/sampling_constraints/constraint_definitions.jl +++ b/src/sampling_constraints/constraint_definitions.jl @@ -123,7 +123,7 @@ struct TruncateRange{T1, T2} <: ValueSamplingConstraint max::T2 function TruncateRange(min::T1, max::T2) where {T1, T2} - if min <= max # <= ties are allowed, because we may encounter CertainValue instances + if min <= max # <= ties are allowed, because we may encounter CertainScalar instances return new{T1, T2}(min, max) else err_msg = "Cannot create TruncateRange instance. Need min < max" diff --git a/src/sampling_constraints/ordered_sequences/ordered_sequence_algorithms.jl b/src/sampling_constraints/ordered_sequences/ordered_sequence_algorithms.jl index 8fbdd5dd..fae0de9f 100644 --- a/src/sampling_constraints/ordered_sequences/ordered_sequence_algorithms.jl +++ b/src/sampling_constraints/ordered_sequences/ordered_sequence_algorithms.jl @@ -6,7 +6,7 @@ An abstract type for ordered sampling algorithms. abstract type OrderedSamplingAlgorithm end """ - StartToEnd + StartToEnd <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that values should be treated consecutively from start to finish of the dataset. @@ -14,7 +14,7 @@ treated consecutively from start to finish of the dataset. struct StartToEnd <: OrderedSamplingAlgorithm end """ - EndToStart + EndToStart <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that the values should be treated consecutively from the end to the start of the dataset. @@ -22,18 +22,17 @@ treated consecutively from the end to the start of the dataset. struct EndToStart <: OrderedSamplingAlgorithm end """ -RandPtOutwards + RandPtOutwards <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that the values should be divided into two groups, separating the values at some midpoint of the dataset. The two groups of values are then treated separately. """ struct RandPtOutwards <: OrderedSamplingAlgorithm - midpoint_idx::Int end """ - ChuncksForwards + ChuncksForwards <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that the values should be divided into multiple (`n_chunks`) groups. 
The groups of values @@ -45,7 +44,7 @@ struct ChunksForwards <: OrderedSamplingAlgorithm end """ - ChuncksBackwards + ChuncksBackwards <: OrderedSamplingAlgorithm An ordered sampling algorithm indicating that the values should be divided into multiple (`n_chunks`) groups. The groups of values diff --git a/src/sampling_constraints/ordered_sequences/ordered_sequences.jl b/src/sampling_constraints/ordered_sequences/ordered_sequences.jl index 267e7b39..37f20851 100644 --- a/src/sampling_constraints/ordered_sequences/ordered_sequences.jl +++ b/src/sampling_constraints/ordered_sequences/ordered_sequences.jl @@ -58,7 +58,7 @@ is necessary because some distributions may have infinite support). """ function sequence_exists(lqs, uqs, c::StrictlyIncreasing{StartToEnd}) L = length(lqs) - if any(lqs .> uqs) # ties are allowed, because we have `CertainValue`s + if any(lqs .> uqs) # ties are allowed, because we have `CertainScalar`s error("Not all `lqs[i]` are lower than uqs[i]. Quantile calculations are not meaningful.") return false end @@ -73,7 +73,7 @@ end function sequence_exists(lqs, uqs, c::StrictlyDecreasing{StartToEnd}) L = length(lqs) - if any(lqs .> uqs) # ties are allowed, because we have `CertainValue`s + if any(lqs .> uqs) # ties are allowed, because we have `CertainScalar`s error("Not all `lqs[i]` are lower than uqs[i]. Quantile calculations are not meaningful.") return false end diff --git a/src/sampling_constraints/truncation/truncate_CertainValue.jl b/src/sampling_constraints/truncation/truncate_CertainValue.jl index 28be0704..e0c8e82b 100644 --- a/src/sampling_constraints/truncation/truncate_CertainValue.jl +++ b/src/sampling_constraints/truncation/truncate_CertainValue.jl @@ -1,7 +1,7 @@ -import ..UncertainValues.CertainValue +import ..UncertainValues.CertainScalar -Base.truncate(v::CertainValue) = v -function Base.truncate(v::CertainValue, constraint::TruncateMaximum) +Base.truncate(v::CertainScalar) = v +function Base.truncate(v::CertainScalar, constraint::TruncateMaximum) if v.value > constraint.max msg = "Truncating $v with $constraint failed\n" msg2 = "Need value < constraint.max, got $v < $(constraint.max)" @@ -11,7 +11,7 @@ function Base.truncate(v::CertainValue, constraint::TruncateMaximum) end end -function Base.truncate(v::CertainValue, constraint::TruncateMinimum) +function Base.truncate(v::CertainScalar, constraint::TruncateMinimum) if v.value < constraint.min msg = "Truncating $v with $constraint failed\n" msg2 = "Need value > constraint.min, got $v > $(constraint.min)" @@ -21,7 +21,7 @@ function Base.truncate(v::CertainValue, constraint::TruncateMinimum) end end -function Base.truncate(v::CertainValue, constraint::TruncateRange) +function Base.truncate(v::CertainScalar, constraint::TruncateRange) if v.value < constraint.min msg = "Truncating $v with $constraint failed\n" msg2 = "Need value > constraint.min, got $v > $(constraint.min)" @@ -35,7 +35,7 @@ function Base.truncate(v::CertainValue, constraint::TruncateRange) end end -truncate(v::CertainValue, s::TruncateLowerQuantile) = v -truncate(v::CertainValue, s::TruncateUpperQuantile) = v -truncate(v::CertainValue, s::TruncateQuantiles) = v -truncate(v::CertainValue, s::TruncateStd) = v +truncate(v::CertainScalar, s::TruncateLowerQuantile) = v +truncate(v::CertainScalar, s::TruncateUpperQuantile) = v +truncate(v::CertainScalar, s::TruncateQuantiles) = v +truncate(v::CertainScalar, s::TruncateStd) = v diff --git a/src/uncertain_datasets/UncertainIndexDataset.jl b/src/uncertain_datasets/UncertainIndexDataset.jl index 
0d95df41..9ab82e7b 100644 --- a/src/uncertain_datasets/UncertainIndexDataset.jl +++ b/src/uncertain_datasets/UncertainIndexDataset.jl @@ -1,10 +1,27 @@ """ - UncertainIndexDataset + UncertainIndexDataset(indices) -Generic dataset containing uncertain indices. +A dataset containing `indices` that have uncertainties associated with them. -## Fields -- **`indices::AbstractVector{AbstractUncertainValue}`**: The uncertain values. +`UncertainIndexDataset`s are meant to contain the indices corresponding to +an [`UncertainValueDataset`](@ref), and are used for the `indices` field +in [`UncertainIndexValueDataset`](@ref)s. + +## Example + +Say we had a dataset of 20 values for which the uncertainties are normally distributed +with increasing standard deviation through time. + +```julia +using UncertainData, Plots +time_inds = 1:13 +uvals = [UncertainValue(Normal, ind, rand(Uniform()) + (ind / 6)) for ind in time_inds] +inds = UncertainIndexDataset(uvals) + +# With built-in plot recipes, we can plot the dataset, say, using the +33rd to 67th percentile range for the indices. +plot(inds, [0.33, 0.67]) +``` """ struct UncertainIndexDataset <: AbstractUncertainIndexDataset indices::AbstractVector{<:AbstractUncertainValue} @@ -24,7 +41,7 @@ struct ConstrainedUncertainIndexDataset <: AbstractUncertainIndexDataset end function UncertainIndexDataset(x::AbstractArray{T, 1}) where T - UncertainIndexDataset(CertainValue.(x)) + UncertainIndexDataset(CertainScalar.(x)) end export diff --git a/src/uncertain_datasets/UncertainIndexValueDataset.jl b/src/uncertain_datasets/UncertainIndexValueDataset.jl index 8fea49af..95a323b6 100644 --- a/src/uncertain_datasets/UncertainIndexValueDataset.jl +++ b/src/uncertain_datasets/UncertainIndexValueDataset.jl @@ -1,7 +1,5 @@ """ - UncertainIndexValueDataset{ - IDXTYP<:AbstractUncertainIndexDataset, - VALSTYP<:AbstractUncertainValueDataset} + UncertainIndexValueDataset(indices, values) A generic dataset type consisting of a set of uncertain `indices` (e.g. time, depth, order, etc...) and a set of uncertain `values`. @@ -13,42 +11,18 @@ The i-th index is assumed to correspond to the i-th value. For example, if - `data.values[7]` is the value for the index `data.indices[7]`. - `data[3]` is an index-value tuple `(data.indices[3], data.values[3])`. -## Fields - -- **`indices::T where {T <: AbstractUncertainIndexDataset}`**: The uncertain indices, - represented by some type of uncertain index dataset. -- **`values::T where {T <: AbstractUncertainValueDataset}`**: The uncertain values, - represented by some type of uncertain index dataset. - ## Example -```julia -# Simulate some data values measured a specific times. -times = 1:100 -values = sin.(0.0:0.1:100.0) - -# Assume the data were measured by a device with normally distributed -# measurement uncertainties with fluctuating standard deviations -σ_range = (0.1, 0.7) - -uncertain_values = [UncertainValue(Normal, val, rand(Uniform(σ_range...))) - for val in values] +Here, we simulate data which were measured with some uncertainty, with some timing error. +The data were measured by a device with normally distributed measurement uncertainties, +with fluctuating standard deviations. The clock used to record the times is uncertain, +but with uniformly distributed noise whose magnitude is restricted to the interval +``[0.1, 0.7]``. -# Assume the clock used to record the times is uncertain, but with uniformly -# distributed noise that doesn't change through time. 
-uncertain_times = [UncertainValue(Uniform, t-0.1, t+0.1) for t in times] - -# Pair the time-value data. If vectors are provided to the constructor, -# the first will be interpreted as the indices and the second as the values. -data = UncertainIndexValueDataset(uncertain_times, uncertain_values) - -# A safer option is to first convert to UncertainIndexDataset and -# UncertainValueDataset, so you don't accidentally mix the indices -# and the values. -uidxs = UncertainIndexDataset(uncertain_times) -uvals = UncertainValueDataset(uncertain_values) - -data = UncertainIndexValueDataset(uidxs, uvals) +```julia +v = [UncertainValue(Normal, x, rand(Uniform(0.1, 0.7))) for x in sin.(0.0:0.1:100.0)] +t = [UncertainValue(Uniform, x-0.1, x+0.1) for x in 1:100] +data = UncertainIndexValueDataset(t, v) ``` """ struct UncertainIndexValueDataset{IDXTYP <: AbstractUncertainIndexDataset, VALSTYP <: AbstractUncertainValueDataset} <: AbstractUncertainIndexValueDataset diff --git a/src/uncertain_datasets/UncertainValueDataset.jl b/src/uncertain_datasets/UncertainValueDataset.jl index d9a6ab3c..3ba97b81 100644 --- a/src/uncertain_datasets/UncertainValueDataset.jl +++ b/src/uncertain_datasets/UncertainValueDataset.jl @@ -1,13 +1,35 @@ """ - UncertainValueDataset - -A dataset of uncertain values. - -## Fields - -- **`values::AbstractVector{<:AbstractUncertainValue}`**: The uncertain values. Each value is - represented by an `AbstractUncertainValue`. + UncertainValueDataset(values) + +A dataset of uncertain values which have no explicit index associated with its uncertain values. +Use this type when you want to be explicit about the values representing data values, as +opposed to [`UncertainIndexDataset`](@ref)s. + +`UncertainValueDataset`s can be comprised of uncertain values of any type compatible with +this package (see also [`UncertainValue`](@ref)). + +## Example + +```julia +using UncertainData +o1 = UncertainValue(Normal, 0, 0.5) +o2 = UncertainValue(Normal, 2.0, 0.1) +o3 = UncertainValue(Uniform, 0, 4) +o4 = UncertainValue(Uniform, rand(100)) +o5 = UncertainValue(Beta, 4, 5) +o6 = UncertainValue(Gamma, 4, 5) +o7 = UncertainValue(Frechet, 1, 2) +o8 = UncertainValue(BetaPrime, 1, 2) +o9 = UncertainValue(BetaBinomial, 10, 3, 2) +o10 = UncertainValue(Binomial, 10, 0.3) + +uvals = [o1, o2, o3, o4, o5, o6, o7, o8, o9, o10] +d = UncertainValueDataset(uvals) + +# Plot the 20th to 80th percentile range error bars. +plot(d, [0.2, 0.8]) +``` """ struct UncertainValueDataset <: AbstractUncertainValueDataset values::AbstractVector{<:AbstractUncertainValue} @@ -26,7 +48,7 @@ struct ConstrainedUncertainValueDataset <: AbstractUncertainValueDataset end function UncertainValueDataset(x::AbstractArray{T, 1}) where T - UncertainValueDataset(CertainValue.(x)) + UncertainValueDataset(CertainScalar.(x)) end export diff --git a/src/uncertain_values/CertainScalar.jl b/src/uncertain_values/CertainScalar.jl new file mode 100644 index 00000000..1984699f --- /dev/null +++ b/src/uncertain_values/CertainScalar.jl @@ -0,0 +1,88 @@ +""" + CertainScalar + +A simple wrapper type for values with no uncertainty (i.e. represented by a scalar). + +## Examples + +The two following ways of constructing values without uncertainty are equivalent. 
+ +```julia +u1, u2 = CertainScalar(2.2), CertainScalar(6) +w1, w2 = UncertainValue(2.2), UncertainValue(6) +``` +""" +struct CertainScalar{T} <: AbstractUncertainValue + value::T +end + +Broadcast.broadcastable(x::CertainScalar) = Ref(x.value) + +function summarise(uval::CertainScalar) + _type = typeof(uval) + val = uval.value + "$_type($val)" +end +Base.show(io::IO, uval::CertainScalar) = print(io, summarise(uval)) + +eltype(v::CertainScalar{T}) where {T} = T + +Base.size(x::CertainScalar) = () +Base.size(x::CertainScalar,d) = convert(Int,d)<1 ? throw(BoundsError()) : 1 +Base.axes(x::CertainScalar) = () +Base.axes(x::CertainScalar,d) = convert(Int,d)<1 ? throw(BoundsError()) : Base.OneTo(1) +Base.ndims(x::CertainScalar) = 0 +Base.ndims(::Type{<:CertainScalar}) = 0 +Base.length(x::CertainScalar) = 1 +Base.firstindex(x::CertainScalar) = 1 +Base.lastindex(x::CertainScalar) = 1 +Base.IteratorSize(::Type{<:CertainScalar}) = Base.HasShape{0}() +Base.keys(::CertainScalar) = Base.OneTo(1) +Base.getindex(x::CertainScalar) = x + +function Base.getindex(x::CertainScalar, i::Integer) + Base.@_inline_meta + @boundscheck i == 1 || throw(BoundsError()) + x +end +function Base.getindex(x::CertainScalar, I::Integer...) + Base.@_inline_meta + @boundscheck all([i == 1 for i in I]) || throw(BoundsError()) + x +end + +Base.first(x::CertainScalar) = x +Base.last(x::CertainScalar) = x +Base.copy(x::CertainScalar) = x + +Base.minimum(v::CertainScalar) = v.value +Base.maximum(v::CertainScalar) = v.value +Base.isnan(x::CertainScalar) = Base.isnan(x.value) +Base.abs2(x::CertainScalar) = Base.abs2(x.value) + +StatsBase.mean(v::CertainScalar) = v.value +StatsBase.median(v::CertainScalar) = v.value +StatsBase.middle(v::CertainScalar) = v.value +StatsBase.quantile(v::CertainScalar, q) = v.value +StatsBase.quantile(v::CertainScalar, q, n::Int) = v.value +StatsBase.std(v::CertainScalar{T}) where {T} = zero(T) + +Base.rand(v::CertainScalar) = v.value +Base.rand(v::CertainScalar{T}, n::Int) where T = repeat([v.value], n) + +Base.float(v::CertainScalar) = float(v.value) + +function Base.:<(x::CertainScalar{T1}, y::CertainScalar{T2}) where { + T1 <: Real, T2 <: Real} + x.value < y.value +end + +function IntervalArithmetic.interval(x::CertainScalar{T1}, y::CertainScalar{T2}) where { + T1 <: Real, T2 <: Real} + interval(x.value, y.value) +end + + +export +CertainScalar, +UncertainValue \ No newline at end of file diff --git a/src/uncertain_values/CertainValue.jl b/src/uncertain_values/CertainValue.jl deleted file mode 100644 index 80e8c77a..00000000 --- a/src/uncertain_values/CertainValue.jl +++ /dev/null @@ -1,88 +0,0 @@ -""" - CertainValue - -A simple wrapper type for values with no uncertainty (i.e. represented by a scalar). - -## Examples - -The two following ways of constructing values without uncertainty are equivalent. - -```julia -u1, u2 = CertainValue(2.2), CertainValue(6) -w1, w2 = UncertainValue(2.2), UncertainValue(6) -``` -""" -struct CertainValue{T} <: AbstractUncertainValue - value::T -end - -Broadcast.broadcastable(x::CertainValue) = Ref(x.value) - -function summarise(uval::CertainValue) - _type = typeof(uval) - val = uval.value - "$_type($val)" -end -Base.show(io::IO, uval::CertainValue) = print(io, summarise(uval)) - -eltype(v::CertainValue{T}) where {T} = T - -Base.size(x::CertainValue) = () -Base.size(x::CertainValue,d) = convert(Int,d)<1 ? throw(BoundsError()) : 1 -Base.axes(x::CertainValue) = () -Base.axes(x::CertainValue,d) = convert(Int,d)<1 ? 
throw(BoundsError()) : Base.OneTo(1) -Base.ndims(x::CertainValue) = 0 -Base.ndims(::Type{<:CertainValue}) = 0 -Base.length(x::CertainValue) = 1 -Base.firstindex(x::CertainValue) = 1 -Base.lastindex(x::CertainValue) = 1 -Base.IteratorSize(::Type{<:CertainValue}) = Base.HasShape{0}() -Base.keys(::CertainValue) = Base.OneTo(1) -Base.getindex(x::CertainValue) = x - -function Base.getindex(x::CertainValue, i::Integer) - Base.@_inline_meta - @boundscheck i == 1 || throw(BoundsError()) - x -end -function Base.getindex(x::CertainValue, I::Integer...) - Base.@_inline_meta - @boundscheck all([i == 1 for i in I]) || throw(BoundsError()) - x -end - -Base.first(x::CertainValue) = x -Base.last(x::CertainValue) = x -Base.copy(x::CertainValue) = x - -Base.minimum(v::CertainValue) = v.value -Base.maximum(v::CertainValue) = v.value -Base.isnan(x::CertainValue) = Base.isnan(x.value) -Base.abs2(x::CertainValue) = Base.abs2(x.value) - -StatsBase.mean(v::CertainValue) = v.value -StatsBase.median(v::CertainValue) = v.value -StatsBase.middle(v::CertainValue) = v.value -StatsBase.quantile(v::CertainValue, q) = v.value -StatsBase.quantile(v::CertainValue, q, n::Int) = v.value -StatsBase.std(v::CertainValue{T}) where {T} = zero(T) - -Base.rand(v::CertainValue) = v.value -Base.rand(v::CertainValue{T}, n::Int) where T = repeat([v.value], n) - -Base.float(v::CertainValue) = float(v.value) - -function Base.:<(x::CertainValue{T1}, y::CertainValue{T2}) where { - T1 <: Real, T2 <: Real} - x.value < y.value -end - -function IntervalArithmetic.interval(x::CertainValue{T1}, y::CertainValue{T2}) where { - T1 <: Real, T2 <: Real} - interval(x.value, y.value) -end - - -export -CertainValue, -UncertainValue \ No newline at end of file diff --git a/src/uncertain_values/UncertainScalarPopulation.jl b/src/uncertain_values/UncertainScalarPopulation.jl index 2ebcecef..a4bc5f53 100644 --- a/src/uncertain_values/UncertainScalarPopulation.jl +++ b/src/uncertain_values/UncertainScalarPopulation.jl @@ -3,123 +3,120 @@ import IntervalArithmetic: interval import Distributions import StatsBase -const POTENTIAL_UVAL_TYPES = Union{T1, T2} where {T1<:Number, T2} where T2 <: AbstractUncertainValue +const POTENTIAL_UVAL_TYPES = Union{T1, T2} where {T1 <: Number, T2 <: AbstractUncertainValue} -""" - UncertainScalarPopulation(values, probs) - UncertainScalarPopulation(values, probs::Vector{Number}) - UncertainScalarPopulation(values, probs::Statsbase.AbstractWeights) +convert_elwise(f::Function, x) = map(f, x); +convert_elwise(f::Function, x::T) where T <: AbstractUncertainValue = x +nested_convert_elwise(f::Function, x) = map(xᵢ -> convert_elwise(f, xᵢ), x) +function verify_pop_and_weights(pop, wts) + if length(pop) != length(wts) + throw(ArgumentError("The number of population members and the number of weights do not match.")) + end +end -An `UncertainScalarPopulation`, which consists of some population members (`values`) -and some weights (`probs`) that indicate the relative importance of the -population members (for example during resampling). +""" + UncertainScalarPopulation(members, probs) + UncertainScalarPopulation(members, probs::Vector{Number}) + UncertainScalarPopulation(members, probs::Statsbase.AbstractWeights) -## Fields +An `UncertainScalarPopulation`, which consists of some population `members` +with associated weights (`probs`) that indicate the relative importance of the +population members (for example during resampling). 
The `members` can be either
+numerical values or any type of uncertain value defined in this package
+(including populations, so nested populations are possible).
 
-- **`values`**: The members of the population. Can be either numerical values, any 
-    type of uncertain value defined in this package (including populations), and 
-    `Measurement` instances from Measurements.jl.
-- **`probs`**: The probabilities of sampling each member of the population.
+## Examples
 
-## Constructors
+Weighted scalar populations are defined as follows. Weights must always be provided,
+and scalars must be converted to uncertain values before creating the population.
 
-- If `values` contains only scalar numeric values, then the `values` field 
-    will be of type `Vector{Number}`.
-- If `values` contains one or more uncertain values, then the `values` field 
-    will be of type `Vector{AbstractUncertainValue}`
+```julia
+using UncertainData
+members = UncertainValue.([1.0, 2.0, 3.0]); wts = rand(3)
 
-## Example
+# Treat elements of `members` as equiprobable
+p = UncertainScalarPopulation(members, [1, 1, 1])
 
-```julia
+# Treat elements of `members` as inequiprobable
+p = UncertainScalarPopulation(members, [2, 3, 1])
+```
 
-# Uncertain population consisting of CertainValues (scalars get promoted to 
-# CertainValue), theoretical distributions and KDE distributions
-pop1 = UncertainScalarPopulation(
-    [3.0, UncertainValue(Normal, 0, 1), UncertainValue(Gamma, 2, 3), 
-        UncertainValue(Uniform, rand(1000))], [0.5, 0.5, 0.5, 0.5])
+Uncertain populations can also consist of a mixture of different types of uncertain values.
+Here, we use a population consisting of a scalar, two theoretical distributions
+with known parameters, and a theoretical uniform distribution whose parameters
+are estimated from a random sample `s`. We assign equal weights to the members
+of the population.
 
-# Uncertain population consisting of scalar values 
-pop2 = UncertainScalarPopulation([1, 2, 3], rand(3))
-pop3 = UncertainScalarPopulation([1, 2, 3], Weights(rand(3)))
+```julia
+s = rand(1000)
+members = [3.0, UncertainValue(Normal, 0, 1), UncertainValue(Gamma, 2, 3),
+    UncertainValue(Uniform, s)]
+wts = [0.5, 0.5, 0.5, 0.5]
+p = UncertainValue(members, wts)
+```
 
-# Uncertain population consisting of uncertain populations
-pop4 = UncertainScalarPopulation([pop1, pop2], [0.1, 0.5])
+Nested populations are also possible, and sub-populations can be given
+unequal sampling priority.
 
-# Uncertain population consisting of uncertain populations, a scalar and 
-# a normal distribution. Assign random weights.
-vals = [pop1, pop2, 2, UncertainValue(Normal, 0.3, 0.014)]
-pop5 = UncertainScalarPopulation(vals, Weights(rand(4)))
+```julia
+using UncertainData, Distributions
+s = rand(Normal(0.1, 2.0), 8000)
+v1, v2 = UncertainValue(Normal, 0.5, 0.33), UncertainValue(Gamma, 0.6, 0.9)
+v3, v4, v5 = 2.2, UncertainValue(Normal, s), UncertainValue(s)
+
+# When sampling sub-population m1, members v1 and v2 are given relative importance 1 to 3
+# When sampling sub-population m2, members v3, v4 and v5 are given relative importance 2, 1 and 1
+m1 = UncertainValue([v1, v2], [1, 3])
+m2 = UncertainValue([v3, v4, v5], [2, 1, 1])
+
+# When sampling the overall population, the sub-populations m1 and m2
+# are sampled with equal importance.
+p = UncertainValue([m1, m2], [1, 1]) ``` """ struct UncertainScalarPopulation{T, PW <: StatsBase.AbstractWeights} <: AbstractScalarPopulation{T, PW} - values::Vector{T} + members::T probs::PW -end -""" - UncertainScalarPopulation(values::Vector, probabilities::Vector{Float64}) - -Construct a population from a vector of values and a vector of probabilities associated -to those values.""" -function UncertainScalarPopulation(values::Vector{T1}, probabilities::Vector{T2}) where {T1 <: Number, T2 <: Number} - if length(values) != length(probabilities) - throw(ArgumentError("Lengths of values and probability vectors do not match.")) + function UncertainScalarPopulation(members, probs::AbstractVector{T}) where {T <: Number} + verify_pop_and_weights(members, probs) + m = nested_convert_elwise(UncertainValue, members); TT = typeof(m) + wts = Weights(probs); PW = typeof(wts) + new{TT, PW}(m, wts) end - UncertainScalarPopulation(values, StatsBase.weights(probabilities)) -end -function UncertainScalarPopulation(values::VT, probabilities) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} - if length(values) != length(probabilities) - throw(ArgumentError("Lengths of values and probability vectors do not match.")) + + function UncertainScalarPopulation(members, probs::PW) where {PW <: StatsBase.AbstractWeights} + verify_pop_and_weights(members, probs) + m = nested_convert_elwise(UncertainValue, members); TT = typeof(m) + new{TT, PW}(m, probs) end - UncertainScalarPopulation(UncertainValue.(values), StatsBase.weights(probabilities)) end - """ - ConstrainedUncertainScalarPopulation(values, probs) - ConstrainedUncertainScalarPopulation(values, probs::Vector{Number}) - ConstrainedUncertainScalarPopulation(values, probs::Statsbase.AbstractWeights) - -A `ConstrainedUncertainScalarPopulation`, which consists of some population -members (`values`)and some weights (`probs`) that indicate the relative importance of -the population members (for example during resampling). The uncertain values -for this type is meant to consist of constrained uncertain values -(generated by calling `constrain(uval, sampling_constraint`) on them. + ConstrainedUncertainScalarPopulation(members, probs) + ConstrainedUncertainScalarPopulation(members, probs::Vector{Number}) + ConstrainedUncertainScalarPopulation(members, probs::Statsbase.AbstractWeights) -This is just a convenience type to indicate that the population has been +A convenience type to indicate that the population has been constrained. It behaves identically to `UncertainScalarPopulation`. - -There are different constructors for different types of `values`: - -- If `values` contains only scalar numeric values, then the `values` field - will be of type `Vector{Number}`. -- If `values` contains one or more uncertain values, then the `values` field - will be of type `Vector{AbstractUncertainValue}` - """ struct ConstrainedUncertainScalarPopulation{T, PW <: StatsBase.AbstractWeights} <: AbstractScalarPopulation{T, PW} - values::Vector{T} + members::Vector{T} probs::PW end -""" - ConstrainedUncertainScalarPopulation(values::Vector, probabilities::Vector{Float64}) - -Construct a constrained population from a vector of values and a vector of -probabilities associated to those values. 
-""" -function ConstrainedUncertainScalarPopulation(values::Vector{T1}, probabilities::Vector{T2}) where {T1 <: Number, T2 <: Number} - if length(values) != length(probabilities) - throw(ArgumentError("Lengths of values and probability vectors do not match.")) +function ConstrainedUncertainScalarPopulation(members::Vector{T1}, probabilities::Vector{T2}) where {T1 <: Number, T2 <: Number} + if length(members) != length(probabilities) + throw(ArgumentError("Lengths of members and probability vectors do not match.")) end - ConstrainedUncertainScalarPopulation(float.(values), StatsBase.weights(probabilities)) + ConstrainedUncertainScalarPopulation(float.(members), StatsBase.weights(probabilities)) end -function ConstrainedUncertainScalarPopulation(values::VT, probabilities) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} - if length(values) != length(probabilities) - throw(ArgumentError("Lengths of values and probability vectors do not match.")) +function ConstrainedUncertainScalarPopulation(members::VT, probabilities) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} + if length(members) != length(probabilities) + throw(ArgumentError("Lengths of members and probability vectors do not match.")) end - ConstrainedUncertainScalarPopulation(UncertainValue.(values), StatsBase.weights(probabilities)) + ConstrainedUncertainScalarPopulation(UncertainValue.(members), StatsBase.weights(probabilities)) end export diff --git a/src/uncertain_values/UncertainScalarsKDE.jl b/src/uncertain_values/UncertainScalarsKDE.jl index 5c47c5de..8e725710 100644 --- a/src/uncertain_values/UncertainScalarsKDE.jl +++ b/src/uncertain_values/UncertainScalarsKDE.jl @@ -3,27 +3,43 @@ import Base.rand import StatsBase.quantile import StatsBase.median import Distributions.support - import Distributions.ecdf import Base: minimum, maximum, max, min + """ - UncertainScalarKDE(d::KernelDensity.UnivariateKDE, values::AbstractVector{T}, range, pdf) + UncertainScalarKDE(d::KernelDensity.UnivariateKDE, x::AbstractVector, range, pdf) + +An uncertain value represented by a kernel density estimate `d`, to the +underlying distribution for the empirical sample `x`. + +`range` are the values for which the pdf is estimated, and `pdf` are the +corresponding values of the pdf. Gaussian kernels are used by default. + +## Examples -An empirical value represented by a distribution estimated from actual data. +```julia +using Distributions, UncertainData, KernelDensity -## Fields +# Draw a 1000-point sample from a normal distribution. +s = rand(Normal(), 1000) -- **`distribution`**: The `UnivariateKDE` estimate for the distribution of `values`. -- **`values`**: The values from which `distribution` is estimated. -- **`range`**: The values for which the pdf is estimated. -- **`pdf`**: The values of the pdf at each point in `range`. +# Estimate a distribution to the underlying distribution by using +# kernel density estimation on the sample `s` +x = UncertainValue(s) + +# The explicit constructor allows adjusting the kernel (must be a valid +# kernel from Distributions.jl; normal distributions are the default), +# and the number of points used for the estimation (must be a power of 2; +# default is 2048 points). 
+x = UncertainValue(UnivariateKDE, s; kernel = Normal, npoints = 1024) +``` """ -struct UncertainScalarKDE{T} <: AbstractUncertainScalarKDE{T} +struct UncertainScalarKDE{T, V <: AbstractVector{T}} <: AbstractUncertainScalarKDE{T} distribution::KernelDensity.UnivariateKDE - values::AbstractVector{T} + values::V range pdf::StatsBase.Weights end @@ -34,9 +50,9 @@ end A truncated [`UncertainScalarKDE`](@ref). """ -struct TruncatedUncertainScalarKDE{T} <: AbstractUncertainScalarKDE{T} +struct TruncatedUncertainScalarKDE{T, V <: AbstractVector{T}} <: AbstractUncertainScalarKDE{T} distribution::KernelDensity.UnivariateKDE - values::AbstractVector{T} + values::V range pdf::StatsBase.Weights end @@ -147,15 +163,12 @@ min(uv::AbstractUncertainScalarKDE) = minimum(uv.range) max(uv::AbstractUncertainScalarKDE) = maximum(uv.range) - - - export -AbstractUncertainScalarKDE, -UncertainScalarKDE -ecdf, -support, -getquantileindex, -UnivariateKDE, -minimum, -maximum \ No newline at end of file + AbstractUncertainScalarKDE, + UncertainScalarKDE + ecdf, + support, + getquantileindex, + UnivariateKDE, + minimum, + maximum \ No newline at end of file diff --git a/src/uncertain_values/UncertainScalarsTheoretical.jl b/src/uncertain_values/UncertainScalarsTheoretical.jl index a522ad69..28af705c 100644 --- a/src/uncertain_values/UncertainScalarsTheoretical.jl +++ b/src/uncertain_values/UncertainScalarsTheoretical.jl @@ -77,7 +77,9 @@ import Distributions.Gamma import Distributions.Frechet """ -Uncertain value represented by a generic three-parameter distribution. + UncertainScalarTheoreticalThreeParameter(d::Distribution, a, b, c) + +Uncertain value represented by a generic three-parameter distribution `d` with parameters `a`, `b` and `c`. """ struct UncertainScalarTheoreticalThreeParameter{S<:ValueSupport, T1<:Number, T2<:Number, T3<:Number} <: AbstractUncertainThreeParameterScalarValue{S, T1, T2, T3} distribution::Distribution{Univariate, S} @@ -87,7 +89,9 @@ struct UncertainScalarTheoreticalThreeParameter{S<:ValueSupport, T1<:Number, T2< end """ -Uncertain value represented by a generic two-parameter distribution. + UncertainScalarTheoreticalTwoParameter(d::Distribution, a, b) + +Uncertain value represented by a generic two-parameter distribution `d` with parameters `a` and `b`. """ struct UncertainScalarTheoreticalTwoParameter{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -96,17 +100,25 @@ struct UncertainScalarTheoreticalTwoParameter{S<:ValueSupport, T1<:Number, T2<:N end """ -Uncertain value represented by a generic one-parameter distribution. + UncertainScalarTheoreticalOneParameter(d::Distribution, a) + +Uncertain value represented by a generic one-parameter distribution `d` with parameter `a`. """ struct UncertainScalarGenericOneParameter{S<:ValueSupport, T1<:Number} <: AbstractUncertainOneParameterScalarValue{S, T1} distribution::Distribution{Univariate, S} a::T1 end +""" + UncertainScalarNormallyDistributed(d::Normal, μ, σ) + +Uncertain value represented by a normal distribution `d` with mean `μ` and standard deviation `σ`. +## Example -""" -Uncertain value represented by a normal distribution. 
+```julia +x = UncertainValue(Normal, 1.2, 0.3) +``` """ struct UncertainScalarNormallyDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -114,9 +126,16 @@ struct UncertainScalarNormallyDistributed{S<:ValueSupport, T1<:Number, T2<:Numbe σ::T2 end - """ -Uncertain value represented by a uniform distribution. + UncertainScalarUniformlyDistributed(d::Uniform, lower, upper) + +Uncertain value represented by a uniform distribution `d` with `lower` and `upper` bounds. + +## Example + +```julia +x = UncertainValue(Uniform, -2.5, 4.5) +``` """ struct UncertainScalarUniformlyDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -124,9 +143,16 @@ struct UncertainScalarUniformlyDistributed{S<:ValueSupport, T1<:Number, T2<:Numb upper::T2 end - """ -Uncertain value represented by a beta distribution. + UncertainScalarBetaDistributed(d::Beta, α, β) + +Uncertain value represented by a beta distribution `d` with parameters `α` and `β`. + +## Example + +```julia +x = UncertainValue(Beta, 0.5, 3.0) +``` """ struct UncertainScalarBetaDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -134,9 +160,16 @@ struct UncertainScalarBetaDistributed{S<:ValueSupport, T1<:Number, T2<:Number} < β::T2 end - """ -Uncertain value represented by a beta prime distribution. + UncertainScalarBetaPrimeDistributed(d::BetaPrime, α, β) + +Uncertain value represented by a beta prime distribution `d` with parameters `α` and `β`. + +## Example + +```julia +x = UncertainValue(BetaPrime, 2.1, 3.3) +``` """ struct UncertainScalarBetaPrimeDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -144,10 +177,16 @@ struct UncertainScalarBetaPrimeDistributed{S<:ValueSupport, T1<:Number, T2<:Numb β::T2 end +""" + UncertainScalarBetaBinomialDistributed(d::BetaBinomial, n, α, β) + +Uncertain value represented by a beta binomial distribution `d` with parameters `n`, `α` and `β`. +## Example -""" -Uncertain value represented by a beta binomial distribution. +```julia +x = UncertainValue(BetaBinomial, 10, 0.2, 0.7) +``` """ struct UncertainScalarBetaBinomialDistributed{S<:ValueSupport, T1<:Number, T2<:Number, T3<:Number} <: AbstractUncertainThreeParameterScalarValue{S, T1, T2, T3} distribution::Distribution{Univariate, S} @@ -156,11 +195,16 @@ struct UncertainScalarBetaBinomialDistributed{S<:ValueSupport, T1<:Number, T2<:N β::T3 end +""" + UncertainScalarGammaDistributed(d::Gamma, α, θ) +Uncertain value represented by a gamma distribution `d` with parameters `α` and `θ`. +## Example -""" -Uncertain value represented by a gamma distribution. +```julia +x = UncertainValue(Gamma, 0.2, 0.44) +``` """ struct UncertainScalarGammaDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -168,11 +212,16 @@ struct UncertainScalarGammaDistributed{S<:ValueSupport, T1<:Number, T2<:Number} θ::T2 end +""" + UncertainScalarFrechetDistributed(d::Frechet, α, θ) +Uncertain value represented by a Fréchet distribution `d` with parameters `α` and `θ`. +## Example -""" -Uncertain value represented by a Fréchet distribution. 
+```julia +x = UncertainValue(Frechet, 2.0, 2.1) +``` """ struct UncertainScalarFrechetDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -180,11 +229,16 @@ struct UncertainScalarFrechetDistributed{S<:ValueSupport, T1<:Number, T2<:Number θ::T2 end +""" + UncertainScalarBinomialDistributed(d::Binomial, n, θ) +Uncertain value represented by a binomial distribution `d` with parameters `n` and `θ`. +## Example -""" -Uncertain value represented by a binomial distribution. +```julia +x = UncertainValue(Binomial, 15, 0.5) +``` """ struct UncertainScalarBinomialDistributed{S<:ValueSupport, T1<:Number, T2<:Number} <: AbstractUncertainTwoParameterScalarValue{S, T1, T2} distribution::Distribution{Univariate, S} @@ -193,9 +247,6 @@ struct UncertainScalarBinomialDistributed{S<:ValueSupport, T1<:Number, T2<:Numbe end - - - ################### # Pretty printing ################### @@ -310,9 +361,9 @@ Base.show(io::IO, q::UncertainScalarBinomialDistributed) = print(io, summarise(q export TheoreticalDistributionScalarValue, -AbstractUncertainOneParameterScalarValue, -AbstractUncertainTwoParameterScalarValue, -AbstractUncertainThreeParameterScalarValue, +# AbstractUncertainOneParameterScalarValue, +# AbstractUncertainTwoParameterScalarValue, +# AbstractUncertainThreeParameterScalarValue, ConstrainedUncertainScalarValueOneParameter, ConstrainedUncertainScalarValueTwoParameter, diff --git a/src/uncertain_values/UncertainScalarsTheoreticalFitted.jl b/src/uncertain_values/UncertainScalarsTheoreticalFitted.jl index 29164863..0d6756c9 100644 --- a/src/uncertain_values/UncertainScalarsTheoreticalFitted.jl +++ b/src/uncertain_values/UncertainScalarsTheoreticalFitted.jl @@ -6,13 +6,79 @@ abstract type TheoreticalFittedUncertainScalar <: TheoreticalDistributionScalarV Broadcast.broadcastable(uv::TheoreticalFittedUncertainScalar) = Ref(uv.distribution) """ -UncertainScalarTheoreticalFit + UncertainScalarTheoreticalFit( + d::FittedDistribution{D}, + x::AbstractVector{T}) where {D <: Distribution, T} -An empirical value represented by a distribution estimated from actual data. +An uncertain value represented a distribution `d` whose parameters are +estimated from the empirical sample `x`. -## Fields -- **`distribution`** The distribution describing the value. -- **`values`**: The values from which `distribution` is estimated. +## Examples + +Here, we simulate an empirical sample. We then decide to represent the +sample by a distribution whose parameters are estimated from the sample. + +``` julia +using UncertainData, Distributions +# Simulate a 1000-point sample by drawing from a uniform distribution. +d = Uniform(); s = rand(d, 1000) + +# Represent `s` by a uniform distribution whose parameters are estimated from `s` +x = UncertainValue(Uniform, s) +``` + +``` julia +using UncertainData, Distributions +# Simulate a 1000-point sample by drawing from a normal distribution. +s = rand(Normal(), 1000) + +# Represent `s` by a normal distribution whose parameters are estimated from `s` +x = UncertainValue(Normal, s) +``` + +```julia +using UncertainData, Distributions + +# Simulate a 1000-point sample by drawing from a gamma distribution +# with parameters α = 2.1, θ = 5.2. 
+s = rand(Gamma(2.1, 5.2), 1000)
+
+# Represent `s` by a gamma distribution whose parameters are estimated from `s`
+x = UncertainValue(Gamma, s)
+```
+
+
+*Note: these examples are contrived: of course, estimating the parameters
+of a uniform distribution from a sample drawn from a uniform distribution
+will yield a good fit. Real samples are usually less straightforward to
+model using theoretical distributions*.
+In real applications, make sure to always visually investigate the histogram
+of your data before picking which distribution to fit! Alternatively,
+use kernel density estimation to fit a distribution (i.e. [`UncertainScalarKDE`](@ref)).
+
+### Beware: fitting distributions may lead to nonsensical results!
+
+In a less contrived example, we may try to fit a beta distribution to a sample
+generated from a gamma distribution.
+
+
+```julia
+using Distributions, UncertainData
+
+# Generate 1000 values from a gamma distribution with parameters α = 2.1,
+# θ = 5.2.
+s = rand(Gamma(2.1, 5.2), 1000)
+
+# Represent `s` by a beta distribution whose parameters are estimated from `s`
+x = UncertainValue(Beta, s)
+```
+
+This is obviously not a good idea. Always visualise your distribution before
+deciding on which distribution to fit! You won't get any error messages if you
+try to fit a distribution that does not match your data.
+
+If the data do not follow an obvious theoretical distribution, it is better to
+use kernel density estimation to define the uncertain value.
 """
 struct UncertainScalarTheoreticalFit{D <: Distribution, T} <: TheoreticalFittedUncertainScalar
     distribution::FittedDistribution{D} # S may be Continuous or Discrete
@@ -20,14 +86,12 @@ struct UncertainScalarTheoreticalFit{D <: Distribution, T} <: TheoreticalFittedU
 end
 
 """
-    ConstrainedUncertainScalarTheoreticalFit
-
-An empirical value represented by a distribution estimated from actual data.
+    ConstrainedUncertainScalarTheoreticalFit(
+        d::FittedDistribution{D},
+        x::AbstractVector{T}) where {D <: Distribution, T}
 
-## Fields
-- **`distribution`** The truncated version of the distribution describing the 
-    value.
-- **`values`**: The values from which the original distribution was estimated.
+An uncertain value represented by a distribution `d` whose parameters are estimated from the empirical sample `x`,
+where the distribution `d` has been truncated after estimation.
 """
 struct ConstrainedUncertainScalarTheoreticalFit{D <: Distribution, T} <: TheoreticalFittedUncertainScalar
     distribution::FittedDistribution{D} # S may be Continuous or Discrete
diff --git a/src/uncertain_values/UncertainValue.jl b/src/uncertain_values/UncertainValue.jl
index b19e8684..35d4a7f2 100644
--- a/src/uncertain_values/UncertainValue.jl
+++ b/src/uncertain_values/UncertainValue.jl
@@ -3,65 +3,197 @@ import Distributions.Distribution
 import StatsBase: AbstractWeights, Weights
 import Distributions
 
-"""
-    UncertainValue(x::T) where T <: Real
-
-Create a `CertainValue` instance from a scalar with no uncertainty.
 """
-UncertainValue(x::T) where T <: Real = CertainValue(x)
+    UncertainValue(d::Distribution)
+    UncertainValue(d::Type{Normal}, μ, σ) → UncertainScalarNormallyDistributed
+    UncertainValue(d::Type{Uniform}, lower, upper) → UncertainScalarUniformlyDistributed
+    UncertainValue(d::Type{Beta}, α, β) → UncertainScalarBetaDistributed
+    UncertainValue(d::Type{BetaPrime}, α, β) → UncertainScalarBetaPrimeDistributed
+    UncertainValue(d::Type{Gamma}, α, θ) → UncertainScalarGammaDistributed
+    UncertainValue(d::Type{Frechet}, α, θ) → UncertainScalarFrechetDistributed
+    UncertainValue(d::Type{Binomial}, n, p) → UncertainScalarBinomialDistributed
+    UncertainValue(d::Type{BetaBinomial}, n, α, β) → UncertainScalarBetaBinomialDistributed
 
-# Identity constructor
-UncertainValue(uval::AbstractUncertainValue) = uval
+Construct an uncertain value represented by a (possibly truncated)
+theoretical distribution `d`.
 
-# From Measurements.jl
-UncertainValue(m::Measurement{T}) where T = UncertainValue(Normal, m.val, m.err)
+    UncertainValue(d::Type{<:Distribution}, x::AbstractVector) → UncertainScalarTheoreticalFit
+
+Construct an uncertain value by fitting a distribution of type `d` to an empirical sample
+`x`, and using that fitted distribution as the representation of `x`.
 
-"""
-    UncertainValue(values::Vector{<:Number}, probs::Vector{<:Number})
+See also: [`UncertainScalarTheoreticalFit`](@ref)
 
-From a numeric vector, construct an `UncertainPopulation` whose 
-members are scalar values.
-"""
-function UncertainValue(values::Vector{<:Number}, probs::Vector{<:Number})
-    UncertainScalarPopulation(float.(values), probs)
-end
+    UncertainValue(x::AbstractVector;
+        kernel::Type{<:Distribution} = Normal, npoints::Int = 2048) → UncertainScalarKDE
+
+Construct an uncertain value by estimating the underlying distribution of
+the empirical sample `x` using kernel density estimation (KDE), then using the resulting
+KDE distribution as the representation of `x`. Fast Fourier transforms are used in the kernel density
+estimation, so the number of points should be a power of 2 (default = 2048).
 
-"""
-    UncertainValue(values::Vector{<:Number}, probs::Vector{<:Number})
+See also: [`UncertainScalarKDE`](@ref)
 
-From a numeric vector, construct an `UncertainPopulation` whose 
-members are scalar values.
-"""
-function UncertainValue(values::Vector{<:Number}, probs::W) where {W <: AbstractWeights}
-    UncertainScalarPopulation(float.(values), probs)
-end
+    UncertainValue(pop::Vector, probs::Union{Vector, AbstractWeights}) → UncertainScalarPopulation
 
-"""
-    UncertainValue(values::Vector, probs::Union{Vector, AbstractWeights})
-
-Construct a population whose members are given by `values` and whose sampling 
-probabilities are given by `probs`. The elements of `values` can be either 
-numeric or uncertain values of any type.
-"""
-function UncertainValue(values::VT, probs) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES}
-    UncertainScalarPopulation(UncertainValue.(values), probs)
-end
+Construct an uncertain value from a population `pop`, whose sampling
+probabilities (prior beliefs) are `probs`. The population `pop` can contain any
+type of uncertain value. Scalars in `pop` are converted to [`CertainScalar`](@ref)s.
-
-"""
-    UncertainValue(data::Vector{T};
-        kernel::Type{D} = Normal,
-        npoints::Int=2048) where {D <: Distributions.Distribution, T}
+
+See also: [`UncertainScalarPopulation`](@ref)
 
-Construct an uncertain value by a kernel density estimate to `data`.
+    UncertainValue(x::T) where {T <: Real} → CertainScalar
+
+Create a `CertainScalar` instance from a scalar with no uncertainty.
+
+See also: [`CertainScalar`](@ref)
+
+    UncertainValue(m::Measurement) → UncertainScalarNormallyDistributed
+
+Convert a `Measurement` instance to an uncertain value compatible with UncertainData.jl.
+
+`Measurement` instances from [Measurements.jl](https://github.com/JuliaPhysics/Measurements.jl)[^1] are
+treated as normal distributions with known means. Once the conversion is done, the
+functionality provided by Measurements.jl, such as exact error propagation, is lost.
+
+# Examples
+
+## Theoretical distributions with known parameters
+
+Measurements are often given as a mean and an associated standard deviation.
+Such measurements can be directly represented by the parameters of the distribution.
+
+Assume a data point has a normally distributed uncertainty, with a mean value of 2.2
+and standard deviation of 4.0. We use the following notation to represent that value.
+
+```julia
+using UncertainData, Distributions
+UncertainValue(Normal(2.2, 4.0))
+UncertainValue(Normal, 2.2, 4.0) # alternative constructor
+```
+
+Other distributions, as well as truncated distributions, also work.
+
+```julia
+using UncertainData
+UncertainValue(Uniform, -5.0, 5.0)
+UncertainValue(Gamma, 3.0, 1.2)
+
+lo, hi = 0.5, 3.5 # truncation limits
+UncertainValue(Truncated(Gamma(4, 5.1), lo, hi))
+```
+
+## Theoretical distributions with parameters estimated from empirical data
+
+In some cases, it might be convenient to represent an empirical sample by a
+probability distribution whose parameters are estimated from the sample.
+Here, we simulate a real dataset by generating a small sample from a
+normal distribution, then fit a normal distribution to it.
+
+```julia
+using UncertainData, Distributions
+s = rand(Normal(0, 1), 100)
+
+# Represent the sample `s` by a normal distribution with estimated parameters
+x = UncertainValue(Normal, s)
+```
+
+## Distributions estimated using the kernel density approach
+
+For empirical data with non-trivial underlying distributions, one may use
+kernel density estimation to fit a distribution to the empirical sample.
+
+Below, we simulate a multimodal empirical sample, and represent that
+sample by a kernel density estimated distribution.
+
+```julia
+using UncertainData, Distributions
+M = MixtureModel(Normal[
+    Normal(-2.0, 1.2),
+    Normal(0.0, 1.0),
+    Normal(3.0, 2.5)], [0.2, 0.5, 0.3])
+# This is our sample
+s = rand(M, 40000)
+
+# `x` is now a kernel density estimated distribution that represents the sample `s`
+x = UncertainValue(s) # or UncertainValue(UnivariateKDE, s) to be explicit
+```
+
+## Populations (discrete sets of values with associated weights)
+
+Sometimes, numerous measurements of the same phenomenon might be available. In such cases,
+a population may be used to simultaneously represent all data available. Weights
+representing prior beliefs can be added (set weights equal if all points are
+equiprobable).
+
+Below, we assume `x1` and `x2` were measured with sophisticated devices, giving
+both a mean and standard deviation. `x3`, on the other hand, was measured with a
+primitive device, giving only a mean value. Hence our trust in `x3` is lower than
+for `x1` and `x2`. 
The following + +```julia +x1 = UncertainValue(Normal, 0.1, 0.5) +x2 = UncertainValue(Gamma, 1.2, 3.1) +x3 = UncertainValue(0.1) +pop = [x1, x2, x3] # the population +wts = [0.45, 0.45, 0.1] # weights; `x1` and `x2` are equiprobable, and more probable than `x3`. +UncertainValue(pop, wts) +``` + +## Values without uncertainties + +Numerical values without associated uncertainties must be converted before mixing with +uncertain values. + +```julia +x = UncertainValue(2.0) +``` + +## Compatibility with Measurements.jl + +`Measurement`s from Measurements.jl are assumed to be normally distributed and errors +are propagated using linear error propagation theory. In this package, resampling +is used to propagate errors. Thus, `Measurement`s must be converted to normal distributions +to be used in conjuction with other uncertain values in this package. + +```julia +using UncertainData, Measurements +m = measurement(value, uncertainty) +x = UncertainValue(m) # now compatible with UncertainData.jl, but drops support for exact error propagation +``` -Fast Fourier transforms are used in the kernel density estimation, so the -number of points should be a power of 2 (default = 2048). """ -function UncertainValue(data::Vector{T}; +function UncertainValue end + +UncertainValue(x::T) where T <: Real = CertainScalar(x) + +# Identity constructor +UncertainValue(uval::AbstractUncertainValue) = uval + +# From Measurements.jl +UncertainValue(m::Measurement{T}) where T = UncertainValue(Normal, m.val, m.err) + +#Populations +# function UncertainValue( +# values::AbstractVector{<:Number}, +# probs::Union{Vector{<:Number}, W}) where {W <: AbstractWeights} + +# UncertainScalarPopulation(float.(values), probs) +# end + +UncertainValue(values, probs) = UncertainScalarPopulation(values, probs) + +# function UncertainValue(values::VT, probs) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} +# UncertainScalarPopulation(UncertainValue.(values), probs) +# end + +# function UncertainValue(values::VT, probs::Vector{<:Number}) where VT <: Vector{ELTYPE} where {ELTYPE<:POTENTIAL_UVAL_TYPES} +# UncertainScalarPopulation(UncertainValue.(values), probs) +# end + +#KDE +function UncertainValue(data::AbstractVector{T}; kernel::Type{D} = Normal, bandwidth = KernelDensity.default_bandwidth(data), npoints::Int = 2048) where {D <: Distributions.Distribution, T} @@ -79,17 +211,6 @@ function UncertainValue(data::Vector{T}; UncertainScalarKDE(KDE, data, xrange, Weights(density)) end - -""" - UncertainValue(kerneldensity::Type{K}, data::Vector{T}; - kernel::Type{D} = Normal, - npoints::Int=2048) where {K <: UnivariateKDE, D <: Distribution, T} - -Construct an uncertain value by a kernel density estimate to `data`. - -Fast Fourier transforms are used in the kernel density estimation, so the -number of points should be a power of 2 (default = 2048). -""" function UncertainValue(kerneldensity::Type{K}, data::Vector{T}; kernel::Type{D} = Normal, bandwidth = KernelDensity.default_bandwidth(data)/4, @@ -112,100 +233,86 @@ end UncertainValue(x::Vector{Array{<:Real, 0}}) = UncertainValue([el[] for el in x]) -""" - UncertainValue(empiricaldata::AbstractVector{T}, - d::Type{D}) where {D <: Distribution} - -# Constructor for empirical distributions. - -Fit a distribution of type `d` to the data and use that as the -representation of the empirical distribution. Calls `Distributions.fit` behind -the scenes. 
+# Fitted distributions +# TODO: make TheoreticalFittedUncertainScalar parametric on the input distribution +function UncertainValue(d::Type{<:Distribution}, data::AbstractVector) -## Arguments -- **`empiricaldata`**: The data for which to fit the `distribution`. -- **`distribution`**: A valid univariate distribution from `Distributions.jl`. - -""" -function UncertainValue(d::Type{D}, - empiricaldata::Vector{T}) where {D<:Distribution, T} - - distribution = FittedDistribution(Distributions.fit(d, empiricaldata)) - UncertainScalarTheoreticalFit(distribution, empiricaldata) + distribution = FittedDistribution(Distributions.fit(d, data)) + UncertainScalarTheoreticalFit(distribution, data) end -""" +# """ - UncertainValue(distribution::Type{D}, a::T1, b::T2; - kwargs...) where {T1<:Number, T2 <: Number, D<:Distribution} +# UncertainValue(distribution::Type{D}, a::T1, b::T2; +# kwargs...) where {T1 <: Number, T2 <: Number, D <: Distribution} → TheoreticalDistributionScalarValue -# Constructor for two-parameter distributions +# # Constructor for two-parameter distributions -`UncertainValue`s are currently implemented for the following two-parameter -distributions: `Uniform`, `Normal`, `Binomial`, `Beta`, `BetaPrime`, `Gamma`, -and `Frechet`. +# `UncertainValue`s are currently implemented for the following two-parameter +# distributions: `Uniform`, `Normal`, `Binomial`, `Beta`, `BetaPrime`, `Gamma`, +# and `Frechet`. -### Arguments +# ### Arguments -- **`a`, `b`**: Generic parameters whose meaning varies depending - on what `distribution` is provided. See the list below. -- **`distribution`**: A valid univariate distribution from `Distributions.jl`. +# - **`a`, `b`**: Generic parameters whose meaning varies depending +# on what `distribution` is provided. See the list below. +# - **`distribution`**: A valid univariate distribution from `Distributions.jl`. -Precisely what `a` and `b` are depends on which distribution is provided. +# Precisely what `a` and `b` are depends on which distribution is provided. -- `UncertainValue(Normal, μ, σ)` returns an `UncertainScalarNormallyDistributed` instance. -- `UncertainValue(Uniform, lower, upper)` returns an `UncertainScalarUniformlyDistributed` instance. -- `UncertainValue(Beta, α, β)` returns an `UncertainScalarBetaDistributed` instance. -- `UncertainValue(BetaPrime, α, β)` returns an `UncertainScalarBetaPrimeDistributed` instance. -- `UncertainValue(Gamma, α, θ)` returns an `UncertainScalarGammaDistributed` instance. -- `UncertainValue(Frechet, α, θ)` returns an `UncertainScalarFrechetDistributed` instance. -- `UncertainValue(Binomial, n, p)` returns an `UncertainScalarBinomialDistributed` instance. +# - `UncertainValue(Normal, μ, σ)` returns an `UncertainScalarNormallyDistributed` instance. +# - `UncertainValue(Uniform, lower, upper)` returns an `UncertainScalarUniformlyDistributed` instance. +# - `UncertainValue(Beta, α, β)` returns an `UncertainScalarBetaDistributed` instance. +# - `UncertainValue(BetaPrime, α, β)` returns an `UncertainScalarBetaPrimeDistributed` instance. +# - `UncertainValue(Gamma, α, θ)` returns an `UncertainScalarGammaDistributed` instance. +# - `UncertainValue(Frechet, α, θ)` returns an `UncertainScalarFrechetDistributed` instance. +# - `UncertainValue(Binomial, n, p)` returns an `UncertainScalarBinomialDistributed` instance. -### Keyword arguments +# ### Keyword arguments -- **`nσ`**: If `distribution <: Distributions.Normal`, then how many standard - deviations away from `μ` does `lower` and `upper` (i.e. 
both, because - they are the same distance away from `μ`) represent? -- **`tolerance`**: A threshold determining how symmetric the uncertainties - must be in order to allow the construction of Normal distribution - (`upper - lower > threshold` is required). -- **`trunc_lower`**: Lower truncation bound for distributions with infinite - support. Defaults to `-Inf`. -- **`trunc_upper`**: Upper truncation bound for distributions with infinite - support. Defaults to `Inf`. +# - **`nσ`**: If `distribution <: Distributions.Normal`, then how many standard +# deviations away from `μ` does `trunc_lower` and `trunc_upper` (i.e. both, because +# they are the same distance away from `μ`) represent? +# - **`tolerance`**: A threshold determining how symmetric the uncertainties +# must be in order to allow the construction of Normal distribution +# (`upper - lower > threshold` is required). +# - **`trunc_lower`**: Lower truncation bound for distributions with infinite +# support. Defaults to `-Inf`. +# - **`trunc_upper`**: Upper truncation bound for distributions with infinite +# support. Defaults to `Inf`. -## Examples +# ## Examples -### Normal distribution +# ### Normal distribution -Normal distributions are formed by using the constructor -`UncertainValue(μ, σ, Normal; kwargs...)`. This gives a normal distribution with -mean μ and standard deviation σ/nσ (nσ must be given as a keyword argument). +# Normal distributions are formed by using the constructor +# `UncertainValue(μ, σ, Normal; kwargs...)`. This gives a normal distribution with +# mean μ and standard deviation σ/nσ (nσ must be given as a keyword argument). -```julia -# A normal distribution with mean = 2.3 and standard deviation 0.3. -UncertainValue(2.3, 0.3, Normal) +# ```julia +# # A normal distribution with mean = 2.3 and standard deviation 0.3. +# UncertainValue(2.3, 0.3, Normal) -# A normal distribution with mean 2.3 and standard deviation 0.3/2. -UncertainValue(2.3, 0.3, Normal, nσ = 2) +# # A normal distribution with mean 2.3 and standard deviation 0.3/2. +# UncertainValue(2.3, 0.3, Normal, nσ = 2) -# A normal distribution with mean 2.3 and standard deviation = 0.3, -truncated to the interval `[1, 3]`. -UncertainValue(2.3, 0.3, Normal, trunc_lower = 1.0, trunc_upper = 3.0) -``` +# # A normal distribution with mean 2.3 and standard deviation = 0.3, +# truncated to the interval `[1, 3]`. +# UncertainValue(2.3, 0.3, Normal, trunc_lower = 1.0, trunc_upper = 3.0) +# ``` -### Uniform distribution +# ### Uniform distribution -Uniform distributions are formed using the -`UncertainValue(lower, upper, Uniform)` constructor. +# Uniform distributions are formed using the +# `UncertainValue(lower, upper, Uniform)` constructor. -```julia -# A uniform distribution on `[2, 3]` -UncertainValue(-2, 3, Uniform) -``` +# ```julia +# # A uniform distribution on `[2, 3]` +# UncertainValue(-2, 3, Uniform) +# ``` -""" +# """ function UncertainValue(distribution::Type{D}, a::T1, b::T2; kwargs...) where {T1<:Number, T2 <: Number, D<:Distribution} @@ -234,54 +341,11 @@ function UncertainValue(distribution::Type{D}, a::T1, b::T2; dist = assigndist_frechet(a, b; kwargs...) UncertainScalarFrechetDistributed(dist, a, b) else - throw(DomainError("Two-parameter $dist is not implemented.")) + throw(DomainError("Two-parameter $distribution distribution is not implemented")) end end - -""" - UncertainValue(distribution::Type{D}, a::T1, b::T2, c::T3; - kwargs...) 
where {T1<:Number, T2<:Number, T3<:Number, D<:Distribution} - -## Constructor for three-parameter distributions - -Currently implemented distributions are `BetaBinomial`. - -### Arguments -- **`a`, `b`, `c`**: Generic parameters whose meaning varies depending - on what `distribution` is provided. See the list below. -- **`distribution`**: A valid univariate distribution from `Distributions.jl`. - -Precisely what `a`, `b` and `c` are depends on which distribution is provided. - -- `UncertainValue(BetaBinomial, n, α, β)` returns an `UncertainScalarBetaBinomialDistributed` instance. - - -### Keyword arguments -- **`nσ`**: If `distribution <: Distributions.Normal`, then how many standard - deviations away from `μ` does `lower` and `upper` (i.e. both, because - they are the same distance away from `μ`) represent? -- **`tolerance`**: A threshold determining how symmetric the uncertainties - must be in order to allow the construction of Normal distribution - (`upper - lower > threshold` is required). -- **`trunc_lower`**: Lower truncation bound for distributions with infinite - support. Defaults to `-Inf`. -- **`trunc_upper`**: Upper truncation bound for distributions with infinite - support. Defaults to `Inf`. - -## Examples -### BetaBinomial distribution - -Normal distributions are formed by using the constructor -`UncertainValue(μ, σ, Normal; kwargs...)`. This gives a normal distribution with -mean μ and standard deviation σ/nσ (nσ must be given as a keyword argument). - -```julia -# A beta binomial distribution with n = 100 trials and parameters α = 2.3 and -# β = 5 -UncertainValue(100, 2.3, 5, BetaBinomial) -``` -""" +# TODO: make TheoreticalDistributionScalarValue type parametric on the input distribution function UncertainValue(distribution::Type{D}, a::T1, b::T2, c::T3; kwargs...) where {T1<:Number, T2<:Number, T3<:Number, D<:Distribution} @@ -326,34 +390,7 @@ function untruncated_disttype(t::Distributions.Truncated) return typeof(t_untrunc) end -""" - UncertainValue(t::Distributions.Truncated) - -Construct an uncertain value from an instance of a distribution. If a specific -uncertain value type has not been implemented, the number of parameters is -determined from the distribution and an instance of one of the following types -is returned: - -- `ConstrainedUncertainScalarValueOneParameter` -- `ConstrainedUncertainScalarValueTwoParameter` -- `ConstrainedUncertainScalarValueThreeParameter` - -## Examples - -```julia -# Normal distribution truncated to the interval [0.5, 0.7] -t = truncated(Normal(0, 1), 0.5, 0.7) -UncertainValue(t) - -# Gamma distribution truncated to the interval [0.5, 3.5] -t = Truncate(Gamma(4, 5.1), 0.5, 3.5) -UncertainValue(t) - -# Binomial distribution truncated to the interval [2, 7] -t = Truncate(Binomial(10, 0.4), 2, 7) -UncertainValue(t) -``` -""" +#TODO: this is not type-stable. function UncertainValue(t::Distributions.Truncated) dist_type = untruncated_disttype(t) original_dist = untruncated_dist(t) @@ -370,26 +407,8 @@ function UncertainValue(t::Distributions.Truncated) end end -""" - UncertainValue(d::Distributions.Distribution) - -Construct an uncertain value from an instance of a distribution. If a specific -uncertain value type has not been implemented, the number of parameters is -determined from the distribution and an instance of one of the following types -is returned: - -- `UncertainScalarTheoreticalOneParameter` -- `UncertainScalarTheoreticalTwoParameter` -- `UncertainScalarTheoreticalThreeParameter` +#TODO: this is not type-stable. 
-## Examples - -```julia -UncertainValue(Normal(0, 1)) -UncertainValue(Gamma(4, 5.1)) -UncertainValue(Binomial, 8, 0.2) -``` -""" function UncertainValue(d::Distributions.Distribution) params = fieldnames(typeof(d)) n_params = length(params) @@ -411,17 +430,21 @@ function UncertainValue(d::Distributions.Distribution) UncertainScalarFrechetDistributed(d, param_values...) # if no specific type is implemented for this distribution, just create # a generic one - else - if n_params == 1 - return UncertainScalarTheoreticalOneParameter(d, param_values...) - elseif n_params == 2 - return UncertainScalarTheoreticalTwoParameter(d, param_values...) - elseif n_params == 3 - return UncertainScalarTheoreticalThreeParameter(d, param_values...) - else - msg = "uncertain value type for $n_params-parameter $d not implemented." + else + # Todo: generic types are not implemented yet + msg = "uncertain value type for $n_params-parameter $d not implemented." throw(DomainError(msg)) - end + + # if n_params == 1 + # return UncertainScalarTheoreticalOneParameter(d, param_values...) + # elseif n_params == 2 + # return UncertainScalarTheoreticalTwoParameter(d, param_values...) + # elseif n_params == 3 + # return UncertainScalarTheoreticalThreeParameter(d, param_values...) + # else + # msg = "uncertain value type for $n_params-parameter $d not implemented." + # throw(DomainError(msg)) + # end end end diff --git a/src/uncertain_values/UncertainValues.jl b/src/uncertain_values/UncertainValues.jl index 5df52e43..0a7d63e4 100644 --- a/src/uncertain_values/UncertainValues.jl +++ b/src/uncertain_values/UncertainValues.jl @@ -57,7 +57,7 @@ using Reexport include("UncertainScalarPopulation.jl") # Certain values (i.e. values without uncertainty) - include("CertainValue.jl") + include("CertainScalar.jl") ########################################## # Composite uncertain vector types diff --git a/src/uncertain_values/abstract_types/AbstractScalarPopulation.jl b/src/uncertain_values/abstract_types/AbstractScalarPopulation.jl index b28765d8..1c3496b3 100644 --- a/src/uncertain_values/abstract_types/AbstractScalarPopulation.jl +++ b/src/uncertain_values/abstract_types/AbstractScalarPopulation.jl @@ -10,17 +10,17 @@ An abstract type for population-based uncertain scalar values. 
""" abstract type AbstractScalarPopulation{T, PW} <: AbstractPopulation end -Base.length(p::AbstractScalarPopulation) = length(p.values) -Base.getindex(p::AbstractScalarPopulation, i) = p.values[i] +Base.length(p::AbstractScalarPopulation) = length(p.members) +Base.getindex(p::AbstractScalarPopulation, i) = p.members[i] Base.firstindex(p::AbstractScalarPopulation) = 1 -Base.lastindex(p::AbstractScalarPopulation) = length(p.values) +Base.lastindex(p::AbstractScalarPopulation) = length(p.members) Base.eachindex(p::AbstractScalarPopulation) = Base.OneTo(lastindex(p)) -Base.iterate(p::AbstractScalarPopulation, state = 1) = iterate(p.values, state) +Base.iterate(p::AbstractScalarPopulation, state = 1) = iterate(p.members, state) function summarise(p::AbstractScalarPopulation) _type = typeof(p) - l = length(p.values) + l = length(p.members) summary = "$_type containing $l values" return summary end @@ -31,10 +31,10 @@ Base.minimum(p::AbstractScalarPopulation) = minimum(p) Base.maximum(p::AbstractScalarPopulation) = maximum(p) Base.minimum(pop::AbstractScalarPopulation{T, PW} where {T <: Number, PW}) = - minimum(pop.values) + minimum(pop.members) Base.maximum(pop::AbstractScalarPopulation{T, PW} where {T <: Number, PW}) = - maximum(pop.values) + maximum(pop.members) Base.minimum(pop::AbstractScalarPopulation{T, PW} where {T <: AbstractUncertainValue, PW}) = minimum([minimum(uv) for uv in pop]) @@ -45,11 +45,11 @@ Base.maximum(pop::AbstractScalarPopulation{T, PW} where {T <: AbstractUncertainV Distributions.support(p::AbstractScalarPopulation) = interval(minimum(p), maximum(p)) function Base.rand(pop::AbstractScalarPopulation{T, PW}) where {T <: Number, PW} - StatsBase.sample(pop.values, pop.probs) + StatsBase.sample(pop.members, pop.probs) end function Base.rand(pop::AbstractScalarPopulation{T, PW}, n::Int) where {T <: Number, PW} - StatsBase.sample(pop.values, pop.probs, n) + StatsBase.sample(pop.members, pop.probs, n) end function Base.rand(pop::AbstractScalarPopulation{T, PW}) where {T <: AbstractUncertainValue, PW} diff --git a/src/uncertain_values/convert.jl b/src/uncertain_values/convert.jl index 0684f53a..fcbc4b6e 100644 --- a/src/uncertain_values/convert.jl +++ b/src/uncertain_values/convert.jl @@ -1,2 +1,2 @@ -convert(::Type{CertainValue}, x::T) where {T <: Number} = CertainValue(x) -convert(::Type{T1}, x::T2) where {T1 <: AbstractUncertainValue, T2 <: Number} = CertainValue(x) +convert(::Type{CertainScalar}, x::T) where {T <: Number} = CertainScalar(x) +convert(::Type{T1}, x::T2) where {T1 <: AbstractUncertainValue, T2 <: Number} = CertainScalar(x) diff --git a/src/uncertain_values/operations/comparisons.jl b/src/uncertain_values/operations/comparisons.jl index 87a73297..63404d2b 100644 --- a/src/uncertain_values/operations/comparisons.jl +++ b/src/uncertain_values/operations/comparisons.jl @@ -1,8 +1,8 @@ import Base.< import Base.isapprox -Base.:<(x::T1, y::CertainValue{T2}) where {T1 <: Real, T2 <: Real} = x < y.value -Base.:<(x::CertainValue{T1}, y::T2) where {T1 <: Real, T2 <: Real} = x.value < y -Base.isless(x::CertainValue{T1}, y::CertainValue{T2}) where {T1 <: Real, T2 <: Real} = isless(x.value, y.value) -Base.isapprox(x::CertainValue{T1}, y::T2) where {T1 <: Real, T2 <: Real} = isapprox(x.value, y) -Base.isapprox(x::T1, y::CertainValue{T2}) where {T1 <: Real, T2 <: Real} = isapprox(x, y.value) +Base.:<(x::T1, y::CertainScalar{T2}) where {T1 <: Real, T2 <: Real} = x < y.value +Base.:<(x::CertainScalar{T1}, y::T2) where {T1 <: Real, T2 <: Real} = x.value < y 
+Base.isless(x::CertainScalar{T1}, y::CertainScalar{T2}) where {T1 <: Real, T2 <: Real} = isless(x.value, y.value)
+Base.isapprox(x::CertainScalar{T1}, y::T2) where {T1 <: Real, T2 <: Real} = isapprox(x.value, y)
+Base.isapprox(x::T1, y::CertainScalar{T2}) where {T1 <: Real, T2 <: Real} = isapprox(x, y.value)
diff --git a/src/uncertain_values/operations/merging.jl b/src/uncertain_values/operations/merging.jl
index 6a882871..56a2f54e 100644
--- a/src/uncertain_values/operations/merging.jl
+++ b/src/uncertain_values/operations/merging.jl
@@ -1,22 +1,31 @@
 """
-    combine(uvals::Vector{AbstractUncertainValue}; n = 10000*length(uvals), 
-        bw::Union{Nothing, Real} = nothing)
+    combine(x::Vector{AbstractUncertainValue};
+        n = 10000*length(x), bw::Union{Nothing, Real} = nothing) → UncertainScalarKDE
+    combine(x::Vector{AbstractUncertainValue}, weights::ProbabilityWeights; kwargs...) → UncertainScalarKDE
+    combine(x::Vector{AbstractUncertainValue}, weights::AnalyticWeights; kwargs...) → UncertainScalarKDE
+    combine(x::Vector{AbstractUncertainValue}, weights::FrequencyWeights; kwargs...) → UncertainScalarKDE
 
-Combine multiple uncertain values into a single uncertain value. This is 
-done by resampling each uncertain value in `uvals`, `n` times each, 
-then pooling these draws together. Finally, a kernel density estimate to the final 
-distribution is computed over those draws.
+Combine multiple uncertain values `x` into a single uncertain value using kernel
+density estimation (KDE). This is done by resampling each uncertain value in `x`,
+`n` times each, then pooling these draws together. Finally, an approximation
+to the final distribution is computed over those draws using KDE.
 
 The KDE bandwidth is controlled by `bw`. By default, `bw = nothing`; in this case, 
 the bandwidth is determined using the `KernelDensity.default_bandwidth` function.
+Tip: For very wide, close-to-normal distributions, the default bandwidth usually
+works well. For very peaked distributions or discrete populations, however,
+lowering the bandwidth significantly may be a better choice.
 
-!!! tip
+If no weights are provided, the sample pool on which KDE is performed is computed
+by resampling each of the `N` uncertain values `n/N` times and pooling these values
+together. If `weights` are provided, then the `weights` control the relative sampling
+importance of the elements of `x`. `Weights`, `ProbabilityWeights` and `AnalyticWeights` are
+functionally the same, and represent relative sampling probabilities. Either
+may be used depending on whether the weights are assigned subjectively or quantitatively.
+With `FrequencyWeights`, it is possible to control the exact number of draws from each
+uncertain value that goes into the draw pool before performing kernel density estimation.
 
-    For very wide, close-to-normal distributions, the default bandwidth may work well. 
-    If you're combining very peaked distributions or discrete populations, however, 
-    you may want to lower the bandwidth significantly. 
- -# Example +## Example ```julia v1 = UncertainValue(Normal, 1, 0.3) @@ -80,7 +89,6 @@ uvals = [v1, v2, v3, v4]; combine(uvals, ProbabilityWeights([0.2, 0.1, 0.3, 0.2])) combine(uvals, pweights([0.2, 0.1, 0.3, 0.2]), n = 20000) # adjust number of total draws ``` - """ function combine(uvals::Vector{AbstractUncertainValue}, weights::ProbabilityWeights; n = 10000*length(uvals), diff --git a/test/mathematics/uncertain_values/test_elementary_maths_uncertainvalues.jl b/test/mathematics/uncertain_values/test_elementary_maths_uncertainvalues.jl index 237882ed..cd9191a4 100644 --- a/test/mathematics/uncertain_values/test_elementary_maths_uncertainvalues.jl +++ b/test/mathematics/uncertain_values/test_elementary_maths_uncertainvalues.jl @@ -6,8 +6,8 @@ M = MixtureModel([Normal(3, 0.2), Normal(2, 1)]) r1 = UncertainValue(Normal, rand(), rand()) r2 = UncertainValue(rand(M, 10000)) r3 = UncertainValue(Normal, rand(Normal(4, 3.2), 10000)) -r4 = CertainValue(2.2) -r5 = CertainValue(2) +r4 = CertainScalar(2.2) +r5 = CertainScalar(2) uvals = [r1; r2; r3] diff --git a/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_decreasing.jl b/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_decreasing.jl index 5888a9ea..2062137e 100644 --- a/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_decreasing.jl +++ b/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_decreasing.jl @@ -5,7 +5,7 @@ using Test, UncertainData # Create some uncertain data with decreasing magnitude and zero overlap between values, # so we're guaranteed that a strictly decreasing sequence through the dataset exists. N = 10 - t = [i <= N/2 ? CertainValue(float(i)) : UncertainValue(Normal, i, 1) for i = N:-1:1] + t = [i <= N/2 ? CertainScalar(float(i)) : UncertainValue(Normal, i, 1) for i = N:-1:1] T = UncertainIndexDataset(t) iv = UncertainIndexValueDataset(t, t) diff --git a/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_increasing.jl b/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_increasing.jl index 826104c8..585fd857 100644 --- a/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_increasing.jl +++ b/test/resampling/uncertain_datasets/sequential/test_resampling_sequential_increasing.jl @@ -6,7 +6,7 @@ using StatsBase # Create some uncertain data with decreasing magnitude and zero overlap between values, # so we're guaranteed that a strictly decreasing sequence through the dataset exists. N = 10 - t = [ i <= N/2 ? CertainValue(float(i)) : UncertainValue(Normal, i, 1) for i = 1:N] + t = [ i <= N/2 ? 
CertainScalar(float(i)) : UncertainValue(Normal, i, 1) for i = 1:N] T = UncertainIndexDataset(t) iv = UncertainIndexValueDataset(t, t) diff --git a/test/resampling/uncertain_datasets/test_resampling_with_schemes.jl b/test/resampling/uncertain_datasets/test_resampling_with_schemes.jl index 67fa4dbd..7149aece 100644 --- a/test/resampling/uncertain_datasets/test_resampling_with_schemes.jl +++ b/test/resampling/uncertain_datasets/test_resampling_with_schemes.jl @@ -8,7 +8,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x = UncertainIndexDataset(time_uncertain) timeinds_y = UncertainIndexDataset(time_certain) diff --git a/test/resampling/uncertain_values/test_resampling_certain_value.jl b/test/resampling/uncertain_values/test_resampling_certain_value.jl index de4e659e..dbcdd285 100644 --- a/test/resampling/uncertain_values/test_resampling_certain_value.jl +++ b/test/resampling/uncertain_values/test_resampling_certain_value.jl @@ -1,4 +1,4 @@ -x = CertainValue(2.0) +x = CertainScalar(2.0) test_constraints = [ NoConstraint(), diff --git a/test/runtests.jl b/test/runtests.jl index 33951c46..315b173b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,7 +22,7 @@ using KernelDensity include("uncertain_values/test_minmax.jl") end - @testset "CertainValue" begin + @testset "CertainScalar" begin include("uncertain_values/test_certain_values.jl") end diff --git a/test/sampling_constraints/test_constrain_certainvalue.jl b/test/sampling_constraints/test_constrain_certainvalue.jl index c1451eeb..92f49ec9 100644 --- a/test/sampling_constraints/test_constrain_certainvalue.jl +++ b/test/sampling_constraints/test_constrain_certainvalue.jl @@ -1,4 +1,4 @@ -x = CertainValue(2.0) +x = CertainScalar(2.0) test_constraints = [ NoConstraint(), @@ -12,5 +12,5 @@ test_constraints = [ ] for constraint in test_constraints - @test constrain(x, constraint) isa CertainValue + @test constrain(x, constraint) isa CertainScalar end \ No newline at end of file diff --git a/test/sampling_constraints/test_constrain_with_schemes.jl b/test/sampling_constraints/test_constrain_with_schemes.jl index 88a047a3..899531ac 100644 --- a/test/sampling_constraints/test_constrain_with_schemes.jl +++ b/test/sampling_constraints/test_constrain_with_schemes.jl @@ -8,7 +8,7 @@ x = UncertainValueDataset(x_uncertain) y = UncertainValueDataset(y_uncertain) time_uncertain = [UncertainValue(Normal, i, 1) for i = 1:length(x)]; -time_certain = [CertainValue(i) for i = 1:length(x)]; +time_certain = [CertainScalar(i) for i = 1:length(x)]; timeinds_x = UncertainIndexDataset(time_uncertain) timeinds_y = UncertainIndexDataset(time_certain) diff --git a/test/uncertain_datasets/test_uncertain_index_value_dataset.jl b/test/uncertain_datasets/test_uncertain_index_value_dataset.jl index 52263d45..f2180d4c 100644 --- a/test/uncertain_datasets/test_uncertain_index_value_dataset.jl +++ b/test/uncertain_datasets/test_uncertain_index_value_dataset.jl @@ -9,7 +9,7 @@ o2 = UncertainValue(Normal, 2, 0.3) o3 = UncertainValue(Uniform, 0, 4) o4 = UncertainValue(Uniform, rand(100)) o5 = UncertainValue(rand(400)) -o7 = CertainValue(2) +o7 = CertainScalar(2) o8 = UncertainValue([2, 3, 4], [4, 5, 2]) o9 = UncertainValue([2, 4, 5, 2], rand(4)) diff --git a/test/uncertain_values/populations/test_ConstrainedUncertainScalarPopulation.jl 
b/test/uncertain_values/populations/test_ConstrainedUncertainScalarPopulation.jl index 8db47d6f..6528cdc4 100644 --- a/test/uncertain_values/populations/test_ConstrainedUncertainScalarPopulation.jl +++ b/test/uncertain_values/populations/test_ConstrainedUncertainScalarPopulation.jl @@ -1,4 +1,4 @@ -# Uncertain population consisting of CertainValues (scalars get promoted to CertainValue)s +# Uncertain population consisting of CertainScalars (scalars get promoted to CertainScalar)s # theoretical distributions and KDE distributions p1 = ConstrainedUncertainScalarPopulation( [3.0, UncertainValue(Normal, 0, 1), diff --git a/test/uncertain_values/populations/test_UncertainScalarPopulation.jl b/test/uncertain_values/populations/test_UncertainScalarPopulation.jl index 59b69219..06825c45 100644 --- a/test/uncertain_values/populations/test_UncertainScalarPopulation.jl +++ b/test/uncertain_values/populations/test_UncertainScalarPopulation.jl @@ -1,6 +1,6 @@ import StatsBase: AbstractWeights -# Uncertain population consisting of CertainValues (scalars get promoted to CertainValue)s +# Uncertain population consisting of CertainScalars (scalars get promoted to CertainScalar)s # theoretical distributions and KDE distributions p1 = UncertainScalarPopulation( [3.0, UncertainValue(Normal, 0, 1), diff --git a/test/uncertain_values/test_certain_values.jl b/test/uncertain_values/test_certain_values.jl index 4873f135..a5b9e45e 100644 --- a/test/uncertain_values/test_certain_values.jl +++ b/test/uncertain_values/test_certain_values.jl @@ -2,5 +2,5 @@ x = 3 y = 3.3 -@test UncertainValue(x) isa CertainValue -@test UncertainValue(y) isa CertainValue \ No newline at end of file +@test UncertainValue(x) isa CertainScalar +@test UncertainValue(y) isa CertainScalar \ No newline at end of file