From fd739dc703cb980e44292adc424548c51286214a Mon Sep 17 00:00:00 2001 From: Nathanael Bosch Date: Wed, 17 Jan 2024 09:48:13 +0000 Subject: [PATCH] Refactor the prior implementation (#286) * Rename `AbstractODEFilterPrior` to `AbstractGaussMarkovPrior` * Use accessors instead of the fields directly * Small intermediate clean-up * Rename AbstractGaussMarkovPrior to AbstractGaussMarkovProcess * Make the initial distribution part of the prior * Specialize the initial distribution for Matern * Implement a `remake` to change elType and dimension of priors * Add MatrixEquations.jl * Add Compat entry for MatrixEquations.jl * Fix a typo * Fix vale.sh issues * Fix another bug * Fix an IOUP bug I introduced * JuliaFormatter.jl * JuliaFormatter.jl update apparently changed a bit * Add the priors to the basic ODE correctness tests * Test the Matern initial distribution in the priors tests * Make IOUP and Matern use the default LTISDE discretize * JuliaFormatter.jl * Add a plotting convenience function * Better plotting of priors with dimension > 1 * Improve the plotting some more * Add the new prior functionality to the tests * JuliaFormatter.jl * Make the plotting much nicer * Add many docstrings * Start working more on the prior documentation * More sensible Documenter.jl settings * Make the priors one-dimensional by default * Give the probabilistic exponential integrator tutorial an id * Make the prior plot layout less opinionated * Fix some prior bugs I introduced now that they're 1-D * Refactor priors some more * Make the plot recipe yet again less opinionated * Towards a better prior documentation * Fix a bug * Better docs * Better docs * Fix some tests * Better docs * Better plots * JuliaFormatter.jl * Somewhat better docs * Change the field names and actually use accessors * Remove things from the plot recipe * Better errors * Fix the Matern constructor * Fix the IOUP constructor * Fix more bugs * Test some prior-related error messages * JuliaFormatter.jl * Rely more on keywords for the priors * Make tip into info * Fix another failing test * JuliaFormatter.jl * JuliaFormatter.jl * Improve some more docstrings * Add a newline in the docstring math --- .../vocabularies}/ProbNumDiffEq/accept.txt | 0 .../vocabularies}/ProbNumDiffEq/reject.txt | 0 Project.toml | 2 + docs/make.jl | 3 +- docs/src/priors.md | 99 +++++++++- docs/src/refs.bib | 11 ++ docs/src/tutorials/exponential_integrators.md | 2 +- ext/RecipesBaseExt.jl | 63 ++++++ src/ProbNumDiffEq.jl | 5 +- src/alg_utils.jl | 2 +- src/algorithms.jl | 12 +- src/caches.jl | 52 ++--- src/covariance_structure.jl | 6 +- src/priors/common.jl | 185 ++++++++++++++++-- src/priors/ioup.jl | 110 +++++------ src/priors/iwp.jl | 60 +++--- src/priors/ltisde.jl | 7 +- src/priors/matern.jl | 67 ++++--- test/core/preconditioning.jl | 2 +- test/core/priors.jl | 41 +++- test/correctness.jl | 12 ++ test/errors_thrown.jl | 7 + 22 files changed, 566 insertions(+), 182 deletions(-) rename .github/vale-styles/{Vocab => config/vocabularies}/ProbNumDiffEq/accept.txt (100%) rename .github/vale-styles/{Vocab => config/vocabularies}/ProbNumDiffEq/reject.txt (100%) diff --git a/.github/vale-styles/Vocab/ProbNumDiffEq/accept.txt b/.github/vale-styles/config/vocabularies/ProbNumDiffEq/accept.txt similarity index 100% rename from .github/vale-styles/Vocab/ProbNumDiffEq/accept.txt rename to .github/vale-styles/config/vocabularies/ProbNumDiffEq/accept.txt diff --git a/.github/vale-styles/Vocab/ProbNumDiffEq/reject.txt b/.github/vale-styles/config/vocabularies/ProbNumDiffEq/reject.txt similarity index 100% rename from .github/vale-styles/Vocab/ProbNumDiffEq/reject.txt rename to .github/vale-styles/config/vocabularies/ProbNumDiffEq/reject.txt diff --git a/Project.toml b/Project.toml index 1530af23c..a5b4c1226 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,7 @@ FunctionWrappersWrappers = "77dc65aa-8811-40c2-897b-53d922fa7daf" GaussianDistributions = "43dcc890-d446-5863-8d1a-14597580bb8d" Kronecker = "2c470bb0-bcc8-11e8-3dad-c9649493f05e" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MatrixEquations = "99c1a7ee-ab34-5fd5-8076-27c950a045f4" Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" PSDMatrices = "fe68d972-6fd8-4755-bdf0-97d4c54cefdc" @@ -56,6 +57,7 @@ FunctionWrappersWrappers = "0.1.3" GaussianDistributions = "0.5" Kronecker = "0.5.4" LinearAlgebra = "1" +MatrixEquations = "2" Octavian = "0.3.17" OrdinaryDiffEq = "6.52" PSDMatrices = "0.4.7" diff --git a/docs/make.jl b/docs/make.jl index 5d25d8a69..19e607fd7 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -69,7 +69,8 @@ makedocs( ], "References" => "references.md", ], - warnonly=:missing_docs, + warnonly=Documenter.except(:missing_docs), + checkdocs=:none, ) # Documenter can also automatically deploy documentation to gh-pages. diff --git a/docs/src/priors.md b/docs/src/priors.md index 5fa1fba7e..bed6d8560 100644 --- a/docs/src/priors.md +++ b/docs/src/priors.md @@ -2,7 +2,7 @@ **TL;DR: If you're unsure which prior to use, just stick to the default integrated Wiener process prior [`IWP`](@ref)!** -# Background +## Background We model the ODE solution ``y(t)`` with a Gauss--Markov prior. More precisely, let @@ -24,14 +24,103 @@ Then ``Y^{(i)}(t)`` models the ``i``-th derivative of ``y(t)``. If you're more interested in the _diffusion_ ``\textcolor{#4063D8}{\Gamma}`` check out the [Diffusion models and calibration](@ref) section, and for info on the initial distribution ``\textcolor{purple}{ \mathcal{N} \left( \mu_0, \Sigma_0 \right) }`` check out the [Initialization](@ref) section. -**If you're unsure which prior to use, just stick to the integrated Wiener process prior [`IWP`](@ref)!** -This is also the default choice for all solvers. -The other priors are rather experimental / niche at the time of writing. +!!! info + **If you're unsure which prior to use, just stick to the integrated Wiener process prior [`IWP`](@ref)!** + This is also the default choice for all solvers. + The other priors are rather experimental / niche at the time of writing. -## API +## Prior choices + +ProbNumDiffEq.jl currently supports three classes of priors for the solvers: +- [`IWP`](@ref): ``q``-times integrated Wiener processes +- [`IOUP`](@ref): ``q``-times integrated Ornstein--Uhlenbeck processes +- [`Matern`](@ref): Matérn processes +Let's look at each of them in turn and visualize some examples. + +### Integrated Wiener process ([`IWP`](@ref)) ```@docs IWP +``` +Here is how the [`IWP`](@ref) looks for varying smoothness parameters ``q``: +```@example priors +using ProbNumDiffEq, Plots +plotrange = range(0, 10, length=250) +plot( + plot(IWP(1), plotrange; title="q=1"), + plot(IWP(2), plotrange; title="q=2"), + plot(IWP(3), plotrange; title="q=3"), + plot(IWP(4), plotrange; title="q=4"); + ylims=(-20,20), +) +``` +In the context of ODE solvers, the smoothness parameter ``q`` influences the convergence rate of the solver, +and so it is typically chose similarly to the order of a Runge--Kutta method: lower order for low accuracy, higher order for high accuracy. + + +### Integrated Ornstein--Uhlenbeck process ([`IOUP`](@ref)) +The ``q``-times integrated Ornstein--Uhlenbeck process prior [`IOUP`](@ref) is a generalization of the IWP prior, where the drift matrix ``\textcolor{#389826}{A}`` is not zero: +```@docs IOUP +``` + +Here is how the [`IOUP`](@ref) looks for varying rate parameters: +```@example priors +using ProbNumDiffEq, Plots +plotrange = range(0, 10, length=250) +plot( + plot(IOUP(1, -1), plotrange; title="q=1,L=-1", ylims=(-20,20)), + plot(IOUP(1, 1), plotrange; title="q=1,L=1", ylims=(-20,20)), + plot(IOUP(4, -1), plotrange; title="q=4,L=-1", ylims=(-50,50)), + plot(IOUP(4, 1), plotrange; title="q=4,L=1", ylims=(-50,50)); +) +``` + +In the context of [Probabilistic Exponential Integrators](@ref probexpinttutorial), the rate parameter is often chosen according to the given ODE. +Here is an example for a damped oscillator: +```@example priors +plot(IOUP(2, 1, [-0.2 -2π; 2π -0.2]), plotrange; plot_title="damped oscillator prior"); +``` + +### Matérn process ([`Matern`](@ref)) +```@docs Matern ``` + +Here is how the [`Matern`](@ref) looks for varying smoothness parameters ``q``: +```@example priors +using ProbNumDiffEq, Plots +plotrange = range(0, 10, length=250) +plot( + plot(Matern(1, 1), plotrange; title="q=1"), + plot(Matern(2, 1), plotrange; title="q=2"), + plot(Matern(3, 1), plotrange; title="q=3"), + plot(Matern(4, 1), plotrange; title="q=4"); +) +``` +and for varying length scales ``\ell``: +```@example priors +plot( + plot(Matern(2, 5), plotrange; title="l=5"), + plot(Matern(2, 2), plotrange; title="l=2"), + plot(Matern(2, 1), plotrange; title="l=1"), + plot(Matern(2, 0.5), plotrange; title="l=0.5"), +) +``` + +## API +```@docs +ProbNumDiffEq.AbstractGaussMarkovProcess +ProbNumDiffEq.LTISDE +ProbNumDiffEq.dim +ProbNumDiffEq.num_derivatives +ProbNumDiffEq.to_sde +ProbNumDiffEq.discretize +ProbNumDiffEq.initial_distribution +``` + +### Convenience functions to analyze and visualize priors +```@docs +ProbNumDiffEq.marginalize +ProbNumDiffEq.sample +``` diff --git a/docs/src/refs.bib b/docs/src/refs.bib index cac8d1955..a7c7a21ce 100644 --- a/docs/src/refs.bib +++ b/docs/src/refs.bib @@ -170,3 +170,14 @@ @book{hennig22probnum author = {Hennig, Philipp and Osborne, Michael A. and Kersting, Hans P.}, year = 2022, } + +@book{sarkka19appliedsde, + place = {Cambridge}, + series = {Institute of Mathematical Statistics Textbooks}, + title = {Applied Stochastic Differential Equations}, + DOI = {10.1017/9781108186735}, + publisher = {Cambridge University Press}, + author = {Särkkä, Simo and Solin, Arno}, + year = 2019, + collection = {Institute of Mathematical Statistics Textbooks} +} \ No newline at end of file diff --git a/docs/src/tutorials/exponential_integrators.md b/docs/src/tutorials/exponential_integrators.md index b23059aa9..02db95af9 100644 --- a/docs/src/tutorials/exponential_integrators.md +++ b/docs/src/tutorials/exponential_integrators.md @@ -1,4 +1,4 @@ -# Probabilistic Exponential Integrators +# [Probabilistic Exponential Integrators](@id probexpinttutorial) Exponential integrators are a class of numerical methods for solving semi-linear ordinary differential equations (ODEs) of the form ```math diff --git a/ext/RecipesBaseExt.jl b/ext/RecipesBaseExt.jl index fb416c540..55816a4c4 100644 --- a/ext/RecipesBaseExt.jl +++ b/ext/RecipesBaseExt.jl @@ -72,4 +72,67 @@ import SciMLBase: interpret_vars, getsyms end end +@recipe function f( + process::ProbNumDiffEq.AbstractGaussMarkovProcess, + plotrange; + N_samples=10, + plot_derivatives=false, +) + marginals = ProbNumDiffEq.marginalize(process, plotrange) + d = ProbNumDiffEq.dim(process) + q = ProbNumDiffEq.num_derivatives(process) + means = [mean(m) for m in marginals] |> stack |> permutedims + stddevs = [std(m) for m in marginals] |> stack |> permutedims + + perm = permutedims(reshape(collect(1:d*(q+1)), q + 1, d))[:] + reorder(X) = X[:, perm] + + E0 = ProbNumDiffEq.projection(d, q)(0) + if !plot_derivatives + stddevs = stddevs * E0' + means = means * E0' + q = 0 + end + + dui(i) = "u" * "'"^i + if plot_derivatives + title --> [i == 1 ? "$(dui(j))" : "" for i in 1:d for j in 0:q] |> permutedims + end + ylabel --> [q == 0 ? "u$i" : "" for i in 1:d for q in 0:q] |> permutedims + xlabel --> if plot_derivatives + [i == d ? "t" : "" for i in 1:d for q in 0:q] |> permutedims + else + "t" + end + + @series begin + ribbon --> 3stddevs + label --> "" + fillalpha --> 0.1 + layout --> if plot_derivatives + (d, q + 1) + else + d + end + plotrange, means + end + + if N_samples > 0 + samples = ProbNumDiffEq.sample(process, plotrange, N_samples) |> stack + samples = permutedims(samples, (3, 1, 2)) + for i in 1:N_samples + @series begin + s = samples[:, :, i] + if !plot_derivatives + s = s * E0' + end + primary --> false + linealpha --> 0.3 + label := "" + plotrange, s + end + end + end +end + end diff --git a/src/ProbNumDiffEq.jl b/src/ProbNumDiffEq.jl index 6cf90d47a..3c6695a69 100644 --- a/src/ProbNumDiffEq.jl +++ b/src/ProbNumDiffEq.jl @@ -6,12 +6,12 @@ import Base: copy, copy!, show, size, ndims, similar, isapprox, isequal, iterate using LinearAlgebra import LinearAlgebra: mul! -import Statistics: mean, var, std +import Statistics: mean, var, std, cov using Reexport @reexport using DiffEqBase import SciMLBase -import SciMLBase: interpret_vars, getsyms +import SciMLBase: interpret_vars, getsyms, remake using OrdinaryDiffEq using SpecialMatrices, ToeplitzMatrices using FastBroadcast @@ -29,6 +29,7 @@ import Kronecker using ArrayAllocators using FiniteHorizonGramians using FillArrays +using MatrixEquations @reexport using GaussianDistributions diff --git a/src/alg_utils.jl b/src/alg_utils.jl index f41aa3ba3..a3c84736f 100644 --- a/src/alg_utils.jl +++ b/src/alg_utils.jl @@ -21,7 +21,7 @@ OrdinaryDiffEq.isimplicit(::EK1) = true ############################################ # Step size control OrdinaryDiffEq.isadaptive(::AbstractEK) = true -OrdinaryDiffEq.alg_order(alg::AbstractEK) = alg.prior.num_derivatives +OrdinaryDiffEq.alg_order(alg::AbstractEK) = num_derivatives(alg.prior) # OrdinaryDiffEq.alg_adaptive_order(alg::AbstractEK) = # PI control is the default! diff --git a/src/algorithms.jl b/src/algorithms.jl index 08d42e19b..9f7159115 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -8,7 +8,7 @@ abstract type AbstractEK <: OrdinaryDiffEq.OrdinaryDiffEqAdaptiveAlgorithm end smooth=true, prior=IWP(order), diffusionmodel=DynamicDiffusion(), - initialization=TaylorModeInit(prior.num_derivatives)) + initialization=TaylorModeInit(num_derivatives(prior))) **Gaussian ODE filter with zeroth-order vector field linearization.** @@ -25,7 +25,7 @@ which scales cubically with the problem size._ # Arguments - `order::Integer`: Order of the integrated Wiener process (IWP) prior. - `smooth::Bool`: Turn smoothing on/off; smoothing is required for dense output. -- `prior::AbstractODEFilterPrior`: Prior to be used by the ODE filter. +- `prior::AbstractGaussMarkovProcess`: Prior to be used by the ODE filter. By default, uses a 3-times integrated Wiener process prior `IWP(3)`. See also: [Priors](@ref). - `diffusionmodel::ProbNumDiffEq.AbstractDiffusion`: See [Diffusion models and calibration](@ref). @@ -49,7 +49,7 @@ EK0(; prior=IWP(order), diffusionmodel=DynamicDiffusion(), smooth=true, - initialization=TaylorModeInit(prior.num_derivatives), + initialization=TaylorModeInit(num_derivatives(prior)), ) = EK0(prior, diffusionmodel, smooth, initialization) _unwrap_val(::Val{B}) where {B} = B @@ -60,7 +60,7 @@ _unwrap_val(B) = B smooth=true, prior=IWP(order), diffusionmodel=DynamicDiffusion(), - initialization=TaylorModeInit(prior.num_derivatives), + initialization=TaylorModeInit(num_derivatives(prior)), kwargs...) **Gaussian ODE filter with first-order vector field linearization.** @@ -73,7 +73,7 @@ so if you're solving a high-dimensional non-stiff problem you might want to give # Arguments - `order::Integer`: Order of the integrated Wiener process (IWP) prior. - `smooth::Bool`: Turn smoothing on/off; smoothing is required for dense output. -- `prior::AbstractODEFilterPrior`: Prior to be used by the ODE filter. +- `prior::AbstractGaussMarkovProcess`: Prior to be used by the ODE filter. By default, uses a 3-times integrated Wiener process prior `IWP(3)`. See also: [Priors](@ref). - `diffusionmodel::ProbNumDiffEq.AbstractDiffusion`: See [Diffusion models and calibration](@ref). @@ -103,7 +103,7 @@ EK1(; prior::PT=IWP(order), diffusionmodel::DT=DynamicDiffusion(), smooth=true, - initialization::IT=TaylorModeInit(prior.num_derivatives), + initialization::IT=TaylorModeInit(num_derivatives(prior)), chunk_size=Val{0}(), autodiff=Val{true}(), diff_type=Val{:forward}, diff --git a/src/caches.jl b/src/caches.jl index 5360d7c08..9a1bef2a8 100644 --- a/src/caches.jl +++ b/src/caches.jl @@ -95,7 +95,7 @@ function OrdinaryDiffEq.alg_cache( is_secondorder_ode = f isa DynamicalODEFunction - q = alg.prior.num_derivatives + q = num_derivatives(alg.prior) d = is_secondorder_ode ? length(u[1, :]) : length(u) D = d * (q + 1) @@ -105,8 +105,11 @@ function OrdinaryDiffEq.alg_cache( FAC = get_covariance_structure(alg; elType=uElType, d, q) if FAC isa IsometricKroneckerCovariance && !(f.mass_matrix isa UniformScaling) - error( - "The selected algorithm uses an efficient Kronecker-factorized implementation which is incompatible with the provided mass matrix. Try using the `EK1` instead.", + throw( + ArgumentError( + "The selected algorithm uses an efficient Kronecker-factorized " * + "implementation which is incompatible with the provided mass matrix. " * + "Try using the `EK1` instead."), ) end @@ -119,18 +122,28 @@ function OrdinaryDiffEq.alg_cache( SolProj = solution_space_projection(FAC, is_secondorder_ode) # Prior dynamics - prior = if alg.prior isa IWP - IWP{uElType}(d, alg.prior.num_derivatives) - elseif alg.prior isa IOUP && ismissing(alg.prior.rate_parameter) - r = Array{uElType}(calloc, d, d) - IOUP{uElType}(d, q, r, alg.prior.update_rate_parameter) - elseif alg.prior isa IOUP - IOUP{uElType}(d, q, alg.prior.rate_parameter, alg.prior.update_rate_parameter) - elseif alg.prior isa Matern - Matern{uElType}(d, q, alg.prior.lengthscale) - else - error("Invalid prior $(alg.prior)") + if !(dim(alg.prior) == 1 || dim(alg.prior) == d) + throw( + DimensionMismatch( + "The dimension of the prior is not compatible with the dimension " * + "of the problem! The given ODE is $(d)-dimensional, but the prior is " * + "$(dim(alg.prior))-dimensional. Please make sure that the dimension of " * + "the prior is either 1 or $(d)."), + ) + end + prior = remake(alg.prior; elType=uElType, dim=d) + if (prior isa IOUP) && prior.update_rate_parameter + if !(prior.rate_parameter isa Missing) + throw( + ArgumentError( + "Do not manually set the `rate_parameter` of the IOUP prior when " * + "using the `update_rate_parameter=true` option." * + "Reset the prior and try again."), + ) + end + prior = remake(prior; rate_parameter=Array{uElType}(calloc, d, d)) end + A, Q, Ah, Qh, P, PI = initialize_transition_matrices(FAC, prior, dt) F, L = to_sde(prior) F, L = to_factorized_matrix(FAC, F), to_factorized_matrix(FAC, L) @@ -146,15 +159,8 @@ function OrdinaryDiffEq.alg_cache( measurement_model = make_measurement_model(f) # Initial State - initial_variance = ones(uElType, q + 1) - μ0 = uElType <: LinearAlgebra.BlasFloat ? Array{uElType}(calloc, D) : zeros(uElType, D) - Σ0 = PSDMatrix( - to_factorized_matrix( - FAC, - IsometricKroneckerProduct(d, diagm(sqrt.(initial_variance))), - ), - ) - x0 = Gaussian(μ0, Σ0) + x0 = initial_distribution(prior) + x0 = Gaussian(x0.μ, to_factorized_matrix(FAC, x0.Σ)) # Diffusion Model diffmodel = alg.diffusionmodel diff --git a/src/covariance_structure.jl b/src/covariance_structure.jl index 159c8d918..7390b5124 100644 --- a/src/covariance_structure.jl +++ b/src/covariance_structure.jl @@ -42,6 +42,8 @@ factorized_similar(::DenseCovariance{T}, size1, size2) where {T} = similar(Matrix{T}, size1, size2) to_factorized_matrix(::DenseCovariance, M::AbstractMatrix) = Matrix(M) -to_factorized_matrix(::IsometricKroneckerCovariance, M::AbstractMatrix) = - IsometricKroneckerProduct(M) # probably errors to_factorized_matrix(::IsometricKroneckerCovariance, M::IsometricKroneckerProduct) = M +for FT in [:DenseCovariance, :IsometricKroneckerCovariance] + @eval to_factorized_matrix(FAC::$FT, M::PSDMatrix) = + PSDMatrix(to_factorized_matrix(FAC, M.R)) +end diff --git a/src/priors/common.jl b/src/priors/common.jl index 49522ce8f..f6bf51a69 100644 --- a/src/priors/common.jl +++ b/src/priors/common.jl @@ -1,12 +1,120 @@ -abstract type AbstractODEFilterPrior{elType} end +@doc raw""" + AbstractGaussMarkovProcess{elType} +Abstract type for Gauss-Markov processes. + +Gauss-Markov processes are solutions to linear time-invariant stochastic differential +equations (SDEs). Here we assume SDEs of the form +```math +\begin{aligned} +dX_t &= F X_t dt + L dW_t \\ +X_0 &= \mathcal{N} \left( X_0; \mu_0, \Sigma_0 \right) +\end{aligned} +``` +where ``X_t`` is the state, ``W_t`` is a Wiener process, and ``F`` and ``L`` are matrices. + +Currently, ProbNumDiffEq.jl makes many assumptions about the structure of the SDEs that it +can solve. In particular, it assumes that the state vector ``X_t`` contains a range of +dervatives, and that the Wiener process only enters the highest one. It also assumes a +certain ordering of dimensions and derivatives. This is not a limitation of the underlying +mathematics, but rather a limitation of the current implementation. In the future, we hope +to remove these limitations. + +!!! warning + We currently strongly recommended to not implement your own Gauss-Markov process by + subtyping this type! The interface is not yet stable, and the implementation is not yet + sufficiently documented. Proceed at your own risk. +""" +abstract type AbstractGaussMarkovProcess{elType} end + +############################################################################################ +# Interface +############################################################################################ +""" + dim(p::AbstractGaussMarkovProcess) + +Return the dimension of the process. + +This is not the dimension of the "state" that is used to efficiently model the prior +process as a state-space model, but it is the dimension of the process itself that we aim +to model. + +See [`AbstractGaussMarkovProcess`](@ref) for more details on Gauss-Markov processes in ProbNumDiffEq. +""" +dim(p::AbstractGaussMarkovProcess) = p.dim + +""" + num_derivatives(p::AbstractGaussMarkovProcess) + +Return the number of derivatives that are represented by the processes state. + +See [`AbstractGaussMarkovProcess`](@ref) for more details on Gauss-Markov processes in ProbNumDiffEq. +""" +num_derivatives(p::AbstractGaussMarkovProcess) = p.num_derivatives + +""" + discretize(p::AbstractGaussMarkovProcess, step_size::Real) + +Compute the transition matrices of the process for a given step size. +""" +discretize(p::AbstractGaussMarkovProcess, step_size::Real) = + discretize(to_sde(p), step_size) + +""" + initial_distribution(p::AbstractGaussMarkovProcess) + +Return the initial distribution of the process. + +Currently this is always a Gaussian distribution with zero mean and unit variance, unless +explicitly overwitten (e.g. for Matern processes to have the stationary distribution). +This implementation is likely to change in the future to allow for more flexibility. +""" +initial_distribution(p::AbstractGaussMarkovProcess{T}) where {T} = begin + d, q = dim(p), num_derivatives(p) + D = d * (q + 1) + initial_variance = ones(T, q + 1) + μ0 = T <: LinearAlgebra.BlasFloat ? Array{T}(calloc, D) : zeros(T, D) + Σ0 = PSDMatrix(IsometricKroneckerProduct(d, diagm(sqrt.(initial_variance)))) + return Gaussian(μ0, Σ0) +end + +""" + SciMLBase.remake(::AbstractGaussMarkovProcess{T}; eltype=T, kwargs...) + +Create a new process of the same type, but with different parameters. +This is particularly used to set the Wiener process dimension, so that the prior can be +defined with missing dimension first, and then have the dimension set to the dimension of +the ODE. This corresponds to having the same prior for all dimensions of the ODE. +Similarly, the element type of the process is also set to the element type of the ODE. +""" +remake(p::AbstractGaussMarkovProcess{T}; elType=T, kwargs...) where {T} + +@doc raw""" + to_sde(p::AbstractGaussMarkovProcess) + +Convert the prior to the corresponding SDE. + +Gauss-Markov processes are solutions to linear time-invariant stochastic differential +equations (SDEs) of the form +```math +\begin{aligned} +dX_t &= F X_t dt + L dW_t \\ +X_0 &= \mathcal{N} \left( X_0; \mu_0, \Sigma_0 \right) +\end{aligned} +``` +where ``X_t`` is the state, ``W_t`` is a Wiener process, and ``F`` and ``L`` are matrices. +This function returns the corresponding SDE, i.e. the matrices ``F`` and ``L``, as a +[`LTISDE`](@ref). +""" +to_sde(p::AbstractGaussMarkovProcess) + +############################################################################################ +# General implementations +############################################################################################ function initialize_preconditioner( - FAC::CovarianceStructure{T1}, - p::AbstractODEFilterPrior{T}, - dt, -) where {T,T1} + FAC::CovarianceStructure{T1}, p::AbstractGaussMarkovProcess{T}, dt) where {T,T1} @assert T == T1 - d, q = p.wiener_process_dimension, p.num_derivatives + d, q = dim(p), num_derivatives(p) P, PI = init_preconditioner(FAC) make_preconditioner!(P, dt, d, q) make_preconditioner_inv!(PI, dt, d, q) @@ -14,7 +122,7 @@ function initialize_preconditioner( end """ - initilize_transition_matrices!(p::AbstractODEFilterPrior) + initilize_transition_matrices!(p::AbstractGaussMarkovProcess) Create all the (moslty empty) matrices that relate to the transition model. @@ -38,10 +146,10 @@ See also: [`make_transition_matrices`](@ref). """ function initialize_transition_matrices( FAC::DenseCovariance, - p::AbstractODEFilterPrior{T}, + p::AbstractGaussMarkovProcess{T}, dt, ) where {T} - d, q = p.wiener_process_dimension, p.num_derivatives + d, q = dim(p), num_derivatives(p) D = d * (q + 1) Ah, Qh = zeros(T, D, D), PSDMatrix(zeros(T, D, D)) P, PI = initialize_preconditioner(FAC, p, dt) @@ -49,16 +157,15 @@ function initialize_transition_matrices( Q = copy(Qh) return A, Q, Ah, Qh, P, PI end -function initialize_transition_matrices( +initialize_transition_matrices( FAC::CovarianceStructure, - p::AbstractODEFilterPrior, + p::AbstractGaussMarkovProcess, dt, -) +) = error("The chosen prior can not be implemented with a $FAC factorization") -end """ - make_transition_matrices!(cache, prior::AbstractODEFilterPrior, dt) + make_transition_matrices!(cache, prior::AbstractGaussMarkovProcess, dt) Construct all the matrices that relate to the transition model, for a specified step size. @@ -83,10 +190,10 @@ See also: [`initialize_transition_matrices`](@ref). [1] N. Krämer, P. Hennig: **Stable Implementation of Probabilistic ODE Solvers** (2020) """ -function make_transition_matrices!(cache, prior::AbstractODEFilterPrior, dt) +function make_transition_matrices!(cache, prior::AbstractGaussMarkovProcess, dt) @unpack A, Q, Ah, Qh, P, PI = cache make_preconditioners!(cache, dt) - _Ah, _Qh = discretize(cache.prior, dt) + _Ah, _Qh = discretize(prior, dt) copy!(Ah, _Ah) copy!(Qh, _Qh) # A = P * Ah * PI @@ -99,8 +206,48 @@ make_transition_matrices!(cache::AbstractODEFilterCache, dt) = make_transition_matrices!(cache, cache.prior, dt) """ - to_sde(p::AbstractODEFilterPrior) + marginalize(process::AbstractGaussMarkovProcess, times) -Convert the prior to the corresponding SDE. +Compute the marginal distributions of the process at the given time points. + +This function computes the marginal distributions of the process at the given times. +It does so by discretizing the process with the given step sizes (using +[`ProbNumDiffEq.discretize`](@ref)), and then computing the marginal distributions of the +resulting Gaussian distributions. + +See also: [`sample`](@ref). +""" +function marginalize(process::AbstractGaussMarkovProcess, times) + out = [] + X = initial_distribution(process) + push!(out, X) + for i in 2:length(times) + dt = times[i] - times[i-1] + A, Q = ProbNumDiffEq.discretize(process, dt) + X = predict(X, A, Q) + push!(out, X) + end + return out +end + +""" + sample(process::AbstractGaussMarkovProcess, times, N=1) + +Samples from the Gauss-Markov process on the given time grid. + +See also: [`marginalize`](@ref). """ -to_sde(p::AbstractODEFilterPrior) +function sample(process::AbstractGaussMarkovProcess, times, N=1) + out = [] + X = initial_distribution(process) + X = Gaussian(mean(X), Matrix(cov(X))) + s = rand(X, N) + push!(out, s) + for i in 2:length(times) + dt = times[i] - times[i-1] + A, Q = Matrix.(discretize(process, dt)) + s = [rand(Gaussian(A * s[:, j], Q)) for j in 1:N] |> stack |> permutedims + push!(out, s) + end + return out +end diff --git a/src/priors/ioup.jl b/src/priors/ioup.jl index 65006bac9..61f1d25c7 100644 --- a/src/priors/ioup.jl +++ b/src/priors/ioup.jl @@ -1,68 +1,68 @@ @doc raw""" - IOUP([wiener_process_dimension::Integer,] + IOUP([dim::Integer=1,] num_derivatives::Integer, rate_parameter::Union{Number,AbstractVector,AbstractMatrix}) Integrated Ornstein--Uhlenbeck process. -As with the [`IWP`](@ref), the IOUP can be created without specifying its dimension, -in which case it will be inferred from the dimension of the ODE during the solve. -This is typically the preferred usage. -The rate parameter however always needs to be specified. +This prior is mostly used in the context of +[Probabilistic Exponential Integrators](@ref probexpinttutorial) +to include the linear part of a semi-linear ODE in the prior, +so it is used in the [`ExpEK`](@ref) and the [`RosenbrockExpEK`](@ref). # In math +The IOUP is a Gauss--Markov process, which we model with a state representation +```math +\begin{aligned} +Y(t) = \left[ Y^{(0)}(t), Y^{(1)}(t), \dots Y^{(q)}(t) \right], +\end{aligned} +``` +defined as the solution of the stochastic differential equation ```math \begin{aligned} \text{d} Y^{(i)}(t) &= Y^{(i+1)}(t) \ \text{d}t, \qquad i = 0, \dots, q-1 \\ \text{d} Y^{(q)}(t) &= L Y^{(q)}(t) \ \text{d}t + \Gamma \ \text{d}W(t), \end{aligned} ``` -where ``L`` is the `rate_parameter`. +where ``L`` is called the `rate_parameter`. +Then, ``Y^{(0)}(t)`` is the ``q``-times integrated Ornstein--Uhlenbeck process (IOUP) and +``Y^{(i)}(t)`` is the ``i``-th derivative of the IOUP, for ``i = 1, \dots, q``. # Examples ```julia-repl julia> solve(prob, EK1(prior=IOUP(2, -1))) ``` """ -struct IOUP{elType,dimType,R} <: AbstractODEFilterPrior{elType} - wiener_process_dimension::dimType +struct IOUP{elType,R} <: AbstractGaussMarkovProcess{elType} + dim::Int num_derivatives::Int rate_parameter::R update_rate_parameter::Bool end +IOUP{elType}( + ; dim, num_derivatives, rate_parameter, update_rate_parameter=false) where {elType} = + IOUP{elType,typeof(rate_parameter)}( + dim, num_derivatives, rate_parameter, update_rate_parameter) +IOUP(; dim=1, num_derivatives, rate_parameter, update_rate_parameter=false) = + IOUP{typeof(1.0)}(; dim, num_derivatives, rate_parameter, update_rate_parameter) +IOUP(num_derivatives, rate_parameter; update_rate_parameter=false) = + IOUP(; dim=1, num_derivatives, rate_parameter, update_rate_parameter) IOUP(num_derivatives; update_rate_parameter) = begin @assert update_rate_parameter - IOUP(missing, num_derivatives, missing, update_rate_parameter) + IOUP(num_derivatives, missing; update_rate_parameter) end -IOUP(num_derivatives, rate_parameter; update_rate_parameter=false) = - IOUP(missing, num_derivatives, rate_parameter, update_rate_parameter) -IOUP( - wiener_process_dimension, - num_derivatives, - rate_parameter, - update_rate_parameter=false, -) = - IOUP{typeof(1.0)}( - wiener_process_dimension, - num_derivatives, - rate_parameter, - update_rate_parameter, - ) -IOUP{T}( - wiener_process_dimension, - num_derivatives, - rate_parameter, - update_rate_parameter=false, -) where {T} = - IOUP{T,typeof(wiener_process_dimension),typeof(rate_parameter)}( - wiener_process_dimension, - num_derivatives, - rate_parameter, - update_rate_parameter, - ) - -function to_sde(p::IOUP{T,D,<:Number}) where {T,D} - q = p.num_derivatives + +remake( + p::IOUP{T}; + elType=T, + dim=dim(p), + num_derivatives=num_derivatives(p), + rate_parameter=p.rate_parameter, + update_rate_parameter=p.update_rate_parameter, +) where {T} = IOUP{elType}(; dim, num_derivatives, rate_parameter, update_rate_parameter) + +function to_sde(p::IOUP{T,<:Number}) where {T} + q = num_derivatives(p) r = p.rate_parameter F_breve = diagm(1 => ones(q)) @@ -71,14 +71,13 @@ function to_sde(p::IOUP{T,D,<:Number}) where {T,D} L_breve = zeros(q + 1) L_breve[end] = 1.0 - d = p.wiener_process_dimension + d = dim(p) F = IsometricKroneckerProduct(d, F_breve) L = IsometricKroneckerProduct(d, L_breve) return LTISDE(F, L) end function to_sde(p::IOUP) - d = p.wiener_process_dimension - q = p.num_derivatives + d, q = dim(p), num_derivatives(p) r = p.rate_parameter R = if r isa AbstractVector @@ -101,35 +100,18 @@ function to_sde(p::IOUP) return LTISDE(F, L) end -function discretize(p::IOUP, dt::Real) - F, L = to_sde(p) - if F isa IsometricKroneckerProduct - method = FiniteHorizonGramians.ExpAndGram{eltype(F.B),13}() - A_breve, QR_breve = FiniteHorizonGramians.exp_and_gram_chol(F.B, L.B, dt, method) - A = IsometricKroneckerProduct(F.ldim, A_breve) - Q = PSDMatrix(IsometricKroneckerProduct(F.ldim, QR_breve)) - return A, Q - else - method = FiniteHorizonGramians.ExpAndGram{eltype(F),13}() - A, QR = FiniteHorizonGramians.exp_and_gram_chol(F, L, dt, method) - Q = PSDMatrix(QR) - return A, Q - end -end - -function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:Any,<:AbstractMatrix}) - q = prior.num_derivatives +function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:AbstractMatrix}) + q = num_derivatives(prior) r = prior.rate_parameter F[q+1:q+1:end, q+1:q+1:end] = r end -function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:Any,<:AbstractVector}) - q = prior.num_derivatives +function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:AbstractVector}) + q = num_derivatives(prior) r = prior.rate_parameter F[q+1:q+1:end, q+1:q+1:end] = Diagonal(r) end -function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:Any,<:Number}) - d = prior.wiener_process_dimension - q = prior.num_derivatives +function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:Number}) + d, q = dim(prior), num_derivatives(prior) r = prior.rate_parameter F[q+1:q+1:end, q+1:q+1:end] = Diagonal(Fill(r, d)) end diff --git a/src/priors/iwp.jl b/src/priors/iwp.jl index f675c6352..3a7d7f311 100644 --- a/src/priors/iwp.jl +++ b/src/priors/iwp.jl @@ -1,43 +1,57 @@ @doc raw""" - IWP([wiener_process_dimension::Integer,] num_derivatives::Integer) + IWP([dim::Integer=1,] num_derivatives::Integer) -Integrated Wiener process. +``q``-times integrated Wiener process. -**This is the recommended prior!** It is the most well-tested prior, both in this package -and in the probabilistic numerics literature in general -(see the [references](@ref references)). +**This is the recommended prior!** +The IWP is the most common prior choice in the probabilistic ODE solver literature +(see the [references](@ref references)) and is the default choice for the solvers in +ProbNumDiffEq.jl. It is also the prior that has the most efficient implementation. The IWP can be created without specifying the dimension of the Wiener process, -in which case it will be inferred from the dimension of the ODE during the solve. +in which case it will be one-dimensional. The ODE solver then assumes that each dimension +of the ODE solution should be modeled with the same prior. This is typically the preferred usage. # In math +The IWP is a Gauss--Markov process, which we model with a state representation +```math +\begin{aligned} +Y(t) = \left[ Y^{(0)}(t), Y^{(1)}(t), \dots Y^{(q)}(t) \right], +\end{aligned} +``` +defined as the solution of the stochastic differential equation ```math \begin{aligned} \text{d} Y^{(i)}(t) &= Y^{(i+1)}(t) \ \text{d}t, \qquad i = 0, \dots, q-1 \\ \text{d} Y^{(q)}(t) &= \Gamma \ \text{d}W(t). \end{aligned} ``` +Then, ``Y^{(0)}(t)`` is the ``q``-times integrated Wiener process (IWP) and +``Y^{(i)}(t)`` is the ``i``-th derivative of the IWP, for ``i = 1, \dots, q``. # Examples ```julia-repl julia> solve(prob, EK1(prior=IWP(2))) ``` """ -struct IWP{elType,dimType} <: AbstractODEFilterPrior{elType} - wiener_process_dimension::dimType +struct IWP{elType} <: AbstractGaussMarkovProcess{elType} + dim::Int num_derivatives::Int end -# most convenient user-facing constructor: -IWP(num_derivatives) = IWP{typeof(1.0)}(missing, num_derivatives) -IWP{elType}(wiener_process_dimension, num_derivatives) where {elType} = - IWP{elType,typeof(wiener_process_dimension)}(wiener_process_dimension, num_derivatives) -IWP(wiener_process_dimension, num_derivatives) = - IWP{typeof(1.0)}(wiener_process_dimension, num_derivatives) +IWP(; dim, num_derivatives) = IWP{typeof(1.0)}(dim, num_derivatives) +IWP(num_derivatives) = IWP(; dim=1, num_derivatives) + +remake( + p::IWP{T}; + elType=T, + dim=dim(p), + num_derivatives=num_derivatives(p), +) where {T} = IWP{elType}(dim, num_derivatives) function to_sde(p::IWP) - d, q = p.wiener_process_dimension, p.num_derivatives + d, q = dim(p), num_derivatives(p) F_breve = diagm(1 => ones(q)) L_breve = zeros(q + 1) @@ -79,8 +93,8 @@ adjusted such that we obtain the left square root, with different ordering of th return L end -function preconditioned_discretize_1d(iwp::IWP{elType}) where {elType} - q = iwp.num_derivatives +function preconditioned_discretize_1d(p::IWP{elType}) where {elType} + q = num_derivatives(p) A_breve = binomial.(q:-1:0, (q:-1:0)') # Q_breve = Cauchy(collect(q:-1.0:0.0), collect((q+1):-1.0:1.0)) |> Matrix # for Julia1.6 @@ -101,11 +115,11 @@ Generate the discrete dynamics for a q-times integrated Wiener process (IWP). The returned matrices `A::AbstractMatrix` and `Q::PSDMatrix` should be used in combination with the preconditioners; see `./src/preconditioning.jl`. """ -function preconditioned_discretize(iwp::IWP) - A_breve, Q_breve = preconditioned_discretize_1d(iwp) +function preconditioned_discretize(p::IWP) + A_breve, Q_breve = preconditioned_discretize_1d(p) QR_breve = Q_breve.R - d = iwp.wiener_process_dimension + d = dim(p) A = IsometricKroneckerProduct(d, Matrix(A_breve)) QR = IsometricKroneckerProduct(d, Matrix(QR_breve)) Q = PSDMatrix(QR) @@ -113,8 +127,8 @@ function preconditioned_discretize(iwp::IWP) return A, Q end -function discretize_1d(iwp::IWP{elType}, dt::Real) where {elType} - q = iwp.num_derivatives +function discretize_1d(p::IWP{elType}, dt::Real) where {elType} + q = num_derivatives(p) v = 0:q @@ -134,7 +148,7 @@ end function discretize(p::IWP, dt::Real) A_breve, Q_breve = discretize_1d(p, dt) - d = p.wiener_process_dimension + d = dim(p) A = IsometricKroneckerProduct(d, A_breve) QR = IsometricKroneckerProduct(d, Q_breve.R) Q = PSDMatrix(QR) diff --git a/src/priors/ltisde.jl b/src/priors/ltisde.jl index b6d088194..6f7f14d57 100644 --- a/src/priors/ltisde.jl +++ b/src/priors/ltisde.jl @@ -11,8 +11,6 @@ where ``X_t`` is the state, ``W_t`` is a Wiener process, and ``F`` and ``L`` are This `LTISDE` object holds the matrices ``F`` and ``L``. It also provides some functionality to discretize the SDE via a matrix-fraction decomposition. See: [`discretize(::LTISDE, ::Real)`](@ref). - -In this package, the LTISDE is mostly used to implement the discretization of the [`IOUP`](@ref) prior. """ struct LTISDE{AT<:AbstractMatrix,BT<:AbstractVecOrMat} F::AT @@ -25,6 +23,11 @@ iterate(sde::LTISDE) = sde.F, true iterate(sde::LTISDE, s) = s ? (sde.L, false) : nothing length(sde::LTISDE) = 2 +""" + discretize(p::LTISDE, step_size::Real) + +Compute the transition matrices of the SDE solution for a given step size. +""" discretize(sde::LTISDE, dt::Real) = discretize(drift(sde), dispersion(sde), dt) discretize(F::AbstractMatrix, L::AbstractMatrix, dt::Real) = begin method = FiniteHorizonGramians.ExpAndGram{eltype(F),13}() diff --git a/src/priors/matern.jl b/src/priors/matern.jl index 1b5e993a5..feac6f00f 100644 --- a/src/priors/matern.jl +++ b/src/priors/matern.jl @@ -1,16 +1,23 @@ @doc raw""" - Matern([wiener_process_dimension::Integer,] + Matern([dim::Integer=1,] num_derivatives::Integer, lengthscale::Number) Matern process. -As with the [`IWP`](@ref), the Matern can be created without specifying its dimension, -in which case it will be inferred from the dimension of the ODE during the solve. -This is typically the preferred usage. -The lengthscale parameter however always needs to be specified. +The class of [`Matern`](@ref) processes is well-known in the Gaussian process literature, +and they also have a corresponding SDE representation similarly to the +[`IWP`](@ref) and the [`IOUP`](@ref). +See also [sarkka19appliedsde](@cite) for more details. # In math +A Matern process is a Gauss--Markov process, which we model with a state representation +```math +\begin{aligned} +Y(t) = \left[ Y^{(0)}(t), Y^{(1)}(t), \dots Y^{(q)}(t) \right], +\end{aligned} +``` +defined as the solution of the stochastic differential equation ```math \begin{aligned} \text{d} Y^{(i)}(t) &= Y^{(i+1)}(t) \ \text{d}t, \qquad i = 0, \dots, q-1 \\ @@ -20,31 +27,46 @@ The lengthscale parameter however always needs to be specified. Y^{(j)}(t) \right) \ \text{d}t + \Gamma \ \text{d}W(t). \end{aligned} ``` -where ``l`` is the `lengthscale`. +where ``l`` is called the `lengthscale` parameter. +Then, ``Y^{(0)}(t)`` is a Matern process and +``Y^{(i)}(t)`` is the ``i``-th derivative of this process, for ``i = 1, \dots, q``. # Examples ```julia-repl julia> solve(prob, EK1(prior=Matern(2, 1))) ``` """ -struct Matern{elType,dimType,R} <: AbstractODEFilterPrior{elType} - wiener_process_dimension::dimType +struct Matern{elType,L} <: AbstractGaussMarkovProcess{elType} + dim::Int num_derivatives::Int - lengthscale::R + lengthscale::L end +Matern{elType}(; dim, num_derivatives, lengthscale) where {elType} = + Matern{elType,typeof(lengthscale)}(dim, num_derivatives, lengthscale) +Matern(; dim, num_derivatives, lengthscale) = + Matern{typeof(1.0)}(; dim, num_derivatives, lengthscale) Matern(num_derivatives, lengthscale) = - Matern(missing, num_derivatives, lengthscale) -Matern(wiener_process_dimension, num_derivatives, lengthscale) = - Matern{typeof(1.0)}(wiener_process_dimension, num_derivatives, lengthscale) -Matern{T}(wiener_process_dimension, num_derivatives, lengthscale) where {T} = - Matern{T,typeof(wiener_process_dimension),typeof(lengthscale)}( - wiener_process_dimension, - num_derivatives, - lengthscale, - ) + Matern(; dim=1, num_derivatives, lengthscale) + +remake( + p::Matern{T}; + elType=T, + dim=dim(p), + num_derivatives=num_derivatives(p), + lengthscale=p.lengthscale, +) where {T} = Matern{elType}(; dim, num_derivatives, lengthscale) + +initial_distribution(p::Matern{T}) where {T} = begin + d, q = dim(p), num_derivatives(p) + D = d * (q + 1) + sde = to_sde(p) + μ0 = T <: LinearAlgebra.BlasFloat ? Array{T}(calloc, D) : zeros(T, D) + Σ0 = PSDMatrix(plyapc(sde.F, sde.L)') + return Gaussian(μ0, Σ0) +end function to_sde(p::Matern) - q = p.num_derivatives + d, q = dim(p), num_derivatives(p) l = p.lengthscale @assert l isa Number @@ -57,14 +79,7 @@ function to_sde(p::Matern) L_breve = zeros(q + 1) L_breve[end] = 1.0 - d = p.wiener_process_dimension F = IsometricKroneckerProduct(d, F_breve) L = IsometricKroneckerProduct(d, L_breve) return LTISDE(F, L) end -function discretize(p::Matern, dt::Real) - l = p.lengthscale - @assert l isa Number - A, Q = discretize(to_sde(p), dt) - return A, Q -end diff --git a/test/core/preconditioning.jl b/test/core/preconditioning.jl index 7c6598a05..41502fa6b 100644 --- a/test/core/preconditioning.jl +++ b/test/core/preconditioning.jl @@ -13,7 +13,7 @@ prob = remake(prob, tspan=(0.0, 10.0)) d, q = 2, 3 - prior = PNDE.IWP(d, q) + prior = PNDE.IWP(dim=d, num_derivatives=q) Ah, Qh = PNDE.discretize(prior, h) Qh = PNDE.apply_diffusion(Qh, σ^2) diff --git a/test/core/priors.jl b/test/core/priors.jl index e51955131..dac7769f2 100644 --- a/test/core/priors.jl +++ b/test/core/priors.jl @@ -1,15 +1,24 @@ -using ProbNumDiffEq: make_transition_matrices! +using ProbNumDiffEq: make_transition_matrices!, dim, num_derivatives using ProbNumDiffEq import ProbNumDiffEq as PNDE using Test using LinearAlgebra using FiniteHorizonGramians +using Statistics +using Plots +using SimpleUnPack h = 0.1 σ = 0.1 @testset "General prior API" begin - for prior in (IWP(2, 3), IOUP(2, 3, 1), Matern(2, 3, 1)) + for prior in ( + IWP(dim=2, num_derivatives=3), + IOUP(dim=2, num_derivatives=3, rate_parameter=1), + Matern(dim=2, num_derivatives=3, lengthscale=1), + ) + d, q = dim(prior), num_derivatives(prior) + sde = PNDE.to_sde(prior) A1, Q1 = PNDE.discretize(sde, h) A2, Q2 = PNDE.discretize(prior, h) @@ -26,13 +35,26 @@ h = 0.1 PNDE.LTISDE(Matrix(sde.F), Matrix(sde.L)), h) @test A1 ≈ A4 @test Q1.R ≈ Q4R + + ts = 0:0.1:1 + marginals = @test_nowarn PNDE.marginalize(prior, ts) + @test length(marginals) == length(ts) + @test marginals[1] isa Gaussian + + N = 3 + samples = @test_nowarn PNDE.sample(prior, ts, N) + @test length(samples) == length(ts) + @test size(samples[1]) == (d * (q + 1), N) + + @test_nowarn plot(prior, ts; plot_derivatives=true) + @test_nowarn plot(prior, ts; plot_derivatives=false) end end @testset "Test IWP (d=2,q=2)" begin d, q = 2, 2 - prior = PNDE.IWP(d, q) + prior = PNDE.IWP(dim=d, num_derivatives=q) # true sde parameters F = [ @@ -139,7 +161,7 @@ end end function test_make_transition_matrices(prior, Atrue, Qtrue) - d, q = prior.wiener_process_dimension, prior.num_derivatives + d, q = dim(prior), num_derivatives(prior) @testset "Test `make_transition_matrices!`" begin A, Q, Ah, Qh, P, PI = PNDE.initialize_transition_matrices( PNDE.DenseCovariance{Float64}(d, q), prior, h) @@ -177,7 +199,7 @@ end d, q = 2, 2 r = randn(d, d) - prior = PNDE.IOUP(d, q, r) + prior = PNDE.IOUP(dim=d, num_derivatives=q, rate_parameter=r) sde = PNDE.to_sde(prior) F = [ @@ -216,7 +238,7 @@ end λ = sqrt(2ν) / l a(i) = binomial(q + 1, i - 1) - prior = PNDE.Matern(d, q, l) + prior = PNDE.Matern(dim=d, num_derivatives=q, lengthscale=l) sde = PNDE.to_sde(prior) F = [ @@ -245,4 +267,11 @@ end @test Matrix(Q1) ≈ Matrix(Q2) test_make_transition_matrices(prior, A1, Q1) + + @testset "Test initial distribution being stationary" begin + D0 = PNDE.initial_distribution(prior) + D1 = PNDE.predict(D0, A1, Q1) + @test mean(D0) ≈ mean(D1) + @test Matrix(cov(D0)) ≈ Matrix(cov(D1)) + end end diff --git a/test/correctness.jl b/test/correctness.jl index 22e52c508..df76f16de 100644 --- a/test/correctness.jl +++ b/test/correctness.jl @@ -37,6 +37,12 @@ CONSTANT_ALGS = ( EK0(order=3, smooth=true, diffusionmodel=DynamicMVDiffusion()) => 1e-8, EK1(order=3, smooth=true) => 1e-8, EK1(order=3, smooth=true, diffusionmodel=FixedDiffusion()) => 1e-8, + # Priors + EK0(prior=IOUP(3, -1), smooth=true) => 2e-9, + EK1(prior=IOUP(3, -1), smooth=true, diffusionmodel=FixedDiffusion()) => 1e-9, + EK1(prior=IOUP(3, update_rate_parameter=true), smooth=true) => 1e-9, + EK0(prior=Matern(3, 1), smooth=true) => 5e-7, + EK1(prior=Matern(4, 0.1), smooth=true, diffusionmodel=FixedDiffusion()) => 2e-5, ) ADAPTIVE_ALGS = ( EK0(order=2) => 2e-4, @@ -53,6 +59,12 @@ ADAPTIVE_ALGS = ( EK1(order=8) => 5e-6, EK1(order=3, initialization=ClassicSolverInit()) => 1e-5, EK1(order=3, initialization=SimpleInit()) => 1e-4, + # Priors + EK0(prior=IOUP(3, -1), smooth=true) => 1e-5, + EK1(prior=IOUP(3, -1), smooth=true, diffusionmodel=FixedDiffusion()) => 1e-5, + EK1(prior=IOUP(3, update_rate_parameter=true), smooth=true) => 2e-5, + EK0(prior=Matern(3, 1), smooth=true) => 1e-4, + EK1(prior=Matern(3, 0.1), smooth=true, diffusionmodel=FixedDiffusion()) => 1e-5, ) PROBS = ( diff --git a/test/errors_thrown.jl b/test/errors_thrown.jl index ab3ada7ef..88c323259 100644 --- a/test/errors_thrown.jl +++ b/test/errors_thrown.jl @@ -24,3 +24,10 @@ end prob = prob_ode_lotkavolterra @test_throws ErrorException solve(prob, EK0(smooth=true), save_everystep=false) end + +@testset "Invalid prior" begin + prob = prob_ode_lotkavolterra + @test_throws DimensionMismatch solve(prob, EK0(prior=IWP(dim=3, num_derivatives=2))) + prior = IOUP(num_derivatives=1, rate_parameter=3, update_rate_parameter=true) + @test_throws ArgumentError solve(prob, EK0(; prior)) +end