From fd739dc703cb980e44292adc424548c51286214a Mon Sep 17 00:00:00 2001
From: Nathanael Bosch <nathanael.bosch@uni-tuebingen.de>
Date: Wed, 17 Jan 2024 09:48:13 +0000
Subject: [PATCH] Refactor the prior implementation (#286)

* Rename `AbstractODEFilterPrior` to `AbstractGaussMarkovPrior`

* Use accessors instead of the fields directly

* Small intermediate clean-up

* Rename AbstractGaussMarkovPrior to AbstractGaussMarkovProcess

* Make the initial distribution part of the prior

* Specialize the initial distribution for Matern

* Implement a `remake` to change elType and dimension of priors

* Add MatrixEquations.jl

* Add Compat entry for MatrixEquations.jl

* Fix a typo

* Fix vale.sh issues

* Fix another bug

* Fix an IOUP bug I introduced

* JuliaFormatter.jl

* JuliaFormatter.jl update apparently changed a bit

* Add the priors to the basic ODE correctness tests

* Test the Matern initial distribution in the priors tests

* Make IOUP and Matern use the default LTISDE discretize

* JuliaFormatter.jl

* Add a plotting convenience function

* Better plotting of priors with dimension > 1

* Improve the plotting some more

* Add the new prior functionality to the tests

* JuliaFormatter.jl

* Make the plotting much nicer

* Add many docstrings

* Start working more on the prior documentation

* More sensible Documenter.jl settings

* Make the priors one-dimensional by default

* Give the probabilistic exponential integrator tutorial an id

* Make the prior plot layout less opinionated

* Fix some prior bugs I introduced now that they're 1-D

* Refactor priors some more

* Make the plot recipe yet again less opinionated

* Towards a better prior documentation

* Fix a bug

* Better docs

* Better docs

* Fix some tests

* Better docs

* Better plots

* JuliaFormatter.jl

* Somewhat better docs

* Change the field names and actually use accessors

* Remove things from the plot recipe

* Better errors

* Fix the Matern constructor

* Fix the IOUP constructor

* Fix more bugs

* Test some prior-related error messages

* JuliaFormatter.jl

* Rely more on keywords for the priors

* Make tip into info

* Fix another failing test

* JuliaFormatter.jl

* JuliaFormatter.jl

* Improve some more docstrings

* Add a newline in the docstring math
---
 .../vocabularies}/ProbNumDiffEq/accept.txt    |   0
 .../vocabularies}/ProbNumDiffEq/reject.txt    |   0
 Project.toml                                  |   2 +
 docs/make.jl                                  |   3 +-
 docs/src/priors.md                            |  99 +++++++++-
 docs/src/refs.bib                             |  11 ++
 docs/src/tutorials/exponential_integrators.md |   2 +-
 ext/RecipesBaseExt.jl                         |  63 ++++++
 src/ProbNumDiffEq.jl                          |   5 +-
 src/alg_utils.jl                              |   2 +-
 src/algorithms.jl                             |  12 +-
 src/caches.jl                                 |  52 ++---
 src/covariance_structure.jl                   |   6 +-
 src/priors/common.jl                          | 185 ++++++++++++++++--
 src/priors/ioup.jl                            | 110 +++++------
 src/priors/iwp.jl                             |  60 +++---
 src/priors/ltisde.jl                          |   7 +-
 src/priors/matern.jl                          |  67 ++++---
 test/core/preconditioning.jl                  |   2 +-
 test/core/priors.jl                           |  41 +++-
 test/correctness.jl                           |  12 ++
 test/errors_thrown.jl                         |   7 +
 22 files changed, 566 insertions(+), 182 deletions(-)
 rename .github/vale-styles/{Vocab => config/vocabularies}/ProbNumDiffEq/accept.txt (100%)
 rename .github/vale-styles/{Vocab => config/vocabularies}/ProbNumDiffEq/reject.txt (100%)

diff --git a/.github/vale-styles/Vocab/ProbNumDiffEq/accept.txt b/.github/vale-styles/config/vocabularies/ProbNumDiffEq/accept.txt
similarity index 100%
rename from .github/vale-styles/Vocab/ProbNumDiffEq/accept.txt
rename to .github/vale-styles/config/vocabularies/ProbNumDiffEq/accept.txt
diff --git a/.github/vale-styles/Vocab/ProbNumDiffEq/reject.txt b/.github/vale-styles/config/vocabularies/ProbNumDiffEq/reject.txt
similarity index 100%
rename from .github/vale-styles/Vocab/ProbNumDiffEq/reject.txt
rename to .github/vale-styles/config/vocabularies/ProbNumDiffEq/reject.txt
diff --git a/Project.toml b/Project.toml
index 1530af23c..a5b4c1226 100644
--- a/Project.toml
+++ b/Project.toml
@@ -17,6 +17,7 @@ FunctionWrappersWrappers = "77dc65aa-8811-40c2-897b-53d922fa7daf"
 GaussianDistributions = "43dcc890-d446-5863-8d1a-14597580bb8d"
 Kronecker = "2c470bb0-bcc8-11e8-3dad-c9649493f05e"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MatrixEquations = "99c1a7ee-ab34-5fd5-8076-27c950a045f4"
 Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 PSDMatrices = "fe68d972-6fd8-4755-bdf0-97d4c54cefdc"
@@ -56,6 +57,7 @@ FunctionWrappersWrappers = "0.1.3"
 GaussianDistributions = "0.5"
 Kronecker = "0.5.4"
 LinearAlgebra = "1"
+MatrixEquations = "2"
 Octavian = "0.3.17"
 OrdinaryDiffEq = "6.52"
 PSDMatrices = "0.4.7"
diff --git a/docs/make.jl b/docs/make.jl
index 5d25d8a69..19e607fd7 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -69,7 +69,8 @@ makedocs(
         ],
         "References" => "references.md",
     ],
-    warnonly=:missing_docs,
+    warnonly=Documenter.except(:missing_docs),
+    checkdocs=:none,
 )
 
 # Documenter can also automatically deploy documentation to gh-pages.
diff --git a/docs/src/priors.md b/docs/src/priors.md
index 5fa1fba7e..bed6d8560 100644
--- a/docs/src/priors.md
+++ b/docs/src/priors.md
@@ -2,7 +2,7 @@
 
 **TL;DR: If you're unsure which prior to use, just stick to the default integrated Wiener process prior [`IWP`](@ref)!**
 
-# Background
+## Background
 
 We model the ODE solution ``y(t)`` with a Gauss--Markov prior.
 More precisely, let
@@ -24,14 +24,103 @@ Then ``Y^{(i)}(t)`` models the ``i``-th derivative of ``y(t)``.
 If you're more interested in the _diffusion_ ``\textcolor{#4063D8}{\Gamma}`` check out the [Diffusion models and calibration](@ref) section,
 and for info on the initial distribution ``\textcolor{purple}{ \mathcal{N} \left( \mu_0, \Sigma_0 \right) }`` check out the [Initialization](@ref) section.
 
-**If you're unsure which prior to use, just stick to the integrated Wiener process prior [`IWP`](@ref)!**
-This is also the default choice for all solvers.
-The other priors are rather experimental / niche at the time of writing.
+!!! info
+    **If you're unsure which prior to use, just stick to the integrated Wiener process prior [`IWP`](@ref)!**
+    This is also the default choice for all solvers.
+    The other priors are rather experimental / niche at the time of writing.
 
-## API
+## Prior choices
+
+ProbNumDiffEq.jl currently supports three classes of priors for the solvers:
+- [`IWP`](@ref): ``q``-times integrated Wiener processes
+- [`IOUP`](@ref): ``q``-times integrated Ornstein--Uhlenbeck processes
+- [`Matern`](@ref): Matérn processes
+Let's look at each of them in turn and visualize some examples.
 
+
+### Integrated Wiener process ([`IWP`](@ref))
 ```@docs
 IWP
+```
+Here is how the [`IWP`](@ref) looks for varying smoothness parameters ``q``:
+```@example priors
+using ProbNumDiffEq, Plots
+plotrange = range(0, 10, length=250)
+plot(
+    plot(IWP(1), plotrange; title="q=1"),
+    plot(IWP(2), plotrange; title="q=2"),
+    plot(IWP(3), plotrange; title="q=3"),
+    plot(IWP(4), plotrange; title="q=4");
+    ylims=(-20,20),
+)
+```
+In the context of ODE solvers, the smoothness parameter ``q`` influences the convergence rate of the solver,
+and so it is typically chose similarly to the order of a Runge--Kutta method: lower order for low accuracy, higher order for high accuracy.
+
+
+### Integrated Ornstein--Uhlenbeck process ([`IOUP`](@ref))
+The ``q``-times integrated Ornstein--Uhlenbeck process prior [`IOUP`](@ref) is a generalization of the IWP prior, where the drift matrix ``\textcolor{#389826}{A}`` is not zero:
+```@docs
 IOUP
+```
+
+Here is how the [`IOUP`](@ref) looks for varying rate parameters:
+```@example priors
+using ProbNumDiffEq, Plots
+plotrange = range(0, 10, length=250)
+plot(
+    plot(IOUP(1, -1), plotrange; title="q=1,L=-1", ylims=(-20,20)),
+    plot(IOUP(1, 1), plotrange; title="q=1,L=1", ylims=(-20,20)),
+    plot(IOUP(4, -1), plotrange; title="q=4,L=-1", ylims=(-50,50)),
+    plot(IOUP(4, 1), plotrange; title="q=4,L=1", ylims=(-50,50));
+)
+```
+
+In the context of [Probabilistic Exponential Integrators](@ref probexpinttutorial), the rate parameter is often chosen according to the given ODE.
+Here is an example for a damped oscillator:
+```@example priors
+plot(IOUP(2, 1, [-0.2 -2π; 2π -0.2]), plotrange; plot_title="damped oscillator prior");
+```
+
+### Matérn process ([`Matern`](@ref))
+```@docs
 Matern
 ```
+
+Here is how the [`Matern`](@ref) looks for varying smoothness parameters ``q``:
+```@example priors
+using ProbNumDiffEq, Plots
+plotrange = range(0, 10, length=250)
+plot(
+    plot(Matern(1, 1), plotrange; title="q=1"),
+    plot(Matern(2, 1), plotrange; title="q=2"),
+    plot(Matern(3, 1), plotrange; title="q=3"),
+    plot(Matern(4, 1), plotrange; title="q=4");
+)
+```
+and for varying length scales ``\ell``:
+```@example priors
+plot(
+    plot(Matern(2, 5), plotrange; title="l=5"),
+    plot(Matern(2, 2), plotrange; title="l=2"),
+    plot(Matern(2, 1), plotrange; title="l=1"),
+    plot(Matern(2, 0.5), plotrange; title="l=0.5"),
+)
+```
+
+## API
+```@docs
+ProbNumDiffEq.AbstractGaussMarkovProcess
+ProbNumDiffEq.LTISDE
+ProbNumDiffEq.dim
+ProbNumDiffEq.num_derivatives
+ProbNumDiffEq.to_sde
+ProbNumDiffEq.discretize
+ProbNumDiffEq.initial_distribution
+```
+
+### Convenience functions to analyze and visualize priors
+```@docs
+ProbNumDiffEq.marginalize
+ProbNumDiffEq.sample
+```
diff --git a/docs/src/refs.bib b/docs/src/refs.bib
index cac8d1955..a7c7a21ce 100644
--- a/docs/src/refs.bib
+++ b/docs/src/refs.bib
@@ -170,3 +170,14 @@ @book{hennig22probnum
   author =       {Hennig, Philipp and Osborne, Michael A. and Kersting, Hans P.},
   year =         2022,
 }
+
+@book{sarkka19appliedsde,
+  place =        {Cambridge},
+  series =       {Institute of Mathematical Statistics Textbooks},
+  title =        {Applied Stochastic Differential Equations},
+  DOI =          {10.1017/9781108186735},
+  publisher =    {Cambridge University Press},
+  author =       {Särkkä, Simo and Solin, Arno},
+  year =         2019,
+  collection =   {Institute of Mathematical Statistics Textbooks}
+}
\ No newline at end of file
diff --git a/docs/src/tutorials/exponential_integrators.md b/docs/src/tutorials/exponential_integrators.md
index b23059aa9..02db95af9 100644
--- a/docs/src/tutorials/exponential_integrators.md
+++ b/docs/src/tutorials/exponential_integrators.md
@@ -1,4 +1,4 @@
-# Probabilistic Exponential Integrators
+# [Probabilistic Exponential Integrators](@id probexpinttutorial)
 
 Exponential integrators are a class of numerical methods for solving semi-linear ordinary differential equations (ODEs) of the form
 ```math
diff --git a/ext/RecipesBaseExt.jl b/ext/RecipesBaseExt.jl
index fb416c540..55816a4c4 100644
--- a/ext/RecipesBaseExt.jl
+++ b/ext/RecipesBaseExt.jl
@@ -72,4 +72,67 @@ import SciMLBase: interpret_vars, getsyms
     end
 end
 
+@recipe function f(
+    process::ProbNumDiffEq.AbstractGaussMarkovProcess,
+    plotrange;
+    N_samples=10,
+    plot_derivatives=false,
+)
+    marginals = ProbNumDiffEq.marginalize(process, plotrange)
+    d = ProbNumDiffEq.dim(process)
+    q = ProbNumDiffEq.num_derivatives(process)
+    means = [mean(m) for m in marginals] |> stack |> permutedims
+    stddevs = [std(m) for m in marginals] |> stack |> permutedims
+
+    perm = permutedims(reshape(collect(1:d*(q+1)), q + 1, d))[:]
+    reorder(X) = X[:, perm]
+
+    E0 = ProbNumDiffEq.projection(d, q)(0)
+    if !plot_derivatives
+        stddevs = stddevs * E0'
+        means = means * E0'
+        q = 0
+    end
+
+    dui(i) = "u" * "'"^i
+    if plot_derivatives
+        title --> [i == 1 ? "$(dui(j))" : "" for i in 1:d for j in 0:q] |> permutedims
+    end
+    ylabel --> [q == 0 ? "u$i" : "" for i in 1:d for q in 0:q] |> permutedims
+    xlabel --> if plot_derivatives
+        [i == d ? "t" : "" for i in 1:d for q in 0:q] |> permutedims
+    else
+        "t"
+    end
+
+    @series begin
+        ribbon --> 3stddevs
+        label --> ""
+        fillalpha --> 0.1
+        layout --> if plot_derivatives
+            (d, q + 1)
+        else
+            d
+        end
+        plotrange, means
+    end
+
+    if N_samples > 0
+        samples = ProbNumDiffEq.sample(process, plotrange, N_samples) |> stack
+        samples = permutedims(samples, (3, 1, 2))
+        for i in 1:N_samples
+            @series begin
+                s = samples[:, :, i]
+                if !plot_derivatives
+                    s = s * E0'
+                end
+                primary --> false
+                linealpha --> 0.3
+                label := ""
+                plotrange, s
+            end
+        end
+    end
+end
+
 end
diff --git a/src/ProbNumDiffEq.jl b/src/ProbNumDiffEq.jl
index 6cf90d47a..3c6695a69 100644
--- a/src/ProbNumDiffEq.jl
+++ b/src/ProbNumDiffEq.jl
@@ -6,12 +6,12 @@ import Base: copy, copy!, show, size, ndims, similar, isapprox, isequal, iterate
 
 using LinearAlgebra
 import LinearAlgebra: mul!
-import Statistics: mean, var, std
+import Statistics: mean, var, std, cov
 
 using Reexport
 @reexport using DiffEqBase
 import SciMLBase
-import SciMLBase: interpret_vars, getsyms
+import SciMLBase: interpret_vars, getsyms, remake
 using OrdinaryDiffEq
 using SpecialMatrices, ToeplitzMatrices
 using FastBroadcast
@@ -29,6 +29,7 @@ import Kronecker
 using ArrayAllocators
 using FiniteHorizonGramians
 using FillArrays
+using MatrixEquations
 
 @reexport using GaussianDistributions
 
diff --git a/src/alg_utils.jl b/src/alg_utils.jl
index f41aa3ba3..a3c84736f 100644
--- a/src/alg_utils.jl
+++ b/src/alg_utils.jl
@@ -21,7 +21,7 @@ OrdinaryDiffEq.isimplicit(::EK1) = true
 ############################################
 # Step size control
 OrdinaryDiffEq.isadaptive(::AbstractEK) = true
-OrdinaryDiffEq.alg_order(alg::AbstractEK) = alg.prior.num_derivatives
+OrdinaryDiffEq.alg_order(alg::AbstractEK) = num_derivatives(alg.prior)
 # OrdinaryDiffEq.alg_adaptive_order(alg::AbstractEK) =
 
 # PI control is the default!
diff --git a/src/algorithms.jl b/src/algorithms.jl
index 08d42e19b..9f7159115 100644
--- a/src/algorithms.jl
+++ b/src/algorithms.jl
@@ -8,7 +8,7 @@ abstract type AbstractEK <: OrdinaryDiffEq.OrdinaryDiffEqAdaptiveAlgorithm end
           smooth=true,
           prior=IWP(order),
           diffusionmodel=DynamicDiffusion(),
-          initialization=TaylorModeInit(prior.num_derivatives))
+          initialization=TaylorModeInit(num_derivatives(prior)))
 
 **Gaussian ODE filter with zeroth-order vector field linearization.**
 
@@ -25,7 +25,7 @@ which scales cubically with the problem size._
 # Arguments
 - `order::Integer`: Order of the integrated Wiener process (IWP) prior.
 - `smooth::Bool`: Turn smoothing on/off; smoothing is required for dense output.
-- `prior::AbstractODEFilterPrior`: Prior to be used by the ODE filter.
+- `prior::AbstractGaussMarkovProcess`: Prior to be used by the ODE filter.
    By default, uses a 3-times integrated Wiener process prior `IWP(3)`.
    See also: [Priors](@ref).
 - `diffusionmodel::ProbNumDiffEq.AbstractDiffusion`: See [Diffusion models and calibration](@ref).
@@ -49,7 +49,7 @@ EK0(;
     prior=IWP(order),
     diffusionmodel=DynamicDiffusion(),
     smooth=true,
-    initialization=TaylorModeInit(prior.num_derivatives),
+    initialization=TaylorModeInit(num_derivatives(prior)),
 ) = EK0(prior, diffusionmodel, smooth, initialization)
 
 _unwrap_val(::Val{B}) where {B} = B
@@ -60,7 +60,7 @@ _unwrap_val(B) = B
           smooth=true,
           prior=IWP(order),
           diffusionmodel=DynamicDiffusion(),
-          initialization=TaylorModeInit(prior.num_derivatives),
+          initialization=TaylorModeInit(num_derivatives(prior)),
           kwargs...)
 
 **Gaussian ODE filter with first-order vector field linearization.**
@@ -73,7 +73,7 @@ so if you're solving a high-dimensional non-stiff problem you might want to give
 # Arguments
 - `order::Integer`: Order of the integrated Wiener process (IWP) prior.
 - `smooth::Bool`: Turn smoothing on/off; smoothing is required for dense output.
-- `prior::AbstractODEFilterPrior`: Prior to be used by the ODE filter.
+- `prior::AbstractGaussMarkovProcess`: Prior to be used by the ODE filter.
    By default, uses a 3-times integrated Wiener process prior `IWP(3)`.
    See also: [Priors](@ref).
 - `diffusionmodel::ProbNumDiffEq.AbstractDiffusion`: See [Diffusion models and calibration](@ref).
@@ -103,7 +103,7 @@ EK1(;
     prior::PT=IWP(order),
     diffusionmodel::DT=DynamicDiffusion(),
     smooth=true,
-    initialization::IT=TaylorModeInit(prior.num_derivatives),
+    initialization::IT=TaylorModeInit(num_derivatives(prior)),
     chunk_size=Val{0}(),
     autodiff=Val{true}(),
     diff_type=Val{:forward},
diff --git a/src/caches.jl b/src/caches.jl
index 5360d7c08..9a1bef2a8 100644
--- a/src/caches.jl
+++ b/src/caches.jl
@@ -95,7 +95,7 @@ function OrdinaryDiffEq.alg_cache(
 
     is_secondorder_ode = f isa DynamicalODEFunction
 
-    q = alg.prior.num_derivatives
+    q = num_derivatives(alg.prior)
     d = is_secondorder_ode ? length(u[1, :]) : length(u)
     D = d * (q + 1)
 
@@ -105,8 +105,11 @@ function OrdinaryDiffEq.alg_cache(
 
     FAC = get_covariance_structure(alg; elType=uElType, d, q)
     if FAC isa IsometricKroneckerCovariance && !(f.mass_matrix isa UniformScaling)
-        error(
-            "The selected algorithm uses an efficient Kronecker-factorized implementation which is incompatible with the provided mass matrix. Try using the `EK1` instead.",
+        throw(
+            ArgumentError(
+                "The selected algorithm uses an efficient Kronecker-factorized " *
+                "implementation which is incompatible with the provided mass matrix. " *
+                "Try using the `EK1` instead."),
         )
     end
 
@@ -119,18 +122,28 @@ function OrdinaryDiffEq.alg_cache(
     SolProj = solution_space_projection(FAC, is_secondorder_ode)
 
     # Prior dynamics
-    prior = if alg.prior isa IWP
-        IWP{uElType}(d, alg.prior.num_derivatives)
-    elseif alg.prior isa IOUP && ismissing(alg.prior.rate_parameter)
-        r = Array{uElType}(calloc, d, d)
-        IOUP{uElType}(d, q, r, alg.prior.update_rate_parameter)
-    elseif alg.prior isa IOUP
-        IOUP{uElType}(d, q, alg.prior.rate_parameter, alg.prior.update_rate_parameter)
-    elseif alg.prior isa Matern
-        Matern{uElType}(d, q, alg.prior.lengthscale)
-    else
-        error("Invalid prior $(alg.prior)")
+    if !(dim(alg.prior) == 1 || dim(alg.prior) == d)
+        throw(
+            DimensionMismatch(
+                "The dimension of the prior is not compatible with the dimension " *
+                "of the problem! The given ODE is $(d)-dimensional, but the prior is " *
+                "$(dim(alg.prior))-dimensional. Please make sure that the dimension of " *
+                "the prior is either 1 or $(d)."),
+        )
+    end
+    prior = remake(alg.prior; elType=uElType, dim=d)
+    if (prior isa IOUP) && prior.update_rate_parameter
+        if !(prior.rate_parameter isa Missing)
+            throw(
+                ArgumentError(
+                    "Do not manually set the `rate_parameter` of the IOUP prior when " *
+                    "using the `update_rate_parameter=true` option." *
+                    "Reset the prior and try again."),
+            )
+        end
+        prior = remake(prior; rate_parameter=Array{uElType}(calloc, d, d))
     end
+
     A, Q, Ah, Qh, P, PI = initialize_transition_matrices(FAC, prior, dt)
     F, L = to_sde(prior)
     F, L = to_factorized_matrix(FAC, F), to_factorized_matrix(FAC, L)
@@ -146,15 +159,8 @@ function OrdinaryDiffEq.alg_cache(
     measurement_model = make_measurement_model(f)
 
     # Initial State
-    initial_variance = ones(uElType, q + 1)
-    μ0 = uElType <: LinearAlgebra.BlasFloat ? Array{uElType}(calloc, D) : zeros(uElType, D)
-    Σ0 = PSDMatrix(
-        to_factorized_matrix(
-            FAC,
-            IsometricKroneckerProduct(d, diagm(sqrt.(initial_variance))),
-        ),
-    )
-    x0 = Gaussian(μ0, Σ0)
+    x0 = initial_distribution(prior)
+    x0 = Gaussian(x0.μ, to_factorized_matrix(FAC, x0.Σ))
 
     # Diffusion Model
     diffmodel = alg.diffusionmodel
diff --git a/src/covariance_structure.jl b/src/covariance_structure.jl
index 159c8d918..7390b5124 100644
--- a/src/covariance_structure.jl
+++ b/src/covariance_structure.jl
@@ -42,6 +42,8 @@ factorized_similar(::DenseCovariance{T}, size1, size2) where {T} =
     similar(Matrix{T}, size1, size2)
 
 to_factorized_matrix(::DenseCovariance, M::AbstractMatrix) = Matrix(M)
-to_factorized_matrix(::IsometricKroneckerCovariance, M::AbstractMatrix) =
-    IsometricKroneckerProduct(M) # probably errors
 to_factorized_matrix(::IsometricKroneckerCovariance, M::IsometricKroneckerProduct) = M
+for FT in [:DenseCovariance, :IsometricKroneckerCovariance]
+    @eval to_factorized_matrix(FAC::$FT, M::PSDMatrix) =
+        PSDMatrix(to_factorized_matrix(FAC, M.R))
+end
diff --git a/src/priors/common.jl b/src/priors/common.jl
index 49522ce8f..f6bf51a69 100644
--- a/src/priors/common.jl
+++ b/src/priors/common.jl
@@ -1,12 +1,120 @@
-abstract type AbstractODEFilterPrior{elType} end
+@doc raw"""
+    AbstractGaussMarkovProcess{elType}
 
+Abstract type for Gauss-Markov processes.
+
+Gauss-Markov processes are solutions to linear time-invariant stochastic differential
+equations (SDEs). Here we assume SDEs of the form
+```math
+\begin{aligned}
+dX_t &= F X_t dt + L dW_t \\
+X_0 &= \mathcal{N} \left( X_0; \mu_0, \Sigma_0 \right)
+\end{aligned}
+```
+where ``X_t`` is the state, ``W_t`` is a Wiener process, and ``F`` and ``L`` are matrices.
+
+Currently, ProbNumDiffEq.jl makes many assumptions about the structure of the SDEs that it
+can solve. In particular, it assumes that the state vector ``X_t`` contains a range of
+dervatives, and that the Wiener process only enters the highest one. It also assumes a
+certain ordering of dimensions and derivatives. This is not a limitation of the underlying
+mathematics, but rather a limitation of the current implementation. In the future, we hope
+to remove these limitations.
+
+!!! warning
+    We currently strongly recommended to not implement your own Gauss-Markov process by
+    subtyping this type! The interface is not yet stable, and the implementation is not yet
+    sufficiently documented. Proceed at your own risk.
+"""
+abstract type AbstractGaussMarkovProcess{elType} end
+
+############################################################################################
+# Interface
+############################################################################################
+"""
+    dim(p::AbstractGaussMarkovProcess)
+
+Return the dimension of the process.
+
+This is not the dimension of the "state" that is used to efficiently model the prior
+process as a state-space model, but it is the dimension of the process itself that we aim
+to model.
+
+See [`AbstractGaussMarkovProcess`](@ref) for more details on Gauss-Markov processes in ProbNumDiffEq.
+"""
+dim(p::AbstractGaussMarkovProcess) = p.dim
+
+"""
+    num_derivatives(p::AbstractGaussMarkovProcess)
+
+Return the number of derivatives that are represented by the processes state.
+
+See [`AbstractGaussMarkovProcess`](@ref) for more details on Gauss-Markov processes in ProbNumDiffEq.
+"""
+num_derivatives(p::AbstractGaussMarkovProcess) = p.num_derivatives
+
+"""
+    discretize(p::AbstractGaussMarkovProcess, step_size::Real)
+
+Compute the transition matrices of the process for a given step size.
+"""
+discretize(p::AbstractGaussMarkovProcess, step_size::Real) =
+    discretize(to_sde(p), step_size)
+
+"""
+    initial_distribution(p::AbstractGaussMarkovProcess)
+
+Return the initial distribution of the process.
+
+Currently this is always a Gaussian distribution with zero mean and unit variance, unless
+explicitly overwitten (e.g. for Matern processes to have the stationary distribution).
+This implementation is likely to change in the future to allow for more flexibility.
+"""
+initial_distribution(p::AbstractGaussMarkovProcess{T}) where {T} = begin
+    d, q = dim(p), num_derivatives(p)
+    D = d * (q + 1)
+    initial_variance = ones(T, q + 1)
+    μ0 = T <: LinearAlgebra.BlasFloat ? Array{T}(calloc, D) : zeros(T, D)
+    Σ0 = PSDMatrix(IsometricKroneckerProduct(d, diagm(sqrt.(initial_variance))))
+    return Gaussian(μ0, Σ0)
+end
+
+"""
+    SciMLBase.remake(::AbstractGaussMarkovProcess{T}; eltype=T, kwargs...)
+
+Create a new process of the same type, but with different parameters.
+This is particularly used to set the Wiener process dimension, so that the prior can be
+defined with missing dimension first, and then have the dimension set to the dimension of
+the ODE. This corresponds to having the same prior for all dimensions of the ODE.
+Similarly, the element type of the process is also set to the element type of the ODE.
+"""
+remake(p::AbstractGaussMarkovProcess{T}; elType=T, kwargs...) where {T}
+
+@doc raw"""
+    to_sde(p::AbstractGaussMarkovProcess)
+
+Convert the prior to the corresponding SDE.
+
+Gauss-Markov processes are solutions to linear time-invariant stochastic differential
+equations (SDEs) of the form
+```math
+\begin{aligned}
+dX_t &= F X_t dt + L dW_t \\
+X_0 &= \mathcal{N} \left( X_0; \mu_0, \Sigma_0 \right)
+\end{aligned}
+```
+where ``X_t`` is the state, ``W_t`` is a Wiener process, and ``F`` and ``L`` are matrices.
+This function returns the corresponding SDE, i.e. the matrices ``F`` and ``L``, as a
+[`LTISDE`](@ref).
+"""
+to_sde(p::AbstractGaussMarkovProcess)
+
+############################################################################################
+# General implementations
+############################################################################################
 function initialize_preconditioner(
-    FAC::CovarianceStructure{T1},
-    p::AbstractODEFilterPrior{T},
-    dt,
-) where {T,T1}
+    FAC::CovarianceStructure{T1}, p::AbstractGaussMarkovProcess{T}, dt) where {T,T1}
     @assert T == T1
-    d, q = p.wiener_process_dimension, p.num_derivatives
+    d, q = dim(p), num_derivatives(p)
     P, PI = init_preconditioner(FAC)
     make_preconditioner!(P, dt, d, q)
     make_preconditioner_inv!(PI, dt, d, q)
@@ -14,7 +122,7 @@ function initialize_preconditioner(
 end
 
 """
-    initilize_transition_matrices!(p::AbstractODEFilterPrior)
+    initilize_transition_matrices!(p::AbstractGaussMarkovProcess)
 
 Create all the (moslty empty) matrices that relate to the transition model.
 
@@ -38,10 +146,10 @@ See also: [`make_transition_matrices`](@ref).
 """
 function initialize_transition_matrices(
     FAC::DenseCovariance,
-    p::AbstractODEFilterPrior{T},
+    p::AbstractGaussMarkovProcess{T},
     dt,
 ) where {T}
-    d, q = p.wiener_process_dimension, p.num_derivatives
+    d, q = dim(p), num_derivatives(p)
     D = d * (q + 1)
     Ah, Qh = zeros(T, D, D), PSDMatrix(zeros(T, D, D))
     P, PI = initialize_preconditioner(FAC, p, dt)
@@ -49,16 +157,15 @@ function initialize_transition_matrices(
     Q = copy(Qh)
     return A, Q, Ah, Qh, P, PI
 end
-function initialize_transition_matrices(
+initialize_transition_matrices(
     FAC::CovarianceStructure,
-    p::AbstractODEFilterPrior,
+    p::AbstractGaussMarkovProcess,
     dt,
-)
+) =
     error("The chosen prior can not be implemented with a $FAC factorization")
-end
 
 """
-    make_transition_matrices!(cache, prior::AbstractODEFilterPrior, dt)
+    make_transition_matrices!(cache, prior::AbstractGaussMarkovProcess, dt)
 
 Construct all the matrices that relate to the transition model, for a specified step size.
 
@@ -83,10 +190,10 @@ See also: [`initialize_transition_matrices`](@ref).
 
 [1] N. Krämer, P. Hennig: **Stable Implementation of Probabilistic ODE Solvers** (2020)
 """
-function make_transition_matrices!(cache, prior::AbstractODEFilterPrior, dt)
+function make_transition_matrices!(cache, prior::AbstractGaussMarkovProcess, dt)
     @unpack A, Q, Ah, Qh, P, PI = cache
     make_preconditioners!(cache, dt)
-    _Ah, _Qh = discretize(cache.prior, dt)
+    _Ah, _Qh = discretize(prior, dt)
     copy!(Ah, _Ah)
     copy!(Qh, _Qh)
     # A = P * Ah * PI
@@ -99,8 +206,48 @@ make_transition_matrices!(cache::AbstractODEFilterCache, dt) =
     make_transition_matrices!(cache, cache.prior, dt)
 
 """
-    to_sde(p::AbstractODEFilterPrior)
+    marginalize(process::AbstractGaussMarkovProcess, times)
 
-Convert the prior to the corresponding SDE.
+Compute the marginal distributions of the process at the given time points.
+
+This function computes the marginal distributions of the process at the given times.
+It does so by discretizing the process with the given step sizes (using
+[`ProbNumDiffEq.discretize`](@ref)), and then computing the marginal distributions of the
+resulting Gaussian distributions.
+
+See also: [`sample`](@ref).
+"""
+function marginalize(process::AbstractGaussMarkovProcess, times)
+    out = []
+    X = initial_distribution(process)
+    push!(out, X)
+    for i in 2:length(times)
+        dt = times[i] - times[i-1]
+        A, Q = ProbNumDiffEq.discretize(process, dt)
+        X = predict(X, A, Q)
+        push!(out, X)
+    end
+    return out
+end
+
+"""
+    sample(process::AbstractGaussMarkovProcess, times, N=1)
+
+Samples from the Gauss-Markov process on the given time grid.
+
+See also: [`marginalize`](@ref).
 """
-to_sde(p::AbstractODEFilterPrior)
+function sample(process::AbstractGaussMarkovProcess, times, N=1)
+    out = []
+    X = initial_distribution(process)
+    X = Gaussian(mean(X), Matrix(cov(X)))
+    s = rand(X, N)
+    push!(out, s)
+    for i in 2:length(times)
+        dt = times[i] - times[i-1]
+        A, Q = Matrix.(discretize(process, dt))
+        s = [rand(Gaussian(A * s[:, j], Q)) for j in 1:N] |> stack |> permutedims
+        push!(out, s)
+    end
+    return out
+end
diff --git a/src/priors/ioup.jl b/src/priors/ioup.jl
index 65006bac9..61f1d25c7 100644
--- a/src/priors/ioup.jl
+++ b/src/priors/ioup.jl
@@ -1,68 +1,68 @@
 @doc raw"""
-    IOUP([wiener_process_dimension::Integer,]
+    IOUP([dim::Integer=1,]
          num_derivatives::Integer,
          rate_parameter::Union{Number,AbstractVector,AbstractMatrix})
 
 Integrated Ornstein--Uhlenbeck process.
 
-As with the [`IWP`](@ref), the IOUP can be created without specifying its dimension,
-in which case it will be inferred from the dimension of the ODE during the solve.
-This is typically the preferred usage.
-The rate parameter however always needs to be specified.
+This prior is mostly used in the context of
+[Probabilistic Exponential Integrators](@ref probexpinttutorial)
+to include the linear part of a semi-linear ODE in the prior,
+so it is used in the [`ExpEK`](@ref) and the [`RosenbrockExpEK`](@ref).
 
 # In math
+The IOUP is a Gauss--Markov process, which we model with a state representation
+```math
+\begin{aligned}
+Y(t) = \left[ Y^{(0)}(t), Y^{(1)}(t), \dots Y^{(q)}(t) \right],
+\end{aligned}
+```
+defined as the solution of the stochastic differential equation
 ```math
 \begin{aligned}
 \text{d} Y^{(i)}(t) &= Y^{(i+1)}(t) \ \text{d}t, \qquad i = 0, \dots, q-1 \\
 \text{d} Y^{(q)}(t) &= L Y^{(q)}(t) \ \text{d}t + \Gamma \ \text{d}W(t),
 \end{aligned}
 ```
-where ``L`` is the `rate_parameter`.
+where ``L`` is called the `rate_parameter`.
+Then, ``Y^{(0)}(t)`` is the ``q``-times integrated Ornstein--Uhlenbeck process (IOUP) and
+``Y^{(i)}(t)`` is the ``i``-th derivative of the IOUP, for ``i = 1, \dots, q``.
 
 # Examples
 ```julia-repl
 julia> solve(prob, EK1(prior=IOUP(2, -1)))
 ```
 """
-struct IOUP{elType,dimType,R} <: AbstractODEFilterPrior{elType}
-    wiener_process_dimension::dimType
+struct IOUP{elType,R} <: AbstractGaussMarkovProcess{elType}
+    dim::Int
     num_derivatives::Int
     rate_parameter::R
     update_rate_parameter::Bool
 end
+IOUP{elType}(
+    ; dim, num_derivatives, rate_parameter, update_rate_parameter=false) where {elType} =
+    IOUP{elType,typeof(rate_parameter)}(
+        dim, num_derivatives, rate_parameter, update_rate_parameter)
+IOUP(; dim=1, num_derivatives, rate_parameter, update_rate_parameter=false) =
+    IOUP{typeof(1.0)}(; dim, num_derivatives, rate_parameter, update_rate_parameter)
+IOUP(num_derivatives, rate_parameter; update_rate_parameter=false) =
+    IOUP(; dim=1, num_derivatives, rate_parameter, update_rate_parameter)
 IOUP(num_derivatives; update_rate_parameter) = begin
     @assert update_rate_parameter
-    IOUP(missing, num_derivatives, missing, update_rate_parameter)
+    IOUP(num_derivatives, missing; update_rate_parameter)
 end
-IOUP(num_derivatives, rate_parameter; update_rate_parameter=false) =
-    IOUP(missing, num_derivatives, rate_parameter, update_rate_parameter)
-IOUP(
-    wiener_process_dimension,
-    num_derivatives,
-    rate_parameter,
-    update_rate_parameter=false,
-) =
-    IOUP{typeof(1.0)}(
-        wiener_process_dimension,
-        num_derivatives,
-        rate_parameter,
-        update_rate_parameter,
-    )
-IOUP{T}(
-    wiener_process_dimension,
-    num_derivatives,
-    rate_parameter,
-    update_rate_parameter=false,
-) where {T} =
-    IOUP{T,typeof(wiener_process_dimension),typeof(rate_parameter)}(
-        wiener_process_dimension,
-        num_derivatives,
-        rate_parameter,
-        update_rate_parameter,
-    )
-
-function to_sde(p::IOUP{T,D,<:Number}) where {T,D}
-    q = p.num_derivatives
+
+remake(
+    p::IOUP{T};
+    elType=T,
+    dim=dim(p),
+    num_derivatives=num_derivatives(p),
+    rate_parameter=p.rate_parameter,
+    update_rate_parameter=p.update_rate_parameter,
+) where {T} = IOUP{elType}(; dim, num_derivatives, rate_parameter, update_rate_parameter)
+
+function to_sde(p::IOUP{T,<:Number}) where {T}
+    q = num_derivatives(p)
     r = p.rate_parameter
 
     F_breve = diagm(1 => ones(q))
@@ -71,14 +71,13 @@ function to_sde(p::IOUP{T,D,<:Number}) where {T,D}
     L_breve = zeros(q + 1)
     L_breve[end] = 1.0
 
-    d = p.wiener_process_dimension
+    d = dim(p)
     F = IsometricKroneckerProduct(d, F_breve)
     L = IsometricKroneckerProduct(d, L_breve)
     return LTISDE(F, L)
 end
 function to_sde(p::IOUP)
-    d = p.wiener_process_dimension
-    q = p.num_derivatives
+    d, q = dim(p), num_derivatives(p)
     r = p.rate_parameter
 
     R = if r isa AbstractVector
@@ -101,35 +100,18 @@ function to_sde(p::IOUP)
     return LTISDE(F, L)
 end
 
-function discretize(p::IOUP, dt::Real)
-    F, L = to_sde(p)
-    if F isa IsometricKroneckerProduct
-        method = FiniteHorizonGramians.ExpAndGram{eltype(F.B),13}()
-        A_breve, QR_breve = FiniteHorizonGramians.exp_and_gram_chol(F.B, L.B, dt, method)
-        A = IsometricKroneckerProduct(F.ldim, A_breve)
-        Q = PSDMatrix(IsometricKroneckerProduct(F.ldim, QR_breve))
-        return A, Q
-    else
-        method = FiniteHorizonGramians.ExpAndGram{eltype(F),13}()
-        A, QR = FiniteHorizonGramians.exp_and_gram_chol(F, L, dt, method)
-        Q = PSDMatrix(QR)
-        return A, Q
-    end
-end
-
-function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:Any,<:AbstractMatrix})
-    q = prior.num_derivatives
+function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:AbstractMatrix})
+    q = num_derivatives(prior)
     r = prior.rate_parameter
     F[q+1:q+1:end, q+1:q+1:end] = r
 end
-function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:Any,<:AbstractVector})
-    q = prior.num_derivatives
+function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:AbstractVector})
+    q = num_derivatives(prior)
     r = prior.rate_parameter
     F[q+1:q+1:end, q+1:q+1:end] = Diagonal(r)
 end
-function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:Any,<:Number})
-    d = prior.wiener_process_dimension
-    q = prior.num_derivatives
+function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:Number})
+    d, q = dim(prior), num_derivatives(prior)
     r = prior.rate_parameter
     F[q+1:q+1:end, q+1:q+1:end] = Diagonal(Fill(r, d))
 end
diff --git a/src/priors/iwp.jl b/src/priors/iwp.jl
index f675c6352..3a7d7f311 100644
--- a/src/priors/iwp.jl
+++ b/src/priors/iwp.jl
@@ -1,43 +1,57 @@
 @doc raw"""
-    IWP([wiener_process_dimension::Integer,] num_derivatives::Integer)
+    IWP([dim::Integer=1,] num_derivatives::Integer)
 
-Integrated Wiener process.
+``q``-times integrated Wiener process.
 
-**This is the recommended prior!** It is the most well-tested prior, both in this package
-and in the probabilistic numerics literature in general
-(see the [references](@ref references)).
+**This is the recommended prior!**
+The IWP is the most common prior choice in the probabilistic ODE solver literature
+(see the [references](@ref references)) and is the default choice for the solvers in
+ProbNumDiffEq.jl.
 It is also the prior that has the most efficient implementation.
 
 The IWP can be created without specifying the dimension of the Wiener process,
-in which case it will be inferred from the dimension of the ODE during the solve.
+in which case it will be one-dimensional. The ODE solver then assumes that each dimension
+of the ODE solution should be modeled with the same prior.
 This is typically the preferred usage.
 
 # In math
+The IWP is a Gauss--Markov process, which we model with a state representation
+```math
+\begin{aligned}
+Y(t) = \left[ Y^{(0)}(t), Y^{(1)}(t), \dots Y^{(q)}(t) \right],
+\end{aligned}
+```
+defined as the solution of the stochastic differential equation
 ```math
 \begin{aligned}
 \text{d} Y^{(i)}(t) &= Y^{(i+1)}(t) \ \text{d}t, \qquad i = 0, \dots, q-1 \\
 \text{d} Y^{(q)}(t) &= \Gamma \ \text{d}W(t).
 \end{aligned}
 ```
+Then, ``Y^{(0)}(t)`` is the ``q``-times integrated Wiener process (IWP) and
+``Y^{(i)}(t)`` is the ``i``-th derivative of the IWP, for ``i = 1, \dots, q``.
 
 # Examples
 ```julia-repl
 julia> solve(prob, EK1(prior=IWP(2)))
 ```
 """
-struct IWP{elType,dimType} <: AbstractODEFilterPrior{elType}
-    wiener_process_dimension::dimType
+struct IWP{elType} <: AbstractGaussMarkovProcess{elType}
+    dim::Int
     num_derivatives::Int
 end
-# most convenient user-facing constructor:
-IWP(num_derivatives) = IWP{typeof(1.0)}(missing, num_derivatives)
-IWP{elType}(wiener_process_dimension, num_derivatives) where {elType} =
-    IWP{elType,typeof(wiener_process_dimension)}(wiener_process_dimension, num_derivatives)
-IWP(wiener_process_dimension, num_derivatives) =
-    IWP{typeof(1.0)}(wiener_process_dimension, num_derivatives)
+IWP(; dim, num_derivatives) = IWP{typeof(1.0)}(dim, num_derivatives)
+IWP(num_derivatives) = IWP(; dim=1, num_derivatives)
+
+remake(
+    p::IWP{T};
+    elType=T,
+    dim=dim(p),
+    num_derivatives=num_derivatives(p),
+) where {T} = IWP{elType}(dim, num_derivatives)
 
 function to_sde(p::IWP)
-    d, q = p.wiener_process_dimension, p.num_derivatives
+    d, q = dim(p), num_derivatives(p)
 
     F_breve = diagm(1 => ones(q))
     L_breve = zeros(q + 1)
@@ -79,8 +93,8 @@ adjusted such that we obtain the left square root, with different ordering of th
     return L
 end
 
-function preconditioned_discretize_1d(iwp::IWP{elType}) where {elType}
-    q = iwp.num_derivatives
+function preconditioned_discretize_1d(p::IWP{elType}) where {elType}
+    q = num_derivatives(p)
 
     A_breve = binomial.(q:-1:0, (q:-1:0)')
     # Q_breve = Cauchy(collect(q:-1.0:0.0), collect((q+1):-1.0:1.0)) |> Matrix  # for Julia1.6
@@ -101,11 +115,11 @@ Generate the discrete dynamics for a q-times integrated Wiener process (IWP).
 The returned matrices `A::AbstractMatrix` and `Q::PSDMatrix` should be used in combination
 with the preconditioners; see `./src/preconditioning.jl`.
 """
-function preconditioned_discretize(iwp::IWP)
-    A_breve, Q_breve = preconditioned_discretize_1d(iwp)
+function preconditioned_discretize(p::IWP)
+    A_breve, Q_breve = preconditioned_discretize_1d(p)
     QR_breve = Q_breve.R
 
-    d = iwp.wiener_process_dimension
+    d = dim(p)
     A = IsometricKroneckerProduct(d, Matrix(A_breve))
     QR = IsometricKroneckerProduct(d, Matrix(QR_breve))
     Q = PSDMatrix(QR)
@@ -113,8 +127,8 @@ function preconditioned_discretize(iwp::IWP)
     return A, Q
 end
 
-function discretize_1d(iwp::IWP{elType}, dt::Real) where {elType}
-    q = iwp.num_derivatives
+function discretize_1d(p::IWP{elType}, dt::Real) where {elType}
+    q = num_derivatives(p)
 
     v = 0:q
 
@@ -134,7 +148,7 @@ end
 
 function discretize(p::IWP, dt::Real)
     A_breve, Q_breve = discretize_1d(p, dt)
-    d = p.wiener_process_dimension
+    d = dim(p)
     A = IsometricKroneckerProduct(d, A_breve)
     QR = IsometricKroneckerProduct(d, Q_breve.R)
     Q = PSDMatrix(QR)
diff --git a/src/priors/ltisde.jl b/src/priors/ltisde.jl
index b6d088194..6f7f14d57 100644
--- a/src/priors/ltisde.jl
+++ b/src/priors/ltisde.jl
@@ -11,8 +11,6 @@ where ``X_t`` is the state, ``W_t`` is a Wiener process, and ``F`` and ``L`` are
 This `LTISDE` object holds the matrices ``F`` and ``L``.
 It also provides some functionality to discretize the SDE via a matrix-fraction decomposition.
 See: [`discretize(::LTISDE, ::Real)`](@ref).
-
-In this package, the LTISDE is mostly used to implement the discretization of the [`IOUP`](@ref) prior.
 """
 struct LTISDE{AT<:AbstractMatrix,BT<:AbstractVecOrMat}
     F::AT
@@ -25,6 +23,11 @@ iterate(sde::LTISDE) = sde.F, true
 iterate(sde::LTISDE, s) = s ? (sde.L, false) : nothing
 length(sde::LTISDE) = 2
 
+"""
+    discretize(p::LTISDE, step_size::Real)
+
+Compute the transition matrices of the SDE solution for a given step size.
+"""
 discretize(sde::LTISDE, dt::Real) = discretize(drift(sde), dispersion(sde), dt)
 discretize(F::AbstractMatrix, L::AbstractMatrix, dt::Real) = begin
     method = FiniteHorizonGramians.ExpAndGram{eltype(F),13}()
diff --git a/src/priors/matern.jl b/src/priors/matern.jl
index 1b5e993a5..feac6f00f 100644
--- a/src/priors/matern.jl
+++ b/src/priors/matern.jl
@@ -1,16 +1,23 @@
 @doc raw"""
-    Matern([wiener_process_dimension::Integer,]
+    Matern([dim::Integer=1,]
            num_derivatives::Integer,
            lengthscale::Number)
 
 Matern process.
 
-As with the [`IWP`](@ref), the Matern can be created without specifying its dimension,
-in which case it will be inferred from the dimension of the ODE during the solve.
-This is typically the preferred usage.
-The lengthscale parameter however always needs to be specified.
+The class of [`Matern`](@ref) processes is well-known in the Gaussian process literature,
+and they also have a corresponding SDE representation similarly to the
+[`IWP`](@ref) and the [`IOUP`](@ref).
+See also [sarkka19appliedsde](@cite) for more details.
 
 # In math
+A Matern process is a Gauss--Markov process, which we model with a state representation
+```math
+\begin{aligned}
+Y(t) = \left[ Y^{(0)}(t), Y^{(1)}(t), \dots Y^{(q)}(t) \right],
+\end{aligned}
+```
+defined as the solution of the stochastic differential equation
 ```math
 \begin{aligned}
 \text{d} Y^{(i)}(t) &= Y^{(i+1)}(t) \ \text{d}t, \qquad i = 0, \dots, q-1 \\
@@ -20,31 +27,46 @@ The lengthscale parameter however always needs to be specified.
   Y^{(j)}(t) \right) \ \text{d}t + \Gamma \ \text{d}W(t).
 \end{aligned}
 ```
-where ``l`` is the `lengthscale`.
+where ``l`` is called the `lengthscale` parameter.
+Then, ``Y^{(0)}(t)`` is a Matern process and
+``Y^{(i)}(t)`` is the ``i``-th derivative of this process, for ``i = 1, \dots, q``.
 
 # Examples
 ```julia-repl
 julia> solve(prob, EK1(prior=Matern(2, 1)))
 ```
 """
-struct Matern{elType,dimType,R} <: AbstractODEFilterPrior{elType}
-    wiener_process_dimension::dimType
+struct Matern{elType,L} <: AbstractGaussMarkovProcess{elType}
+    dim::Int
     num_derivatives::Int
-    lengthscale::R
+    lengthscale::L
 end
+Matern{elType}(; dim, num_derivatives, lengthscale) where {elType} =
+    Matern{elType,typeof(lengthscale)}(dim, num_derivatives, lengthscale)
+Matern(; dim, num_derivatives, lengthscale) =
+    Matern{typeof(1.0)}(; dim, num_derivatives, lengthscale)
 Matern(num_derivatives, lengthscale) =
-    Matern(missing, num_derivatives, lengthscale)
-Matern(wiener_process_dimension, num_derivatives, lengthscale) =
-    Matern{typeof(1.0)}(wiener_process_dimension, num_derivatives, lengthscale)
-Matern{T}(wiener_process_dimension, num_derivatives, lengthscale) where {T} =
-    Matern{T,typeof(wiener_process_dimension),typeof(lengthscale)}(
-        wiener_process_dimension,
-        num_derivatives,
-        lengthscale,
-    )
+    Matern(; dim=1, num_derivatives, lengthscale)
+
+remake(
+    p::Matern{T};
+    elType=T,
+    dim=dim(p),
+    num_derivatives=num_derivatives(p),
+    lengthscale=p.lengthscale,
+) where {T} = Matern{elType}(; dim, num_derivatives, lengthscale)
+
+initial_distribution(p::Matern{T}) where {T} = begin
+    d, q = dim(p), num_derivatives(p)
+    D = d * (q + 1)
+    sde = to_sde(p)
+    μ0 = T <: LinearAlgebra.BlasFloat ? Array{T}(calloc, D) : zeros(T, D)
+    Σ0 = PSDMatrix(plyapc(sde.F, sde.L)')
+    return Gaussian(μ0, Σ0)
+end
 
 function to_sde(p::Matern)
-    q = p.num_derivatives
+    d, q = dim(p), num_derivatives(p)
     l = p.lengthscale
     @assert l isa Number
 
@@ -57,14 +79,7 @@ function to_sde(p::Matern)
     L_breve = zeros(q + 1)
     L_breve[end] = 1.0
 
-    d = p.wiener_process_dimension
     F = IsometricKroneckerProduct(d, F_breve)
     L = IsometricKroneckerProduct(d, L_breve)
     return LTISDE(F, L)
 end
-function discretize(p::Matern, dt::Real)
-    l = p.lengthscale
-    @assert l isa Number
-    A, Q = discretize(to_sde(p), dt)
-    return A, Q
-end
diff --git a/test/core/preconditioning.jl b/test/core/preconditioning.jl
index 7c6598a05..41502fa6b 100644
--- a/test/core/preconditioning.jl
+++ b/test/core/preconditioning.jl
@@ -13,7 +13,7 @@ prob = remake(prob, tspan=(0.0, 10.0))
 
     d, q = 2, 3
 
-    prior = PNDE.IWP(d, q)
+    prior = PNDE.IWP(dim=d, num_derivatives=q)
 
     Ah, Qh = PNDE.discretize(prior, h)
     Qh = PNDE.apply_diffusion(Qh, σ^2)
diff --git a/test/core/priors.jl b/test/core/priors.jl
index e51955131..dac7769f2 100644
--- a/test/core/priors.jl
+++ b/test/core/priors.jl
@@ -1,15 +1,24 @@
-using ProbNumDiffEq: make_transition_matrices!
+using ProbNumDiffEq: make_transition_matrices!, dim, num_derivatives
 using ProbNumDiffEq
 import ProbNumDiffEq as PNDE
 using Test
 using LinearAlgebra
 using FiniteHorizonGramians
+using Statistics
+using Plots
+using SimpleUnPack
 
 h = 0.1
 σ = 0.1
 
 @testset "General prior API" begin
-    for prior in (IWP(2, 3), IOUP(2, 3, 1), Matern(2, 3, 1))
+    for prior in (
+        IWP(dim=2, num_derivatives=3),
+        IOUP(dim=2, num_derivatives=3, rate_parameter=1),
+        Matern(dim=2, num_derivatives=3, lengthscale=1),
+    )
+        d, q = dim(prior), num_derivatives(prior)
+
         sde = PNDE.to_sde(prior)
         A1, Q1 = PNDE.discretize(sde, h)
         A2, Q2 = PNDE.discretize(prior, h)
@@ -26,13 +35,26 @@ h = 0.1
             PNDE.LTISDE(Matrix(sde.F), Matrix(sde.L)), h)
         @test A1 ≈ A4
         @test Q1.R ≈ Q4R
+
+        ts = 0:0.1:1
+        marginals = @test_nowarn PNDE.marginalize(prior, ts)
+        @test length(marginals) == length(ts)
+        @test marginals[1] isa Gaussian
+
+        N = 3
+        samples = @test_nowarn PNDE.sample(prior, ts, N)
+        @test length(samples) == length(ts)
+        @test size(samples[1]) == (d * (q + 1), N)
+
+        @test_nowarn plot(prior, ts; plot_derivatives=true)
+        @test_nowarn plot(prior, ts; plot_derivatives=false)
     end
 end
 
 @testset "Test IWP (d=2,q=2)" begin
     d, q = 2, 2
 
-    prior = PNDE.IWP(d, q)
+    prior = PNDE.IWP(dim=d, num_derivatives=q)
 
     # true sde parameters
     F = [
@@ -139,7 +161,7 @@ end
 end
 
 function test_make_transition_matrices(prior, Atrue, Qtrue)
-    d, q = prior.wiener_process_dimension, prior.num_derivatives
+    d, q = dim(prior), num_derivatives(prior)
     @testset "Test `make_transition_matrices!`" begin
         A, Q, Ah, Qh, P, PI = PNDE.initialize_transition_matrices(
             PNDE.DenseCovariance{Float64}(d, q), prior, h)
@@ -177,7 +199,7 @@ end
     d, q = 2, 2
     r = randn(d, d)
 
-    prior = PNDE.IOUP(d, q, r)
+    prior = PNDE.IOUP(dim=d, num_derivatives=q, rate_parameter=r)
 
     sde = PNDE.to_sde(prior)
     F = [
@@ -216,7 +238,7 @@ end
     λ = sqrt(2ν) / l
     a(i) = binomial(q + 1, i - 1)
 
-    prior = PNDE.Matern(d, q, l)
+    prior = PNDE.Matern(dim=d, num_derivatives=q, lengthscale=l)
 
     sde = PNDE.to_sde(prior)
     F = [
@@ -245,4 +267,11 @@ end
     @test Matrix(Q1) ≈ Matrix(Q2)
 
     test_make_transition_matrices(prior, A1, Q1)
+
+    @testset "Test initial distribution being stationary" begin
+        D0 = PNDE.initial_distribution(prior)
+        D1 = PNDE.predict(D0, A1, Q1)
+        @test mean(D0) ≈ mean(D1)
+        @test Matrix(cov(D0)) ≈ Matrix(cov(D1))
+    end
 end
diff --git a/test/correctness.jl b/test/correctness.jl
index 22e52c508..df76f16de 100644
--- a/test/correctness.jl
+++ b/test/correctness.jl
@@ -37,6 +37,12 @@ CONSTANT_ALGS = (
     EK0(order=3, smooth=true, diffusionmodel=DynamicMVDiffusion()) => 1e-8,
     EK1(order=3, smooth=true) => 1e-8,
     EK1(order=3, smooth=true, diffusionmodel=FixedDiffusion()) => 1e-8,
+    # Priors
+    EK0(prior=IOUP(3, -1), smooth=true) => 2e-9,
+    EK1(prior=IOUP(3, -1), smooth=true, diffusionmodel=FixedDiffusion()) => 1e-9,
+    EK1(prior=IOUP(3, update_rate_parameter=true), smooth=true) => 1e-9,
+    EK0(prior=Matern(3, 1), smooth=true) => 5e-7,
+    EK1(prior=Matern(4, 0.1), smooth=true, diffusionmodel=FixedDiffusion()) => 2e-5,
 )
 ADAPTIVE_ALGS = (
     EK0(order=2) => 2e-4,
@@ -53,6 +59,12 @@ ADAPTIVE_ALGS = (
     EK1(order=8) => 5e-6,
     EK1(order=3, initialization=ClassicSolverInit()) => 1e-5,
     EK1(order=3, initialization=SimpleInit()) => 1e-4,
+    # Priors
+    EK0(prior=IOUP(3, -1), smooth=true) => 1e-5,
+    EK1(prior=IOUP(3, -1), smooth=true, diffusionmodel=FixedDiffusion()) => 1e-5,
+    EK1(prior=IOUP(3, update_rate_parameter=true), smooth=true) => 2e-5,
+    EK0(prior=Matern(3, 1), smooth=true) => 1e-4,
+    EK1(prior=Matern(3, 0.1), smooth=true, diffusionmodel=FixedDiffusion()) => 1e-5,
 )
 
 PROBS = (
diff --git a/test/errors_thrown.jl b/test/errors_thrown.jl
index ab3ada7ef..88c323259 100644
--- a/test/errors_thrown.jl
+++ b/test/errors_thrown.jl
@@ -24,3 +24,10 @@ end
     prob = prob_ode_lotkavolterra
     @test_throws ErrorException solve(prob, EK0(smooth=true), save_everystep=false)
 end
+
+@testset "Invalid prior" begin
+    prob = prob_ode_lotkavolterra
+    @test_throws DimensionMismatch solve(prob, EK0(prior=IWP(dim=3, num_derivatives=2)))
+    prior = IOUP(num_derivatives=1, rate_parameter=3, update_rate_parameter=true)
+    @test_throws ArgumentError solve(prob, EK0(; prior))
+end