Skip to content

Commit

Permalink
Minimal working example with PSDMatrices.jl (#139)
Browse files Browse the repository at this point in the history
* Minimal working example with PSDMatrices.jl

* Get smoothing and plotting to work (but it's _very_ slow)

* Fix the wrong index order in the error estimation

* Fix some of the tests

* Remote squarerootmatrix.jl

* Add a `diag(::SRMatrix)`

* Add fallbacks for X_A_Xt and X_A_Xt! back in

* Fix ALL the tests

* Switch to the diag implemented in PSDMatrices.jl

* Apply JuliaFormatter.jl

* No need to import diag anymore since PSDMats does it

* Make many of the preallocations more explicit

* General miniature improvements here and there

* For now: Don't check for diagonality in `condition_on!`

* Moar caching

* Improve the callback efficiency

* Make the callback a tiny bit nicer

* Remove `integ` from the `rk_init_improve` function

* Remove `integ` dep to possibly slightly reduce precompilation times

* Fix "test/correctness.jl" tests

* Remove some old unused argument (thanks to a thrown warning)

* Fix the remaining failing tests

* Changes from reviewing the PR

* Remove the outlier count test

* Apply JuliaFormatter

* Use LAPACKs geqrf instead of geqrt for QR decompositions

* Use an old Cholesky constructor to fix Julia 1.6 compatibility

* Replace the oop `triangularize_factor` with an in-place `qr!`

* Fix the solution plotting to show stddevs, not variances

* Fix outdated docstring

* Remove some old commented out lines

* Remove old unused lines

* Fix a line in `smooth`

* Relace a qr! with my custom_qr!

* Remove a comment

* Replace SRMatrix with PSDMatrix

* Fix the plotting standarddev computation

* Add the precompilation again
  • Loading branch information
nathanaelbosch authored May 11, 2022
1 parent c1b563b commit 1d5b415
Show file tree
Hide file tree
Showing 24 changed files with 204 additions and 260 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ GaussianDistributions = "43dcc890-d446-5863-8d1a-14597580bb8d"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
PSDMatrices = "fe68d972-6fd8-4755-bdf0-97d4c54cefdc"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Expand All @@ -28,6 +29,7 @@ ForwardDiff = "0.10"
GaussianDistributions = "0.5"
Octavian = "0.3"
OrdinaryDiffEq = "6.2 - 6.10"
PSDMatrices = "0.4.2"
RecipesBase = "1"
RecursiveArrayTools = "2"
Reexport = "0.2, 1.0"
Expand Down
13 changes: 6 additions & 7 deletions src/ProbNumDiffEq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ stack(x) = copy(reduce(hcat, x)')

using LinearAlgebra
import LinearAlgebra: mul!
"""LAPACK.geqrf! seems to be faster on small matrices than LAPACK.geqrt!"""
custom_qr!(A) = qr!(A)
custom_qr!(A::StridedMatrix{<:LinearAlgebra.BlasFloat}) = QR(LAPACK.geqrf!(A)...)
using TaylorSeries
using TaylorIntegration
@reexport using StructArrays
Expand Down Expand Up @@ -60,16 +63,12 @@ _matmul!(
B::AbstractVecOrMat{T},
) where {T<:LinearAlgebra.BlasFloat} = matmul!(C, A, B)

# @reexport using PSDMatrices
# import PSDMatrices: X_A_Xt
include("squarerootmatrix.jl")
const SRMatrix = SquarerootMatrix
export SRMatrix
@reexport using PSDMatrices
import PSDMatrices: X_A_Xt, X_A_Xt!
X_A_Xt(A, X) = X * A * X'
X_A_Xt!(out, A, X) = (out .= X * A * X')
apply_diffusion(Q, diffusion::Diagonal) = X_A_Xt(Q, sqrt.(diffusion))
apply_diffusion(Q::SRMatrix, diffusion::Number) =
SRMatrix(sqrt.(diffusion) * Q.squareroot, diffusion * Q.mat)
apply_diffusion(Q::PSDMatrix, diffusion::Number) = PSDMatrix(sqrt.(diffusion) * Q.R)

# All the Gaussian things
@reexport using GaussianDistributions
Expand Down
63 changes: 39 additions & 24 deletions src/caches.jl
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ mutable struct GaussianODEFilterCache{
G1::matType
G2::matType
covmatcache::matType
Smat::matType
C_dxd::matType
C_dxD::matType
C_Dxd::matType
C_DxD::matType
C_2DxD::matType
C_3DxD::matType
default_diffusion::diffusionType
local_diffusion::diffusionType
global_diffusion::diffusionType
Expand Down Expand Up @@ -94,8 +101,6 @@ function OrdinaryDiffEq.alg_cache(
calck,
::Val{IIP},
) where {IIP,uEltypeNoUnits,uBottomEltypeNoUnits,tTypeNoUnits}
initialize_derivatives = true

if u isa Number
error("We currently don't support scalar-valued problems")
end
Expand Down Expand Up @@ -126,10 +131,7 @@ function OrdinaryDiffEq.alg_cache(
A, Q = ibm(d, q, uElType)

initial_variance = ones(uElType, D)
x0 = Gaussian(
zeros(uElType, D),
SRMatrix(diagm(sqrt.(initial_variance)), diagm(initial_variance)),
)
x0 = Gaussian(zeros(uElType, D), PSDMatrix(diagm(sqrt.(initial_variance))))

# Measurement model
R = zeros(uElType, d, d)
Expand All @@ -140,23 +142,29 @@ function OrdinaryDiffEq.alg_cache(
du = f isa DynamicalODEFunction ? similar(u[2, :]) : similar(u)
ddu = f isa DynamicalODEFunction ? zeros(uElType, d, 2d) : zeros(uElType, d, d)
v = similar(h)
S =
# alg isa EK0 ? SRMatrix(zeros(uElType, d, D), Diagonal(zeros(uElType, d, d))) :
SRMatrix(zeros(uElType, d, D), zeros(uElType, d, d))
S = PSDMatrix(zeros(uElType, D, d))
measurement = Gaussian(v, S)
pu_tmp =
f isa DynamicalODEFunction ?
Gaussian(zeros(uElType, 2d), SRMatrix(zeros(uElType, 2d, D))) : similar(measurement)
Gaussian(zeros(uElType, 2d), PSDMatrix(zeros(uElType, D, 2d))) : copy(measurement)
K = zeros(uElType, D, d)
G = zeros(uElType, D, D)
C1 = SRMatrix(zeros(uElType, D, 2D), zeros(uElType, D, D))
C2 = SRMatrix(zeros(uElType, D, 3D), zeros(uElType, D, D))
covmatcache = similar(G)
C1 = PSDMatrix(zeros(uElType, 2D, D))
C2 = PSDMatrix(zeros(uElType, 3D, D))
Smat = zeros(uElType, d, d)
covmatcache = copy(G)

C_dxd = zeros(uElType, d, d)
C_dxD = zeros(uElType, d, D)
C_Dxd = zeros(uElType, D, d)
C_DxD = zeros(uElType, D, D)
C_2DxD = zeros(uElType, 2D, D)
C_3DxD = zeros(uElType, 3D, D)

if alg isa EK1FDB
H = [E1; E2]
v = [v; v]
S = SRMatrix(zeros(uElType, 2d, D), zeros(uElType, 2d, 2d))
S = PSDMatrix(zeros(uElType, D, 2d))
measurement = Gaussian(v, S)
K = zeros(uElType, D, 2d)
end
Expand All @@ -166,17 +174,17 @@ function OrdinaryDiffEq.alg_cache(
copy!(x0.Σ, apply_diffusion(x0.Σ, initdiff))

Ah, Qh = copy(A), copy(Q)
u_pred = similar(u)
u_filt = similar(u)
tmp = similar(u)
u_pred = copy(u)
u_filt = copy(u)
tmp = copy(u)
x_pred = copy(x0)
x_filt = similar(x0)
x_tmp = similar(x0)
x_tmp2 = similar(x0)
m_tmp = similar(measurement)
K2 = similar(K)
G2 = similar(G)
err_tmp = similar(du)
x_filt = copy(x0)
x_tmp = copy(x0)
x_tmp2 = copy(x0)
m_tmp = copy(measurement)
K2 = copy(K)
G2 = copy(G)
err_tmp = copy(du)

# Things for calc_J
uf = get_uf(f, t, p, Val(IIP))
Expand Down Expand Up @@ -251,6 +259,13 @@ function OrdinaryDiffEq.alg_cache(
G,
G2,
covmatcache,
Smat,
C_dxd,
C_dxD,
C_Dxd,
C_DxD,
C_2DxD,
C_3DxD,
initdiff,
initdiff * NaN,
initdiff * NaN,
Expand Down
21 changes: 13 additions & 8 deletions src/callbacks.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
function manifoldupdate!(integ, residualf; maxiters=100, ϵ₁=1e-25, ϵ₂=1e-15)
m, C = integ.cache.x
function manifoldupdate!(cache, residualf; maxiters=100, ϵ₁=1e-25, ϵ₂=1e-15)
m, C = cache.x

# Create some caches
@unpack SolProj, tmp, H, x_tmp, x_tmp2 = integ.cache
@unpack SolProj, tmp, H, x_tmp, x_tmp2 = cache
D = cache.d * (cache.q + 1)
z_tmp = residualf(mul!(tmp, SolProj, m))
result = DiffResults.JacobianResult(z_tmp, tmp)
d = length(z_tmp)
H = H[1:d, :]
S = SquarerootMatrix(C.squareroot[1:d, :], C.mat[1:d, 1:d])
K1, K2 = cache.C_DxD[:, 1:d], cache.C_2DxD[1:D, 1:d]

S = PSDMatrix(C.R[:, 1:d])
m_tmp, C_tmp = x_tmp

m_i = copy(m)
Expand All @@ -23,8 +26,7 @@ function manifoldupdate!(integ, residualf; maxiters=100, ϵ₁=1e-25, ϵ₂=1e-1
X_A_Xt!(S, C, H)

# m_i_new, C_i_new = update(x, Gaussian(z .+ (H * (m - m_i)), S), H)
# More efficient update with less allocations:
K = C * (H' / S)
K = _matmul!(K2, C.R', _matmul!(K1, C.R, H' / S))
m_tmp .= m_i .- m
mul!(z_tmp, H, m_tmp)
z_tmp .-= z
Expand All @@ -38,7 +40,7 @@ function manifoldupdate!(integ, residualf; maxiters=100, ϵ₁=1e-25, ϵ₂=1e-1
m_i = m_i_new
end

copy!(integ.cache.x, Gaussian(m_i_new, C_i_new))
copy!(cache.x, Gaussian(m_i_new, C_i_new))

return nothing
end
Expand Down Expand Up @@ -70,6 +72,9 @@ function ManifoldUpdate(
kwargs...,
)
condition(u, t, integ) = true
affect!(integ) = manifoldupdate!(integ, residual; maxiters=maxiters, ϵ₁=ϵ₁, ϵ₂=ϵ₂)
affect!(integ) = begin
manifoldupdate!(integ.cache, residual; maxiters=maxiters, ϵ₁=ϵ₁, ϵ₂=ϵ₂)
mul!(view(integ.u, :), integ.cache.SolProj, integ.cache.x.μ)
end
return DiscreteCallback(condition, affect!, args...; kwargs...)
end
58 changes: 28 additions & 30 deletions src/diffusions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ A local diffusion parameter is estimated at each step. Works well with adaptive
"""
struct DynamicDiffusion <: AbstractDynamicDiffusion end
initial_diffusion(diffusion::DynamicDiffusion, d, q, Eltype) = one(Eltype)
estimate_local_diffusion(kind::DynamicDiffusion, integ) = local_scalar_diffusion(integ)
estimate_local_diffusion(kind::DynamicDiffusion, integ) =
local_scalar_diffusion(integ.cache)

"""
DynamicMVDiffusion()
Expand All @@ -31,7 +32,8 @@ scales of the different dimensions vary a lot.
struct DynamicMVDiffusion <: AbstractDynamicDiffusion end
initial_diffusion(diffusionmodel::DynamicMVDiffusion, d, q, Eltype) =
kron(Diagonal(ones(Eltype, d)), Diagonal(ones(Eltype, q + 1)))
estimate_local_diffusion(kind::DynamicMVDiffusion, integ) = local_diagonal_diffusion(integ)
estimate_local_diffusion(kind::DynamicMVDiffusion, integ) =
local_diagonal_diffusion(integ.cache)

"""
FixedDiffusion(; initial_diffusion=1.0, calibrate=true)
Expand All @@ -48,20 +50,16 @@ Base.@kwdef struct FixedDiffusion{T<:Number} <: AbstractStaticDiffusion
end
initial_diffusion(diffusionmodel::FixedDiffusion, d, q, Eltype) =
diffusionmodel.initial_diffusion * one(Eltype)
estimate_local_diffusion(kind::FixedDiffusion, integ) = local_scalar_diffusion(integ)
estimate_local_diffusion(kind::FixedDiffusion, integ) = local_scalar_diffusion(integ.cache)
function estimate_global_diffusion(rule::FixedDiffusion, integ)
@unpack d, measurement, m_tmp = integ.cache
@unpack d, measurement, m_tmp, Smat = integ.cache
# sol_diffusions = integ.sol.diffusions

v, S = measurement.μ, measurement.Σ
e, _ = m_tmp.μ, m_tmp.Σ.mat
_S = copy!(m_tmp.Σ.mat, S.mat)
if _S isa Diagonal
e .= v ./ _S.diag
else
S_chol = cholesky!(_S)
ldiv!(e, S_chol, v)
end
e = m_tmp.μ
_S = _matmul!(Smat, S.R', S.R)
S_chol = cholesky!(_S)
ldiv!(e, S_chol, v)
diffusion_t = dot(v, e) / d

if integ.success_iter == 0
Expand Down Expand Up @@ -96,12 +94,15 @@ function initial_diffusion(diffusionmodel::FixedMVDiffusion, d, q, Eltype)
@assert initdiff isa Number || length(initdiff) == d
return kron(Diagonal(initdiff .* ones(Eltype, d)), Diagonal(ones(Eltype, q + 1)))
end
estimate_local_diffusion(kind::FixedMVDiffusion, integ) = local_diagonal_diffusion(integ)
estimate_local_diffusion(kind::FixedMVDiffusion, integ) =
local_diagonal_diffusion(integ.cache)
function estimate_global_diffusion(kind::FixedMVDiffusion, integ)
@unpack q, measurement = integ.cache

v, S = measurement.μ, measurement.Σ
S_11 = diag(S)[1]
# S_11 = diag(S)[1]
c1 = view(S.R, :, 1)
S_11 = dot(c1, c1)

Σ_ii = v .^ 2 ./ S_11
Σ = Diagonal(Σ_ii)
Expand Down Expand Up @@ -132,25 +133,20 @@ where ``z, H, Q`` are taken from the passed integrator.
For more background information
- N. Bosch, P. Hennig, F. Tronarp: **Calibrated Adaptive Probabilistic ODE Solvers** (2021)
"""
function local_scalar_diffusion(integ)
@unpack d, R, H, Qh, measurement, m_tmp = integ.cache
function local_scalar_diffusion(cache)
@unpack d, R, H, Qh, measurement, m_tmp, Smat = cache
z = measurement.μ
e, HQH = m_tmp.μ, m_tmp.Σ
X_A_Xt!(HQH, Qh, H)
if HQH.mat isa Diagonal
e .= z ./ HQH.mat.diag
σ² = dot(e, z) / d
return σ²
else
C = cholesky!(HQH.mat)
ldiv!(e, C, z)
σ² = dot(z, e) / d
return σ²
end
HQHmat = _matmul!(Smat, HQH.R', HQH.R)
C = cholesky!(HQHmat)
ldiv!(e, C, z)
σ² = dot(z, e) / d
return σ²
end

"""
local_diagonal_diffusion(integ)
local_diagonal_diffusion(cache)
Compute the local, scalar diffusion estimate.
Expand All @@ -164,11 +160,13 @@ where ``z, H, Q`` are taken from the passed integrator.
For more background information
- N. Bosch, P. Hennig, F. Tronarp: **Calibrated Adaptive Probabilistic ODE Solvers** (2021)
"""
function local_diagonal_diffusion(integ)
@unpack q, H, Qh, measurement, m_tmp = integ.cache
function local_diagonal_diffusion(cache)
@unpack q, H, Qh, measurement, m_tmp = cache
z = measurement.μ
HQH = X_A_Xt!(m_tmp.Σ, Qh, H)
Q0_11 = diag(HQH)[1]
# Q0_11 = diag(HQH)[1]
c1 = view(HQH.R, :, 1)
Q0_11 = dot(c1, c1)

Σ_ii = z .^ 2 ./ Q0_11
# Σ_ii .= max.(Σ_ii, eps(eltype(Σ_ii)))
Expand Down
37 changes: 18 additions & 19 deletions src/filtering/predict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ predict(x::Gaussian, A::AbstractMatrix, Q::AbstractMatrix) =
Gaussian(predict_mean(x, A), predict_cov(x, A, Q))
predict_mean(x::Gaussian, A::AbstractMatrix) = A * x.μ
predict_cov(x::Gaussian, A::AbstractMatrix, Q::AbstractMatrix) = A * x.Σ * A' + Q
predict_cov(x::SRGaussian, A::AbstractMatrix, Q::SRMatrix) =
SRMatrix(qr([A * x.Σ.squareroot Q.squareroot]').R')
predict_cov(x::SRGaussian, A::AbstractMatrix, Q::PSDMatrix) =
PSDMatrix(qr([x.Σ.R * A'; Q.R]).R)

"""
predict!(x_out, x_curr, Ah, Qh, cachemat)
In-place and square-root implementation of [`predict`](@ref)
which saves the result into `x_out`.
Only works with `ProbNumDiffEq.SquarerootMatrix` types as `Ah`, `Qh`, and in the
Only works with `PSDMatrices.PSDMatrix` types as `Ah`, `Qh`, and in the
covariances of `x_curr` and `x_out` (both of type `Gaussian`).
To prevent allocations, a cache matrix `cachemat` of size ``D \\times 2D``
(where ``D \\times D`` is the size of `Ah` and `Qh`) needs to be passed.
Expand All @@ -32,12 +32,13 @@ function predict!(
x_out::SRGaussian,
x_curr::SRGaussian,
Ah::AbstractMatrix,
Qh::SRMatrix,
cachemat::SRMatrix,
Qh::PSDMatrix,
C_DxD::AbstractMatrix,
C_2DxD::AbstractMatrix,
diffusion=1,
)
predict_mean!(x_out, x_curr, Ah)
predict_cov!(x_out, x_curr, Ah, Qh, cachemat, diffusion)
predict_cov!(x_out, x_curr, Ah, Qh, C_DxD, C_2DxD, diffusion)
return x_out
end

Expand All @@ -50,23 +51,21 @@ function predict_cov!(
x_out::SRGaussian,
x_curr::SRGaussian,
Ah::AbstractMatrix,
Qh::SRMatrix,
cachemat::SRMatrix,
Qh::PSDMatrix,
C_DxD::AbstractMatrix,
C_2DxD::AbstractMatrix,
diffusion=1,
)
M, L = cachemat.mat, cachemat.squareroot
D, D = size(Qh.mat)
R, M = C_2DxD, C_DxD
D, D = size(Qh)

_matmul!(view(L, 1:D, 1:D), Ah, x_curr.Σ.squareroot)
_matmul!(view(L, 1:D, D+1:2D), sqrt.(diffusion), Qh.squareroot)
_matmul!(M, L, L')
_matmul!(view(R, 1:D, 1:D), x_curr.Σ.R, Ah')
_matmul!(view(R, D+1:2D, 1:D), Qh.R, sqrt.(diffusion))
_matmul!(M, R', R)
chol = cholesky!(Symmetric(M), check=false)

QL =
issuccess(chol) ? Matrix(chol.U)' :
eltype(L) <: Union{Float16,Float32,Float64} ? lq!(L).L : qr(L').R'
# QL = eltype(L) <: Union{Float16,Float32,Float64} ? lq!(L).L : qr(L').R'
copy!(x_out.Σ.squareroot, QL)
_matmul!(x_out.Σ.mat, QL, QL')
Q_R = issuccess(chol) ? Matrix(chol.U) : custom_qr!(R).R
copy!(x_out.Σ.R, Q_R)
# _matmul!(x_out.Σ.mat, QL, QL')
return x_out.Σ
end
Loading

0 comments on commit 1d5b415

Please sign in to comment.