Skip to content

Commit

Permalink
Performance Optimizations throughouth the codebase (#68)
Browse files Browse the repository at this point in the history
* Minimally faster projection matrix

* Minimally faster prior computation

* Remove an old unused line of code

* Fix a wrong check; diffusions are now faster for the EK0

* Specify matrices instead of letting it compute zero matmuls

* Pass the diffusion as type, not as symbol

* Separate the `copy`s and `similar`s from the cache creation

* Hard-code `X_A_Xt` for diagonal X'es

* Make `A` a full matrix since Octavian is that much faster

* Use floats in `_matmul!(C, A, B, a, b)`

* Reduce allocations in `estimate_diffusion(DynamicDiffusion)`

* Optimize the `update!` step some more

* More usage of `_matmul!`

* Use Octavian.jl's matmul! for a wider range of matrices and vectors

* Move the diffusion back to being specified with a symbol

* Remove the fixedMAP diffusion from the tests

* Fix `update!` for the EK0

* Remove the `OctavianCompatibleMatrices` type

* Fix the usage of `update!` in the RungeKuttaInit()

* Fix the allocating convenience `update!` function

* Fix some filtering tests that passed LowerTriangular matrices

* Update the benchmark.ipynb

* Fix an issue by using mul! instead of matmul!

I'm really not sure what goes wrong there, but this seems to fix it...

* Make the prior more stable again, using the elType that is supplied

* Update the benchmark once more
  • Loading branch information
nathanaelbosch authored Sep 9, 2021
1 parent 803ec17 commit 535e52c
Show file tree
Hide file tree
Showing 14 changed files with 1,765 additions and 1,743 deletions.
3,341 changes: 1,663 additions & 1,678 deletions benchmarks/multi-language-benchmark.ipynb

Large diffs are not rendered by default.

28 changes: 18 additions & 10 deletions src/ProbNumDiffEq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,26 @@ using ForwardDiff
using Tullio
import Octavian: matmul!
# Define some fallbacks
_matmul!(C::VecOrMat{T}, A::VecOrMat{T}, B::VecOrMat{T}) where {
T <: Union{Bool, Float16, Float32, Float64,
Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8}} =
matmul!(C, A, B)
_matmul!(C::VecOrMat{T}, A::VecOrMat{T}, B::VecOrMat{T}, a::T, b::T) where {T <: Union{
Bool, Float16, Float32, Float64,
Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8}} =
matmul!(C, A, B, a, b)
const OctavianCompatibleEltypes = Union{
Bool, Float16, Float32, Float64,
Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8}
_matmul!(C::AbstractVecOrMat{T},
A::AbstractVecOrMat{T},
B::AbstractVecOrMat{T}
) where T <: OctavianCompatibleEltypes = matmul!(C, A, B)
_matmul!(C::AbstractVecOrMat{T},
A::AbstractVecOrMat{T},
B::AbstractVecOrMat{T},
a::T, b::T) where T <: OctavianCompatibleEltypes = matmul!(C, A, B, a, b)
_matmul!(C, A, B) = mul!(C, A, B)
_matmul!(C, A, B, a, b) = mul!(C, A, B, a, b)
_matmul!(C::AbstractMatrix, A::AbstractMatrix, B::Diagonal) = (C .= A .* B.diag')
_matmul!(C::AbstractMatrix, A::Diagonal, B::AbstractMatrix) = (C .= A.diag .* B)
_matmul!(C::AbstractMatrix{T}, A::AbstractMatrix{T}, B::Diagonal{T}
) where T <: OctavianCompatibleEltypes = (C .= A .* B.diag')
_matmul!(C::AbstractMatrix{T}, A::Diagonal{T}, B::AbstractMatrix{T}
) where T <: OctavianCompatibleEltypes = (C .= A.diag .* B)
_matmul!(C::Diagonal{T}, A::AbstractMatrix{T}, B::AbstractMatrix{T}
) where T <: OctavianCompatibleEltypes =
mul!(C, A, B)


# @reexport using PSDMatrices
Expand Down
47 changes: 29 additions & 18 deletions src/caches.jl
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ function OrdinaryDiffEq.alg_cache(

A, Q = ibm(d, q, uElType)

x0 = Gaussian(zeros(uElType, D), SRMatrix(Matrix(uElType(1.0)*I, D, D)))
x0 = Gaussian(zeros(uElType, D),
SRMatrix(Matrix(uElType(1.0)*I, D, D), Matrix(uElType(1.0)*I, D, D)))

# Measurement model
R = zeros(uElType, d, d)
Expand All @@ -106,28 +107,38 @@ function OrdinaryDiffEq.alg_cache(
v = similar(h)
S = alg isa EK0 ?
SRMatrix(zeros(uElType, d, D), Diagonal(zeros(uElType, d, d))) :
SRMatrix(zeros(uElType, d, D))
SRMatrix(zeros(uElType, d, D), zeros(uElType, d, d))
measurement = Gaussian(v, S)
pu_tmp =
f isa DynamicalODEFunction ?
Gaussian(zeros(uElType, 2d), SRMatrix(zeros(uElType, 2d, D))) :
similar(measurement)
K = zeros(uElType, D, d)
G = zeros(uElType, D, D)
C1 = SRMatrix(zeros(uElType, D, 2D))
C2 = SRMatrix(zeros(uElType, D, 3D))
C1 = SRMatrix(zeros(uElType, D, 2D), zeros(uElType, D, D))
C2 = SRMatrix(zeros(uElType, D, 3D), zeros(uElType, D, D))
covmatcache = similar(G)

diffusion_models = Dict(
:dynamic => DynamicDiffusion(),
:dynamicMV => MVDynamicDiffusion(),
:fixed => FixedDiffusion(),
:fixedMV => MVFixedDiffusion(),
:fixedMAP => MAPFixedDiffusion(),
)
diffmodel = diffusion_models[alg.diffusionmodel]
diffmodel = alg.diffusionmodel == :dynamic ? DynamicDiffusion() :
alg.diffusionmodel == :fixed ? FixedDiffusion() :
alg.diffusionmodel == :dynamicMV ? MVDynamicDiffusion() :
alg.diffusionmodel == :fixedMV ? MVFixedDiffusion() :
error("The specified diffusion could not be recognized! Use e.g. `:dynamic`.")

initdiff = initial_diffusion(diffmodel, d, q, uEltypeNoUnits)

Ah, Qh = copy(A), copy(Q)
u_pred = similar(u)
u_filt = similar(u)
tmp = similar(u)
x_pred = copy(x0)
x_filt = similar(x0)
x_tmp = similar(x0)
x_tmp2 = similar(x0)
m_tmp = similar(measurement)
K2 = similar(K)
G2 = similar(G)
err_tmp = similar(du)
return GaussianODEFilterCache{
typeof(R), typeof(Proj), typeof(SolProj),
typeof(P), typeof(PI),
Expand All @@ -137,16 +148,16 @@ function OrdinaryDiffEq.alg_cache(
typeof(C1),
}(
# Constants
d, q, A, Q, copy(A), copy(Q), diffmodel, R, Proj, SolProj,
d, q, A, Q, Ah, Qh, diffmodel, R, Proj, SolProj,
P, PI,
E0, E1, E2,
# Mutable stuff
u, similar(u), similar(u), copy(u),
x0, copy(x0), similar(x0), similar(x0), similar(x0),
measurement, similar(measurement), pu_tmp,
H, du, ddu, K, similar(K), G, similar(G),
u, u_pred, u_filt, tmp,
x0, x_pred, x_filt, x_tmp, x_tmp2,
measurement, m_tmp, pu_tmp,
H, du, ddu, K, K2, G, G2,
covmatcache, initdiff, initdiff,
similar(du),
err_tmp,
zero(uEltypeNoUnits),
C1, C2,
)
Expand Down
6 changes: 4 additions & 2 deletions src/diffusions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,13 @@ function estimate_diffusion(kind::DynamicDiffusion, integ)
# @assert all(R .== 0) "The dynamic-diffusion assumes R==0!"
z = measurement.μ
HQH = X_A_Xt!(m_tmp.Σ, Qh, H)
if HQH isa Diagonal
if HQH.mat isa Diagonal
σ² = dot(z ./ HQH.mat.diag, z) / d
return σ², σ²
else
σ² = z' * (Symmetric(HQH.mat) \ z) / d
C = cholesky!(HQH.mat)
e = ldiv!(m_tmp.μ, C, z)
σ² = dot(z, e) / d
return σ², σ²
end
end
Expand Down
21 changes: 11 additions & 10 deletions src/filtering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ function predict_cov!(x_out::SRGaussian, x_curr::SRGaussian, Ah::AbstractMatrix,
M, L = cachemat.mat, cachemat.squareroot
D, D = size(Qh.mat)

mul!(view(L, 1:D, 1:D), Ah, x_curr.Σ.squareroot)
mul!(view(L, 1:D, D+1:2D), sqrt.(diffusion), Qh.squareroot)
_matmul!(view(L, 1:D, 1:D), Ah, x_curr.Σ.squareroot)
_matmul!(view(L, 1:D, D+1:2D), sqrt.(diffusion), Qh.squareroot)
_matmul!(M, L, L')
chol = cholesky!(Symmetric(M), check=false)

Expand Down Expand Up @@ -91,22 +91,22 @@ See also: [`predict`](@ref)
function update!(x_out::Gaussian, x_pred::Gaussian, measurement::Gaussian,
H::AbstractMatrix, R,
K1::AbstractMatrix, K2::AbstractMatrix,
M_cache::AbstractMatrix)
M_cache::AbstractMatrix, m_tmp)
@assert iszero(R)
z, S = measurement.μ, measurement.Σ
z, S = measurement.μ, copy!(m_tmp.Σ, measurement.Σ)
m_p, P_p = x_pred.μ, x_pred.Σ
D = length(m_p)

S_inv = inv(S)
# K = P_p * H' * S_inv
K1 = _matmul!(K1, Matrix(P_p), H')
K = _matmul!(K2, K1, S_inv)
# K = P_p * H' / S
S_chol = cholesky!(S)
K = _matmul!(K1, Matrix(P_p), H')
rdiv!(K, S_chol)

# x_out.μ .= m_p .+ K * (0 .- z)
x_out.μ .= m_p .- _matmul!(x_out.μ, K, z)

# M_cache .= I(D) .- mul!(M_cache, K, H)
_matmul!(M_cache, K, H, -1, 0)
_matmul!(M_cache, K, H, -1.0, 0.0)
@inbounds @simd ivdep for i in 1:D
M_cache[i, i] += 1
end
Expand All @@ -122,7 +122,8 @@ function update!(x_out::Gaussian, x_pred::Gaussian, measurement::Gaussian,
K1 = zeros(D, d)
K2 = zeros(D, d)
M_cache = zeros(D, D)
return update!(x_out, x_pred, measurement, H, R, K1, K2, M_cache)
m_tmp = copy(measurement)
return update!(x_out, x_pred, measurement, H, R, K1, K2, M_cache, m_tmp)
end
"""
update(x_pred, measurement, H, R=0)
Expand Down
4 changes: 2 additions & 2 deletions src/initialization/common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ function condition_on!(x::SRGaussian, H::AbstractMatrix, data::AbstractVector,
Kcache2 .= Kcache ./ S.diag'
K = Kcache2

_matmul!(x.μ, K, data - z, 1, 1)
_matmul!(x.μ, K, data - z, 1.0, 1.0)
# x.μ .+= K*(data - z)

D = length(x.μ)
_matmul!(Mcache, K, H, -1, 0)
mul!(Mcache, K, H, -1.0, 0.0)
@inbounds @simd ivdep for i in 1:D
Mcache[i, i] += 1
end
Expand Down
2 changes: 1 addition & 1 deletion src/initialization/rungekutta.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ function rk_init_improve(integ, cache::GaussianODEFilterCache, ts, us, dt)
X_A_Xt!(measurement.Σ, x_pred.Σ, H)

update!(x_filt, x_pred, measurement, H, 0,
cache.K1, cache.K2, cache.x_tmp2.Σ.mat)
cache.K1, cache.K2, cache.x_tmp2.Σ.mat, cache.m_tmp)
push!(filts, copy(x_filt))

x = x_filt
Expand Down
7 changes: 3 additions & 4 deletions src/perform_step.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,8 @@ function evaluate_ode!(integ, x_pred, t, second_order::Val{false})
end

integ.destats.njacs += 1
_matmul!(H, ddu, -E0)
H .= E1
_matmul!(H, ddu, E0, -1, 1)
_matmul!(H, ddu, E0, -1.0, 1.0)
else
# H .= E1 # This is already the case!
end
Expand Down Expand Up @@ -222,8 +221,8 @@ compute_measurement_covariance!(cache) =

function update!(integ, prediction)
@unpack measurement, H, R, x_filt = integ.cache
@unpack K1, K2, x_tmp2 = integ.cache
update!(x_filt, prediction, measurement, H, R, K1, K2, x_tmp2.Σ.mat)
@unpack K1, K2, x_tmp2, m_tmp = integ.cache
update!(x_filt, prediction, measurement, H, R, K1, K2, x_tmp2.Σ.mat, m_tmp)
# assert_nonnegative_diagonal(x_filt.Σ)
return x_filt
end
Expand Down
7 changes: 3 additions & 4 deletions src/priors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,19 @@ function ibm(d::Integer, q::Integer, elType=typeof(1.0))
end
A = kron(I(d), A_breve)
@assert istriu(A)
A = UpperTriangular(A)
# A = UpperTriangular(A)

# Make Q
Q_breve = zeros(elType, q+1, q+1)
@fastmath _transdiff_ibm_element(row::Int, col::Int) =
one(elType) / (2 * q + 1 - row - col)
@simd ivdep for col in 0:q
@simd ivdep for row in col:q
@simd ivdep for row in 0:q
val = _transdiff_ibm_element(row, col)
@inbounds Q_breve[1 + col, 1 + row] = val
@inbounds Q_breve[1 + row, 1 + col] = val
end
end
QL_breve = cholesky(Q_breve).L
QL_breve = cholesky!(Q_breve).L
QL = kron(I(d), QL_breve)
Q = SRMatrix(QL)

Expand Down
14 changes: 12 additions & 2 deletions src/projection.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
function projection(d, q, elType=typeof(1.0))
Proj(deriv) = kron(diagm(0 => ones(elType, d)), [i==(deriv+1) ? 1 : 0 for i in 1:q+1]')
function projection(d::Integer, q::Integer, elType=typeof(1.0))
# Proj(deriv) = kron(diagm(0 => ones(elType, d)), [i==(deriv+1) ? 1 : 0 for i in 1:q+1]')

# Slightly faster version of the above:
D = d*(q+1)
Proj(deriv) = begin
P = zeros(elType, d, D)
@simd ivdep for i in deriv*d + 1 : D+1 : d*D
@inbounds P[i] = 1
end
return P
end
return Proj
end
7 changes: 7 additions & 0 deletions src/squarerootmatrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,16 @@ X_A_Xt!(out::SquarerootMatrix, M::SquarerootMatrix, X::AbstractMatrix) = begin
_matmul!(out.mat, out.squareroot, out.squareroot')
return out
end
X_A_Xt!(out::SquarerootMatrix, M::SquarerootMatrix, D::Diagonal) = begin
# Basically just to optimize PI*Q*PI
out.squareroot .= D.diag .* M.squareroot
out.mat .= D.diag .* M.mat .* D.diag'
return out
end


Base.inv(M::SquarerootMatrix)= Base.inv(M.mat)
LinearAlgebra.diag(M::SquarerootMatrix) = LinearAlgebra.diag(M.mat)
LinearAlgebra.cholesky(M::SquarerootMatrix) = LinearAlgebra.cholesky(M.mat)
LinearAlgebra.cholesky!(M::SquarerootMatrix) = LinearAlgebra.cholesky!(M.mat)
LinearAlgebra.logdet(M::SquarerootMatrix) = LinearAlgebra.logdet(M.mat)
4 changes: 2 additions & 2 deletions test/correctness.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ for (prob, probname) in [
true_sol = solve(remake(prob, u0=big.(prob.u0)), Tsit5(), abstol=1e-20, reltol=1e-20)

for Alg in (EK0, EK1),
diffusion in [:fixed, :dynamic, :fixedMAP, :fixedMV, :dynamicMV],
diffusion in [:fixed, :dynamic, :fixedMV, :dynamicMV],
init in [TaylorModeInit(), RungeKuttaInit()],
q in [2, 3, 5]

Expand Down Expand Up @@ -51,7 +51,7 @@ for (prob, probname) in [
true_dense_vals = true_sol.(t_eval)

for Alg in (EK0, EK1),
diffusion in [:fixed, :dynamic, :fixedMAP, :fixedMV, :dynamicMV],
diffusion in [:fixed, :dynamic, :fixedMV, :dynamicMV],
init in [TaylorModeInit(), RungeKuttaInit()],
q in [2, 3, 5]

Expand Down
10 changes: 5 additions & 5 deletions test/diffusions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ import DiffEqProblemLibrary.ODEProblemLibrary: prob_ode_linear, prob_ode_2Dlinea
@test sol.u[end] true_sol.(sol.t)[end]
end

@testset "Time-Fixed Diffusion MAP" begin
sol = solve(prob, EK0(diffusionmodel=:fixedMAP, smooth=false),
dense=false, adaptive=false, dt=1e-4)
@test sol.u[end] true_sol.(sol.t)[end]
end
# @testset "Time-Fixed Diffusion MAP" begin
# sol = solve(prob, EK0(diffusionmodel=:fixedMAP, smooth=false),
# dense=false, adaptive=false, dt=1e-4)
# @test sol.u[end] ≈ true_sol.(sol.t)[end]
# end

end
10 changes: 5 additions & 5 deletions test/filtering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ using LinearAlgebra
# Setup
d = 5
m = rand(d)
L_p = LowerTriangular(rand(d,d))
L_p = Matrix(LowerTriangular(rand(d,d)))
P = L_p*L_p'

A = rand(d,d)
L_Q = LowerTriangular(rand(d,d))
L_Q = Matrix(LowerTriangular(rand(d,d)))
Q = L_Q*L_Q'

# PREDICT
Expand Down Expand Up @@ -53,7 +53,7 @@ end
# Setup
d = 5
m_p = rand(d)
L_P_p = LowerTriangular(rand(d,d))
L_P_p = Matrix(LowerTriangular(rand(d,d)))
P_p = L_P_p * L_P_p'

# Measure
Expand Down Expand Up @@ -95,11 +95,11 @@ end
# Setup
d = 5
m, m_s = rand(d), rand(d)
L_P, L_P_s = LowerTriangular(rand(d,d)), LowerTriangular(rand(d,d))
L_P, L_P_s = Matrix(LowerTriangular(rand(d,d))), Matrix(LowerTriangular(rand(d,d)))
P, P_s = L_P*L_P', L_P_s*L_P_s'

A = rand(d,d)
L_Q = LowerTriangular(rand(d,d))
L_Q = Matrix(LowerTriangular(rand(d,d)))
Q = L_Q*L_Q'
Q_SR = SRMatrix(L_Q)

Expand Down

0 comments on commit 535e52c

Please sign in to comment.