Improve precompilation times (#116)

* Simplify the projection to use a type, not val * Seem in OrdinaryDiffEq.jl and is supposed to improve precompilation * Pass just the cache, not the full integrator, to `smooth!` * Small thing I found in OrdinaryDiffEq.jl * Typically do not use views in the TaylorMode init * Seemingly better SRMatrix definition * Simplify some code * Remove Octavian to get precompilation times down _drastically_ * Apply JuliaFormatter.jl * Bugfix: Adjust the old ClassicSolverInit * Always pass the full matrix to SRMat constructors * Better precompilation and more efficient prior computation * Better preconditioner precompilation * `apply_diffusion` should not rely on matrix multiplication but to be honest it should be replaced anyways * Less precompilation in the taylor mode init * Rename mul! on Gaussians to a more explicit `_gaussian_mul!` * Use full covariances in the EK0 for now doesn't change the performance that much, but seems to improve precompilation stuff and also type stability * Make `condition_on!` precompile better * Improve a simd loop in `calibrate_solution!` * Bugfix: `mul!` -> `_gaussian_mul!` * Apply JuliaFormatter.jl * Bump version number
nathanaelbosch · Mar 7, 2022 · d1009e0 · d1009e0 · nathanaelbosch · Mar 7, 2022
1 parent e2ed6e9
commit d1009e0
Show file tree

Hide file tree

Showing 15 changed files with 88 additions and 93 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,15 +1,14 @@
 name = "ProbNumDiffEq"
 uuid = "bf3e78b0-7d74-48a5-b855-9609533b56a5"
 authors = ["Nathanael Bosch"]
-version = "0.5.6"
+version = "0.5.7"
 
 [deps]
 DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
 DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 GaussianDistributions = "43dcc890-d446-5863-8d1a-14597580bb8d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
 RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd"
@@ -27,7 +26,6 @@ DiffEqBase = "6"
 DiffEqDevTools = "2"
 ForwardDiff = "0.10"
 GaussianDistributions = "0.5"
-Octavian = "0.2,0.3"
 OrdinaryDiffEq = "5 - 5.69"
 RecipesBase = "1"
 RecursiveArrayTools = "2"

diff --git a/src/ProbNumDiffEq.jl b/src/ProbNumDiffEq.jl
@@ -35,26 +35,10 @@ using Tullio
 _matmul!(C, A, B) = mul!(C, A, B)
 _matmul!(C, A, B, a, b) = mul!(C, A, B, a, b)
 # Some special cases
-_matmul!(C::AbstractMatrix{Float64}, A::AbstractMatrix{Float64}, B::Diagonal{Float64}) =
-    (C .= A .* B.diag')
-_matmul!(C::AbstractMatrix{Float64}, A::Diagonal{Float64}, B::AbstractMatrix{Float64}) =
-    (C .= A.diag .* B)
-_matmul!(C::Diagonal{Float64}, A::AbstractMatrix{Float64}, B::AbstractMatrix{Float64}) =
+_matmul!(C::AbstractMatrix, A::AbstractMatrix, B::Diagonal) = (C .= A .* B.diag')
+_matmul!(C::AbstractMatrix, A::Diagonal, B::AbstractMatrix) = (C .= A.diag .* B)
+_matmul!(C::Diagonal, A::AbstractMatrix, B::AbstractMatrix) =
     @tullio C[i, i] = A[i, j] * B[j, i]
-# Use Octavian's matmul! when the types are very specific
-import Octavian: matmul!
-_matmul!(
-    C::AbstractVecOrMat{Float64},
-    A::AbstractVecOrMat{Float64},
-    B::AbstractVecOrMat{Float64},
-) = matmul!(C, A, B)
-_matmul!(
-    C::AbstractVecOrMat{Float64},
-    A::AbstractVecOrMat{Float64},
-    B::AbstractVecOrMat{Float64},
-    a::Float64,
-    b::Float64,
-) = matmul!(C, A, B, a, b)
 
 # @reexport using PSDMatrices
 # import PSDMatrices: X_A_Xt
@@ -64,7 +48,8 @@ export SRMatrix
 X_A_Xt(A, X) = X * A * X'
 X_A_Xt!(out, A, X) = (out .= X * A * X')
 apply_diffusion(Q, diffusion::Diagonal) = X_A_Xt(Q, sqrt.(diffusion))
-apply_diffusion(Q::SRMatrix, diffusion::Number) = SRMatrix(sqrt.(diffusion) * Q.squareroot)
+apply_diffusion(Q::SRMatrix, diffusion::Number) =
+    SRMatrix(sqrt.(diffusion) * Q.squareroot, diffusion * Q.mat)
 
 # All the Gaussian things
 @reexport using GaussianDistributions

diff --git a/src/caches.jl b/src/caches.jl
@@ -81,9 +81,9 @@ function OrdinaryDiffEq.alg_cache(
     alg::GaussianODEFilter,
     u,
     rate_prototype,
-    uEltypeNoUnits,
-    uBottomEltypeNoUnits,
-    tTypeNoUnits,
+    ::Type{uEltypeNoUnits},
+    ::Type{uBottomEltypeNoUnits},
+    ::Type{tTypeNoUnits},
     uprev,
     uprev2,
     f,
@@ -93,7 +93,7 @@ function OrdinaryDiffEq.alg_cache(
     p,
     calck,
     ::Val{IIP},
-) where {IIP}
+) where {IIP,uEltypeNoUnits,uBottomEltypeNoUnits,tTypeNoUnits}
     initialize_derivatives = true
 
     if u isa Number
@@ -113,11 +113,12 @@ function OrdinaryDiffEq.alg_cache(
     t0 = t
 
     uType = typeof(u)
-    uElType = eltype(u_vec)
+    # uElType = eltype(u_vec)
+    uElType = uBottomEltypeNoUnits
     matType = Matrix{uElType}
 
     # Projections
-    Proj = projection(d, q, Val(uElType))
+    Proj = projection(d, q, uElType)
     E0, E1, E2 = Proj(0), Proj(1), Proj(2)
     @assert f isa AbstractODEFunction
     SolProj = f isa DynamicalODEFunction ? [Proj(1); Proj(0)] : Proj(0)
@@ -127,8 +128,11 @@ function OrdinaryDiffEq.alg_cache(
 
     A, Q = ibm(d, q, uElType)
 
-    initial_variance = 1.0 * ones(uElType, D)
-    x0 = Gaussian(zeros(uElType, D), SRMatrix(Matrix(Diagonal(sqrt.(initial_variance)))))
+    initial_variance = ones(uElType, D)
+    x0 = Gaussian(
+        zeros(uElType, D),
+        SRMatrix(diagm(sqrt.(initial_variance)), diagm(initial_variance)),
+    )
 
     # Measurement model
     R = zeros(uElType, d, d)
@@ -141,7 +145,7 @@ function OrdinaryDiffEq.alg_cache(
     v, S = similar(h), similar(ddu)
     v = similar(h)
     S =
-        alg isa EK0 ? SRMatrix(zeros(uElType, d, D), Diagonal(zeros(uElType, d, d))) :
+    # alg isa EK0 ? SRMatrix(zeros(uElType, d, D), Diagonal(zeros(uElType, d, d))) :
         SRMatrix(zeros(uElType, d, D), zeros(uElType, d, d))
     measurement = Gaussian(v, S)
     pu_tmp =

diff --git a/src/filtering/smooth.jl b/src/filtering/smooth.jl
@@ -58,12 +58,12 @@ function smooth(
     return x_curr_smoothed, G
 end
 
-function smooth!(x_curr, x_next, Ah, Qh, integ, diffusion=1)
+function smooth!(x_curr, x_next, Ah, Qh, cache, diffusion=1)
     # x_curr is the state at time t_n (filter estimate) that we want to smooth
     # x_next is the state at time t_{n+1}, already smoothed, which we use for smoothing
-    @unpack d, q = integ.cache
-    @unpack x_pred = integ.cache
-    @unpack C1, G1, G2, C2 = integ.cache
+    @unpack d, q = cache
+    @unpack x_pred = cache
+    @unpack C1, G1, G2, C2 = cache
 
     # Prediction: t -> t+1
     predict_mean!(x_pred, x_curr, Ah)

diff --git a/src/gaussians.jl b/src/gaussians.jl
@@ -20,14 +20,14 @@ ndims(g::Gaussian) = ndims(g.μ)
 Base.:*(M, g::SRGaussian) = Gaussian(M * g.μ, X_A_Xt(g.Σ, M))
 # GaussianDistributions.whiten(Σ::SRMatrix, z) = Σ.L\z
 
-function mul!(g_out::SRGaussian, M::AbstractMatrix, g_in::SRGaussian)
+function _gaussian_mul!(g_out::SRGaussian, M::AbstractMatrix, g_in::SRGaussian)
     _matmul!(g_out.μ, M, g_in.μ)
     X_A_Xt!(g_out.Σ, g_in.Σ, M)
     return g_out
 end
 
-var(p::SRGaussian{T}) where {T} = diag(p.Σ)
-std(p::SRGaussian{T}) where {T} = sqrt.(var(p))
-mean(s::SRGaussianList{T}) where {T} = s.μ
-var(s::SRGaussianList{T}) where {T} = diag.(s.Σ)
-std(s::SRGaussianList{T}) where {T} = map(std, s)
+var(p::SRGaussian) = diag(p.Σ)
+std(p::SRGaussian) = sqrt.(var(p))
+mean(s::SRGaussianList) = s.μ
+var(s::SRGaussianList) = diag.(s.Σ)
+std(s::SRGaussianList) = map(std, s)
diff --git a/src/initialization/classicsolverinit.jl b/src/initialization/classicsolverinit.jl
@@ -11,10 +11,10 @@ function initial_update!(integ, cache, init::ClassicSolverInit)
     @unpack ddu, du, x_tmp, x_tmp2, m_tmp, K1 = cache
 
     # Initialize on u0
-    condition_on!(x, Proj(0), view(u, :), m_tmp, K1, x_tmp.Σ, x_tmp2.Σ.mat)
+    condition_on!(x, Proj(0), view(u, :), m_tmp.Σ, K1, x_tmp.Σ, x_tmp2.Σ.mat)
     f(du, u, p, t)
     integ.destats.nf += 1
-    condition_on!(x, Proj(1), view(du, :), m_tmp, K1, x_tmp.Σ, x_tmp2.Σ.mat)
+    condition_on!(x, Proj(1), view(du, :), m_tmp.Σ, K1, x_tmp.Σ, x_tmp2.Σ.mat)
 
     if q < 2
         return
@@ -31,7 +31,7 @@ function initial_update!(integ, cache, init::ClassicSolverInit)
             ForwardDiff.jacobian!(ddu, (du, u) -> f(du, u, p, t), du, u)
         end
         ddfddu = ddu * view(du, :) + view(dfdt, :)
-        condition_on!(x, Proj(2), ddfddu, m_tmp, K1, x_tmp.Σ, x_tmp2.Σ.mat)
+        condition_on!(x, Proj(2), ddfddu, m_tmp.Σ, K1, x_tmp.Σ, x_tmp2.Σ.mat)
         if q < 3
             return
         end
@@ -88,7 +88,7 @@ function rk_init_improve(integ, cache::GaussianODEFilterCache, ts, us, dt)
     make_preconditioners!(cache, dt)
     @unpack P, PI = cache
 
-    mul!(x, P, x)
+    _gaussian_mul!(x, P, x)
 
     preds = []
     filts = [copy(x)]
@@ -114,10 +114,10 @@ function rk_init_improve(integ, cache::GaussianODEFilterCache, ts, us, dt)
         xf = filts[i-1]
         xs = filts[i]
         xp = preds[i-1] # Since `preds` is one shorter
-        smooth!(xf, xs, A, Q, integ, 1)
+        smooth!(xf, xs, A, Q, integ.cache, 1)
     end
 
-    mul!(cache.x, PI, filts[1])
+    _gaussian_mul!(cache.x, PI, filts[1])
 
     return nothing
 end
diff --git a/src/initialization/common.jl b/src/initialization/common.jl
@@ -25,14 +25,13 @@ function condition_on!(
     x::SRGaussian,
     H::AbstractMatrix,
     data::AbstractVector,
-    meascache,
+    Scache,
     Kcache,
     covcache,
     Mcache,
 )
-    z, S = meascache
+    S = Scache
 
-    _matmul!(z, H, x.μ)
     X_A_Xt!(S, x.Σ, H)
     @assert isdiag(S)
     S_diag = diag(S)
@@ -43,11 +42,12 @@ function condition_on!(
     _matmul!(Kcache, x.Σ.mat, H')
     K = Kcache ./= S_diag'
 
-    _matmul!(x.μ, K, data - z, 1.0, 1.0)
     # x.μ .+= K*(data - z)
+    datadiff = _matmul!(data, H, x.μ, -1, 1)
+    _matmul!(x.μ, K, datadiff, 1, 1)
 
     D = length(x.μ)
-    mul!(Mcache, K, H, -1.0, 0.0)
+    _matmul!(Mcache, K, H, -1, 0)
     @inbounds @simd ivdep for i in 1:D
         Mcache[i, i] += 1
     end

diff --git a/src/initialization/taylormode.jl b/src/initialization/taylormode.jl
@@ -6,6 +6,9 @@ function initial_update!(integ, cache, init::TaylorModeInit)
     D = d * (q + 1)
 
     @unpack x_tmp, x_tmp2, m_tmp, K1, K2 = cache
+    if size(K1, 2) != d
+        K1 = K1[:, 1:d]
+    end
 
     f_derivatives = taylormode_get_derivatives(u, f, p, t, q)
     integ.destats.nf += q
@@ -20,15 +23,12 @@ function initial_update!(integ, cache, init::TaylorModeInit)
             df = df[2, :]
         end
         pmat = f.mass_matrix * Proj(o)
-        condition_on!(
-            x,
-            pmat,
-            view(df, :),
-            m_cache,
-            view(K1, :, 1:d),
-            x_tmp.Σ,
-            x_tmp2.Σ.mat,
-        )
+
+        if !(df isa AbstractVector)
+            df = df[:]
+        end
+
+        condition_on!(x, pmat, df, m_cache.Σ, K1, x_tmp.Σ, x_tmp2.Σ.mat)
     end
 end
 
@@ -40,8 +40,12 @@ function taylormode_get_derivatives(u, f::AbstractODEFunction{false}, p, t, q)
 
     tT = Taylor1(typeof(t), q)
     tT[0] = t
-    uT = Taylor1.(u, tT.order)
-    duT = zero.(Taylor1.(u, tT.order))
+    # uT = Taylor1.(u, tT.order) # precompilation could be faster with the following:
+    uT = similar(u, Taylor1{eltype(u)})
+    @inbounds @simd ivdep for i in eachindex(u)
+        uT[i] = Taylor1(u[i], q)
+    end
+    duT = zero(uT)
     uauxT = similar(uT)
     TaylorIntegration.jetcoeffs!(f, tT, uT, duT, uauxT, p)
     return [evaluate.(differentiate.(uT, i)) for i in 0:q]
@@ -52,8 +56,12 @@ end
 function taylormode_get_derivatives(u, f::AbstractODEFunction{true}, p, t, q)
     tT = Taylor1(typeof(t), q)
     tT[0] = t
-    uT = Taylor1.(u, tT.order)
-    duT = zero.(Taylor1.(u, tT.order))
+    # uT = Taylor1.(u, tT.order)
+    uT = similar(u, Taylor1{eltype(u)})
+    @inbounds @simd ivdep for i in eachindex(u)
+        uT[i] = Taylor1(u[i], q)
+    end
+    duT = zero(uT)
     uauxT = similar(uT)
     TaylorIntegration.jetcoeffs!(f, tT, uT, duT, uauxT, p)
     return [evaluate.(differentiate.(uT, i)) for i in 0:q]

diff --git a/src/integrator_utils.jl b/src/integrator_utils.jl
@@ -40,13 +40,13 @@ function calibrate_solution!(integ, mle_diffusion)
     set_diffusions!(integ.sol, mle_diffusion * integ.cache.default_diffusion)
 
     # Rescale all filtering estimates to have the correct diffusion
-    @simd ivdep for s in integ.sol.x_filt
-        copy!(s.Σ, apply_diffusion(s.Σ, mle_diffusion))
+    @simd ivdep for S in integ.sol.x_filt.Σ
+        copy!(S, apply_diffusion(S, mle_diffusion))
     end
 
     # Re-write into the solution estimates
     for (pu, x) in zip(integ.sol.pu, integ.sol.x_filt)
-        mul!(pu, integ.cache.SolProj, x)
+        _gaussian_mul!(pu, integ.cache.SolProj, x)
     end
     # [(su[:] .= pu) for (su, pu) in zip(integ.sol.u, integ.sol.pu.μ)]
 end
@@ -94,13 +94,13 @@ function smooth_solution!(integ)
         make_preconditioners!(integ.cache, dt)
         P, PI = integ.cache.P, integ.cache.PI
 
-        mul!(x_tmp, P, x[i])
-        mul!(x_tmp2, P, x[i+1])
-        smooth!(x_tmp, x_tmp2, A, Q, integ, diffusions[i])
-        mul!(x[i], PI, x_tmp)
+        _gaussian_mul!(x_tmp, P, x[i])
+        _gaussian_mul!(x_tmp2, P, x[i+1])
+        smooth!(x_tmp, x_tmp2, A, Q, integ.cache, diffusions[i])
+        _gaussian_mul!(x[i], PI, x_tmp)
 
         # Save the smoothed state into the solution
-        mul!(integ.sol.pu[i], integ.cache.SolProj, x[i])
+        _gaussian_mul!(integ.sol.pu[i], integ.cache.SolProj, x[i])
         integ.sol.u[i][:] .= integ.sol.pu[i].μ
     end
     integ.sol.interp = set_smooth(integ.sol.interp)
@@ -121,7 +121,7 @@ function pn_solution_endpoint_match_cur_integrator!(integ)
         OrdinaryDiffEq.copyat_or_push!(
             integ.sol.pu,
             integ.saveiter,
-            mul!(integ.cache.pu_tmp, integ.cache.SolProj, integ.cache.x),
+            _gaussian_mul!(integ.cache.pu_tmp, integ.cache.SolProj, integ.cache.x),
         )
     end
 end
@@ -148,7 +148,7 @@ function DiffEqBase.savevalues!(
         integ.cache.local_diffusion,
     )
     if integ.opts.save_everystep
-        mul!(integ.cache.pu_tmp, integ.cache.SolProj, integ.cache.x),
+        _gaussian_mul!(integ.cache.pu_tmp, integ.cache.SolProj, integ.cache.x),
         OrdinaryDiffEq.copyat_or_push!(integ.sol.pu, integ.saveiter, integ.cache.pu_tmp)
     end
 

diff --git a/src/perform_step.jl b/src/perform_step.jl
@@ -22,7 +22,7 @@ function OrdinaryDiffEq.initialize!(integ, cache::GaussianODEFilterCache)
     OrdinaryDiffEq.copyat_or_push!(
         integ.sol.pu,
         integ.saveiter,
-        mul!(cache.pu_tmp, cache.SolProj, cache.x),
+        _gaussian_mul!(cache.pu_tmp, cache.SolProj, cache.x),
     )
     return nothing
 end
@@ -158,7 +158,9 @@ function evaluate_ode!(
             @unpack du1, uf, jac_config = integ.cache
             uf.f = OrdinaryDiffEq.nlsolve_f(f, alg)
             uf.t = t
-            uf.p = p
+            if !(p isa DiffEqBase.NullParameters)
+                uf.p = p
+            end
             OrdinaryDiffEq.jacobian!(ddu, uf, u_pred, du1, integ, jac_config)
         end
         integ.destats.njacs += 1

diff --git a/src/preconditioning.jl b/src/preconditioning.jl
@@ -1,4 +1,4 @@
-function init_preconditioner(d, q, elType=typeof(1.0))
+function init_preconditioner(d, q, ::Type{elType}=typeof(1.0)) where {elType}
     D = d * (q + 1)
     P = Diagonal(ones(elType, D))
     PI = Diagonal(ones(elType, D))