From 9554581b1e03a0a64529e785a2c413fe94b4e494 Mon Sep 17 00:00:00 2001
From: Nathanael Bosch <nathanael.bosch@uni-tuebingen.de>
Date: Fri, 23 Feb 2024 21:37:28 +0000
Subject: [PATCH] Change the ordering of the state-space model (#305)

* Change the ordering of the state-space model: reorder the Kronecker

* Fix the Kronecker vectrick to do the correct thing

* Test the ldiv trick and fix it

* Make Kronecker h/v/hvcattable and change SolProj object

* Simplify and fix things that relied on the previous more complicated stuff

* JuliaFormatter.jl

* Check that ldiv actually works

* Fix some issues with the prior tests

* Fix more stuff in the diffusion

* Make the kronecker.jl file diff smaller

* Fix more kronecker vec trick issues

* Rename BlockDiag into BlocksOfDiagonals, as that's what it is now

* Make BlocksOfDiagonals work OK

* Fix the update and smooth parts

* Implement hcat and vcat. THIS FIXES 2nd ORDER ODES!

* Add the DiagonalEK1 back into the tests!

* Add the BlockArrays extension that replaces the BlockDiagonal one

* Fix more tests

* JuliaFormatter.jl

* Reduce the prior diff again

* Add DiagonalEK1 to second order ODE tests

* Fix the second order ODE tests

* Add a BlocksOfDiagonals docstring
---
 Project.toml                                  |   6 +-
 ext/BlockArraysExt.jl                         |  13 +
 ext/BlockDiagonalsExt.jl                      |   8 -
 src/ProbNumDiffEq.jl                          |   2 +-
 src/blockdiagonals.jl                         | 321 ----------------
 src/blocksofdiagonals.jl                      | 355 ++++++++++++++++++
 src/caches.jl                                 |  19 +-
 src/callbacks/dataupdate.jl                   |   6 +-
 src/covariance_structure.jl                   |   6 +-
 src/diffusions/apply_diffusion.jl             |  18 +-
 src/diffusions/calibration.jl                 |   9 +-
 src/filtering/markov_kernel.jl                |  38 +-
 src/filtering/predict.jl                      |  16 +-
 src/filtering/update.jl                       |  34 +-
 src/kronecker.jl                              | 141 +++----
 src/measurement_models.jl                     |  10 +-
 src/perform_step.jl                           |   2 +-
 src/preconditioning.jl                        |  24 +-
 src/priors/ioup.jl                            |  16 +-
 src/priors/ltisde.jl                          |   6 +-
 src/projection.jl                             |  68 +---
 test/Project.toml                             |   1 +
 ...blockdiagonals.jl => blocksofdiagonals.jl} |  54 ++-
 test/core/filtering.jl                        |  13 +-
 test/core/kronecker.jl                        |  34 +-
 test/core/preconditioning.jl                  |   6 +-
 test/core/priors.jl                           |  51 ++-
 test/runtests.jl                              |   4 +-
 test/secondorderode.jl                        |   9 +-
 test/state_init.jl                            |   4 +-
 30 files changed, 675 insertions(+), 619 deletions(-)
 create mode 100644 ext/BlockArraysExt.jl
 delete mode 100644 ext/BlockDiagonalsExt.jl
 delete mode 100644 src/blockdiagonals.jl
 create mode 100644 src/blocksofdiagonals.jl
 rename test/core/{blockdiagonals.jl => blocksofdiagonals.jl} (54%)

diff --git a/Project.toml b/Project.toml
index 181432f6f..bed907e17 100644
--- a/Project.toml
+++ b/Project.toml
@@ -35,18 +35,18 @@ TaylorSeries = "6aa5eb33-94cf-58f4-a9d0-e4b2c4fc25ea"
 ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [weakdeps]
-BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
+BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
 DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
 
 [extensions]
-BlockDiagonalsExt = "BlockDiagonals"
+BlockArraysExt = "BlockArrays"
 DiffEqDevToolsExt = "DiffEqDevTools"
 RecipesBaseExt = "RecipesBase"
 
 [compat]
 ArrayAllocators = "0.3"
-BlockDiagonals = "0.1"
+BlockArrays = "0.16"
 DiffEqBase = "6.122"
 DiffEqCallbacks = "2.36, 3"
 DiffEqDevTools = "2"
diff --git a/ext/BlockArraysExt.jl b/ext/BlockArraysExt.jl
new file mode 100644
index 000000000..0d9f1f6d3
--- /dev/null
+++ b/ext/BlockArraysExt.jl
@@ -0,0 +1,13 @@
+module BlockArraysExt
+
+import ProbNumDiffEq: BlocksOfDiagonals
+import BlockArrays: BlockArray, mortar
+import LinearAlgebra: Diagonal
+
+BlockArray(M::BlocksOfDiagonals) = begin
+    d = length(M.blocks)
+    a, b = size(M.blocks[1])
+    return mortar([Diagonal([M.blocks[k][i, j] for k in 1:d]) for i in 1:a, j in 1:b])
+end
+
+end
diff --git a/ext/BlockDiagonalsExt.jl b/ext/BlockDiagonalsExt.jl
deleted file mode 100644
index 37a7f6bfc..000000000
--- a/ext/BlockDiagonalsExt.jl
+++ /dev/null
@@ -1,8 +0,0 @@
-module BlockDiagonalsExt
-
-import ProbNumDiffEq: ProbNumDiffEqBlockDiagonal, blocks
-import BlockDiagonals: BlockDiagonal
-
-BlockDiagonal(M::ProbNumDiffEqBlockDiagonal) = BlockDiagonal(blocks(M))
-
-end
diff --git a/src/ProbNumDiffEq.jl b/src/ProbNumDiffEq.jl
index 1d3756455..63a8b141d 100644
--- a/src/ProbNumDiffEq.jl
+++ b/src/ProbNumDiffEq.jl
@@ -62,7 +62,7 @@ add!(out, toadd) = (out .+= toadd)
 
 include("fast_linalg.jl")
 include("kronecker.jl")
-include("blockdiagonals.jl")
+include("blocksofdiagonals.jl")
 include("covariance_structure.jl")
 export IsometricKroneckerCovariance, DenseCovariance, BlockDiagonalCovariance
 
diff --git a/src/blockdiagonals.jl b/src/blockdiagonals.jl
deleted file mode 100644
index 6ec085a01..000000000
--- a/src/blockdiagonals.jl
+++ /dev/null
@@ -1,321 +0,0 @@
-"""
-    ProbNumDiffEqBlockDiagonal(blocks::Vector{V}) where {T,V<:AbstractMatrix{T}}
-
-A very minimal but fast re-implementation of `BlockDiagonals.Blockdiagonal`.
-"""
-struct ProbNumDiffEqBlockDiagonal{T<:Number,V<:AbstractMatrix{T}} <: AbstractMatrix{T}
-    blocks::Vector{V}
-    function ProbNumDiffEqBlockDiagonal{T,V}(
-        blocks::Vector{V},
-    ) where {T,V<:AbstractMatrix{T}}
-        return new{T,V}(blocks)
-    end
-end
-function ProbNumDiffEqBlockDiagonal(blocks::Vector{V}) where {T,V<:AbstractMatrix{T}}
-    return ProbNumDiffEqBlockDiagonal{T,V}(blocks)
-end
-const BlockDiag = ProbNumDiffEqBlockDiagonal
-
-blocks(B::BlockDiag) = B.blocks
-nblocks(B::BlockDiag) = length(B.blocks)
-size(B::BlockDiag) = mapreduce(size, ((a, b), (c, d)) -> (a + c, b + d), blocks(B))
-
-Base.@propagate_inbounds function Base.getindex(
-    B::BlockDiag{T},
-    i::Integer,
-    j::Integer,
-) where {T}
-    all((0, 0) .< (i, j) .<= size(B)) || throw(BoundsError(B, (i, j)))
-
-    p = 1
-    Si, Sj = size(blocks(B)[p])
-    while p <= nblocks(B)
-        if i <= Si && j <= Sj
-            return blocks(B)[p][i, j]
-        elseif (i <= Si && j > Sj) || (j <= Sj && i > Si)
-            return zero(T)
-        else
-            i -= Si
-            j -= Sj
-            p += 1
-        end
-    end
-    error("This shouldn't happen")
-end
-
-Base.view(::BlockDiag, idxs...) =
-    throw(ErrorException("`BlockDiag` does not support views!"))
-
-copy(B::BlockDiag) = BlockDiag(copy.(blocks(B)))
-copy!(B::BlockDiag, A::BlockDiag) = begin
-    @assert length(A.blocks) == length(B.blocks)
-    @simd ivdep for i in eachindex(blocks(B))
-        copy!(B.blocks[i], A.blocks[i])
-    end
-    return B
-end
-similar(B::BlockDiag) = BlockDiag(similar.(blocks(B)))
-zero(B::BlockDiag) = BlockDiag(zero.(blocks(B)))
-
-# Sums of BlockDiag
-Base.:+(A::BlockDiag, B::BlockDiag) = begin
-    @assert nblocks(A) == nblocks(B)
-    return BlockDiag([Ai + Bi for (Ai, Bi) in zip(blocks(A), blocks(B))])
-end
-Base.:-(A::BlockDiag, B::BlockDiag) = begin
-    @assert nblocks(A) == nblocks(B)
-    return BlockDiag([Ai - Bi for (Ai, Bi) in zip(blocks(A), blocks(B))])
-end
-
-add!(out::BlockDiag, toadd::BlockDiag) = begin
-    @assert nblocks(out) == nblocks(toadd)
-    @simd ivdep for i in eachindex(blocks(out))
-        add!(blocks(out)[i], blocks(toadd)[i])
-    end
-    return out
-end
-
-# Mul with Scalar or UniformScaling
-Base.:*(a::Number, M::BlockDiag) = BlockDiag([a * B for B in blocks(M)])
-Base.:*(M::BlockDiag, a::Number) = BlockDiag([B * a for B in blocks(M)])
-Base.:*(U::UniformScaling, M::BlockDiag) = BlockDiag([U * B for B in blocks(M)])
-Base.:*(M::BlockDiag, U::UniformScaling) = BlockDiag([B * U for B in blocks(M)])
-
-# Mul between BockDiag's
-Base.:*(A::BlockDiag, B::BlockDiag) = begin
-    @assert length(A.blocks) == length(B.blocks)
-    return BlockDiag([Ai * Bi for (Ai, Bi) in zip(blocks(A), blocks(B))])
-end
-Base.:*(A::Adjoint{T,<:BlockDiag}, B::BlockDiag) where {T} = begin
-    @assert length(A.parent.blocks) == length(B.blocks)
-    return BlockDiag([Ai' * Bi for (Ai, Bi) in zip(blocks(A.parent), blocks(B))])
-end
-Base.:*(A::BlockDiag, B::Adjoint{T,<:BlockDiag}) where {T} = begin
-    @assert length(A.blocks) == length(B.parent.blocks)
-    return BlockDiag([Ai * Bi' for (Ai, Bi) in zip(blocks(A), blocks(B.parent))])
-end
-
-# Standard LinearAlgebra.mul!
-for _mul! in (:mul!, :_matmul!)
-    @eval $_mul!(C::BlockDiag, A::BlockDiag, B::BlockDiag) = begin
-        @assert length(C.blocks) == length(A.blocks) == length(B.blocks)
-        @simd ivdep for i in eachindex(blocks(C))
-            @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i])
-        end
-        return C
-    end
-    (_mul! == :_matmul!) && @eval $_mul!(
-        C::BlockDiag{T},
-        A::BlockDiag{T},
-        B::BlockDiag{T},
-    ) where {T<:LinearAlgebra.BlasFloat} = begin
-        @assert length(C.blocks) == length(A.blocks) == length(B.blocks)
-        @simd ivdep for i in eachindex(blocks(C))
-            @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i])
-        end
-        return C
-    end
-
-    @eval $_mul!(C::BlockDiag, A::BlockDiag, B::BlockDiag, alpha::Number, beta::Number) =
-        begin
-            @assert length(C.blocks) == length(A.blocks) == length(B.blocks)
-            @simd ivdep for i in eachindex(blocks(C))
-                @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i], alpha, beta)
-            end
-            return C
-        end
-    (_mul! == :_matmul!) && @eval $_mul!(
-        C::BlockDiag{T},
-        A::BlockDiag{T},
-        B::BlockDiag{T},
-        alpha::Number,
-        beta::Number,
-    ) where {T<:LinearAlgebra.BlasFloat} = begin
-        @assert length(C.blocks) == length(A.blocks) == length(B.blocks)
-        @simd ivdep for i in eachindex(blocks(C))
-            @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i], alpha, beta)
-        end
-        return C
-    end
-
-    @eval $_mul!(C::BlockDiag, A::Adjoint{<:Number,<:BlockDiag}, B::BlockDiag) = begin
-        @assert length(C.blocks) == length(A.parent.blocks) == length(B.blocks)
-        @simd ivdep for i in eachindex(blocks(C))
-            @inbounds $_mul!(C.blocks[i], adjoint(A.parent.blocks[i]), B.blocks[i])
-        end
-        return C
-    end
-    (_mul! == :_matmul!) && @eval $_mul!(
-        C::BlockDiag{T},
-        A::BlockDiag{T},
-        B::Adjoint{T,<:BlockDiag{T}},
-    ) where {T<:LinearAlgebra.BlasFloat} = begin
-        @assert length(C.blocks) == length(A.blocks) == length(B.parent.blocks)
-        @simd ivdep for i in eachindex(blocks(C))
-            @inbounds $_mul!(C.blocks[i], A.blocks[i], adjoint(B.parent.blocks[i]))
-        end
-        return C
-    end
-
-    @eval $_mul!(C::BlockDiag, A::BlockDiag, B::Adjoint{<:Number,<:BlockDiag}) = begin
-        @assert length(C.blocks) == length(A.blocks) == length(B.parent.blocks)
-        @simd ivdep for i in eachindex(blocks(C))
-            @inbounds $_mul!(C.blocks[i], A.blocks[i], adjoint(B.parent.blocks[i]))
-        end
-        return C
-    end
-    (_mul! == :_matmul!) && @eval $_mul!(
-        C::BlockDiag{T},
-        A::Adjoint{T,<:BlockDiag{T}},
-        B::BlockDiag{T},
-    ) where {T<:LinearAlgebra.BlasFloat} = begin
-        @assert length(C.blocks) == length(A.parent.blocks) == length(B.blocks)
-        @simd ivdep for i in eachindex(blocks(C))
-            @inbounds $_mul!(C.blocks[i], adjoint(A.parent.blocks[i]), B.blocks[i])
-        end
-        return C
-    end
-
-    @eval $_mul!(C::BlockDiag, A::Number, B::BlockDiag) = begin
-        @assert length(C.blocks) == length(B.blocks)
-        @simd ivdep for i in eachindex(blocks(C))
-            @inbounds $_mul!(C.blocks[i], A, B.blocks[i])
-        end
-        return C
-    end
-    @eval $_mul!(C::BlockDiag, A::BlockDiag, B::Number) = begin
-        @assert length(C.blocks) == length(A.blocks)
-        @simd ivdep for i in eachindex(blocks(C))
-            @inbounds $_mul!(C.blocks[i], A.blocks[i], B)
-        end
-        return C
-    end
-
-    @eval $_mul!(
-        C::AbstractVector,
-        A::BlockDiag,
-        B::AbstractVector,
-    ) = begin
-        @assert size(A, 2) == length(B)
-        @assert length(C) == size(A, 1)
-        ic, ib = 1, 1
-        for i in eachindex(blocks(A))
-            d1, d2 = size(A.blocks[i])
-            @inbounds $_mul!(view(C, ic:(ic+d1-1)), A.blocks[i], view(B, ib:(ib+d2-1)))
-            ic += d1
-            ib += d2
-        end
-        return C
-    end
-    (_mul! == :_matmul!) && @eval $_mul!(
-        C::AbstractVector{T},
-        A::BlockDiag{T},
-        B::AbstractVector{T},
-    ) where {T<:LinearAlgebra.BlasFloat} = begin
-        @assert size(A, 2) == length(B)
-        @assert length(C) == size(A, 1)
-        ic, ib = 1, 1
-        for i in eachindex(blocks(A))
-            d1, d2 = size(A.blocks[i])
-            @inbounds $_mul!(view(C, ic:(ic+d1-1)), A.blocks[i], view(B, ib:(ib+d2-1)))
-            ic += d1
-            ib += d2
-        end
-        return C
-    end
-end
-
-LinearAlgebra.rmul!(B::BlockDiag, n::Number) = begin
-    @simd ivdep for i in eachindex(B.blocks)
-        rmul!(B.blocks[i], n)
-    end
-    return B
-end
-
-LinearAlgebra.inv(A::BlockDiag) = BlockDiag(inv.(blocks(A)))
-
-copy!(A::BlockDiag, B::Diagonal) = begin
-    @assert size(A) == size(B)
-    i = 1
-    for Ai in blocks(A)
-        d = LinearAlgebra.checksquare(Ai)
-        @views copy!(Ai, Diagonal(B.diag[i:i+d-1]))
-        i += d
-    end
-    return A
-end
-
-Base.:*(D::Diagonal, A::BlockDiag) = begin
-    @assert size(D, 2) == size(A, 1)
-    local i = 1
-    outblocks = map(blocks(A)) do Ai
-        d = size(Ai, 1)
-        outi = Diagonal(view(D.diag, i:(i+d-1))) * Ai
-        i += d
-        outi
-    end
-    return BlockDiag(outblocks)
-end
-Base.:*(A::BlockDiag, D::Diagonal) = begin
-    local i = 1
-    outblocks = map(blocks(A)) do Ai
-        d = size(Ai, 2)
-        outi = Ai * Diagonal(view(D.diag, i:(i+d-1)))
-        i += d
-        outi
-    end
-    return BlockDiag(outblocks)
-end
-for _mul! in (:mul!, :_matmul!)
-    @eval $_mul!(C::BlockDiag, A::BlockDiag, B::Diagonal) = begin
-        local i = 1
-        @assert nblocks(C) == nblocks(A)
-        for j in eachindex(blocks(C))
-            Ci, Ai = blocks(C)[j], blocks(A)[j]
-            d = size(Ai, 2)
-            $_mul!(Ci, Ai, Diagonal(view(B.diag, i:(i+d-1))))
-            i += d
-        end
-        return C
-    end
-    @eval $_mul!(C::BlockDiag, A::Diagonal, B::BlockDiag) = begin
-        local i = 1
-        @assert nblocks(C) == nblocks(B)
-        for j in eachindex(blocks(C))
-            Ci, Bi = blocks(C)[j], blocks(B)[j]
-            d = size(Bi, 1)
-            $_mul!(Ci, Diagonal(view(A.diag, i:(i+d-1))), Bi)
-            i += d
-        end
-        return C
-    end
-    @eval $_mul!(C::BlockDiag, A::BlockDiag, B::Diagonal, alpha::Number, beta::Number) =
-        begin
-            local i = 1
-            @assert nblocks(C) == nblocks(A)
-            for j in eachindex(blocks(C))
-                Ci, Ai = blocks(C)[j], blocks(A)[j]
-                d = size(Ai, 2)
-                $_mul!(Ci, Ai, Diagonal(view(B.diag, i:(i+d-1))), alpha, beta)
-                i += d
-            end
-            return C
-        end
-    @eval $_mul!(C::BlockDiag, A::Diagonal, B::BlockDiag, alpha::Number, beta::Number) =
-        begin
-            i = 1
-            @assert nblocks(C) == nblocks(B)
-            for j in eachindex(blocks(C))
-                Ci, Bi = blocks(C)[j], blocks(B)[j]
-                d = size(Bi, 1)
-                @inbounds $_mul!(Ci, Diagonal(view(A.diag, i:(i+d-1))), Bi, alpha, beta)
-                i += d
-            end
-            return C
-        end
-end
-
-Base.isequal(A::BlockDiag, B::BlockDiag) =
-    length(A.blocks) == length(B.blocks) && all(map(isequal, A.blocks, B.blocks))
-==(A::BlockDiag, B::BlockDiag) =
-    length(A.blocks) == length(B.blocks) && all(map(==, A.blocks, B.blocks))
diff --git a/src/blocksofdiagonals.jl b/src/blocksofdiagonals.jl
new file mode 100644
index 000000000..2192897e0
--- /dev/null
+++ b/src/blocksofdiagonals.jl
@@ -0,0 +1,355 @@
+"""
+    BlocksOfDiagonals(blocks::Vector{V}) where {T,V<:AbstractMatrix{T}}
+
+A block matrix where each block is a diagonal matrix.
+
+The implementation of this object is closer to that of a block-diagonal matrix, as done
+e.g. in BlockDiagonals.jl. That's because `BlocksOfDiagonals` can be permuted to be a
+block-diagonal matrix. Also, in our context here `BlocksOfDiagonals` is used to represent
+covariance matrices of states, and then each block can be interpreted as the covariance
+matrix of a certain dimension of a state and its derivatives.
+
+`BlocksOfDiagonals` can also be transformed into a `BlockArrays.BlockArray`!
+Just call `BlockArrays.BlockArray(A::BlocksOfDiagonals)`.
+"""
+struct BlocksOfDiagonals{T<:Number,V<:AbstractMatrix{T}} <: AbstractMatrix{T}
+    blocks::Vector{V}
+    function BlocksOfDiagonals{T,V}(
+        blocks::Vector{V},
+    ) where {T,V<:AbstractMatrix{T}}
+        @assert all(map(b -> size(b) == size(blocks[1]), blocks))
+        return new{T,V}(blocks)
+    end
+end
+function BlocksOfDiagonals(blocks::Vector{V}) where {T,V<:AbstractMatrix{T}}
+    return BlocksOfDiagonals{T,V}(blocks)
+end
+
+blocks(B::BlocksOfDiagonals) = B.blocks
+nblocks(B::BlocksOfDiagonals) = length(B.blocks)
+size(B::BlocksOfDiagonals) = mapreduce(size, ((a, b), (c, d)) -> (a + c, b + d), blocks(B))
+
+Base.@propagate_inbounds function Base.getindex(
+    B::BlocksOfDiagonals{T},
+    i::Integer,
+    j::Integer,
+) where {T}
+    all((0, 0) .< (i, j) .<= size(B)) || throw(BoundsError(B, (i, j)))
+
+    d = nblocks(B)
+
+    block_idx_i = (i - 1) ÷ d + 1
+    block_idx_j = (j - 1) ÷ d + 1
+
+    inside_block_idx_i, inside_block_idx_j = (i - 1) % d + 1, (j - 1) % d + 1
+
+    if inside_block_idx_i != inside_block_idx_j
+        return zero(T)
+    else
+        return blocks(B)[inside_block_idx_i][block_idx_i, block_idx_j]
+    end
+    error("This shouldn't happen")
+end
+
+Base.view(::BlocksOfDiagonals, idxs...) =
+    throw(ErrorException("`BlocksOfDiagonals` does not support views!"))
+
+copy(B::BlocksOfDiagonals) = BlocksOfDiagonals(copy.(blocks(B)))
+copy!(B::BlocksOfDiagonals, A::BlocksOfDiagonals) = begin
+    @assert length(A.blocks) == length(B.blocks)
+    @simd ivdep for i in eachindex(blocks(B))
+        copy!(B.blocks[i], A.blocks[i])
+    end
+    return B
+end
+similar(B::BlocksOfDiagonals) = BlocksOfDiagonals(similar.(blocks(B)))
+zero(B::BlocksOfDiagonals) = BlocksOfDiagonals(zero.(blocks(B)))
+
+# Sums of BlocksOfDiagonals
+Base.:+(A::BlocksOfDiagonals, B::BlocksOfDiagonals) = begin
+    @assert nblocks(A) == nblocks(B)
+    return BlocksOfDiagonals([Ai + Bi for (Ai, Bi) in zip(blocks(A), blocks(B))])
+end
+Base.:-(A::BlocksOfDiagonals, B::BlocksOfDiagonals) = begin
+    @assert nblocks(A) == nblocks(B)
+    return BlocksOfDiagonals([Ai - Bi for (Ai, Bi) in zip(blocks(A), blocks(B))])
+end
+
+add!(out::BlocksOfDiagonals, toadd::BlocksOfDiagonals) = begin
+    @assert nblocks(out) == nblocks(toadd)
+    @simd ivdep for i in eachindex(blocks(out))
+        add!(blocks(out)[i], blocks(toadd)[i])
+    end
+    return out
+end
+
+# Mul with Scalar or UniformScaling
+Base.:*(a::Number, M::BlocksOfDiagonals) = BlocksOfDiagonals([a * B for B in blocks(M)])
+Base.:*(M::BlocksOfDiagonals, a::Number) = BlocksOfDiagonals([B * a for B in blocks(M)])
+Base.:*(U::UniformScaling, M::BlocksOfDiagonals) =
+    BlocksOfDiagonals([U * B for B in blocks(M)])
+Base.:*(M::BlocksOfDiagonals, U::UniformScaling) =
+    BlocksOfDiagonals([B * U for B in blocks(M)])
+
+# Mul between BockDiag's
+Base.:*(A::BlocksOfDiagonals, B::BlocksOfDiagonals) = begin
+    @assert length(A.blocks) == length(B.blocks)
+    return BlocksOfDiagonals([Ai * Bi for (Ai, Bi) in zip(blocks(A), blocks(B))])
+end
+Base.:*(A::Adjoint{T,<:BlocksOfDiagonals}, B::BlocksOfDiagonals) where {T} = begin
+    @assert length(A.parent.blocks) == length(B.blocks)
+    return BlocksOfDiagonals([Ai' * Bi for (Ai, Bi) in zip(blocks(A.parent), blocks(B))])
+end
+Base.:*(A::BlocksOfDiagonals, B::Adjoint{T,<:BlocksOfDiagonals}) where {T} = begin
+    @assert length(A.blocks) == length(B.parent.blocks)
+    return BlocksOfDiagonals([Ai * Bi' for (Ai, Bi) in zip(blocks(A), blocks(B.parent))])
+end
+
+# Standard LinearAlgebra.mul!
+for _mul! in (:mul!, :_matmul!)
+    @eval $_mul!(C::BlocksOfDiagonals, A::BlocksOfDiagonals, B::BlocksOfDiagonals) = begin
+        @assert length(C.blocks) == length(A.blocks) == length(B.blocks)
+        @simd ivdep for i in eachindex(blocks(C))
+            @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i])
+        end
+        return C
+    end
+    (_mul! == :_matmul!) && @eval $_mul!(
+        C::BlocksOfDiagonals{T},
+        A::BlocksOfDiagonals{T},
+        B::BlocksOfDiagonals{T},
+    ) where {T<:LinearAlgebra.BlasFloat} = begin
+        @assert length(C.blocks) == length(A.blocks) == length(B.blocks)
+        @simd ivdep for i in eachindex(blocks(C))
+            @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i])
+        end
+        return C
+    end
+
+    @eval $_mul!(
+        C::BlocksOfDiagonals,
+        A::BlocksOfDiagonals,
+        B::BlocksOfDiagonals,
+        alpha::Number,
+        beta::Number,
+    ) =
+        begin
+            @assert length(C.blocks) == length(A.blocks) == length(B.blocks)
+            @simd ivdep for i in eachindex(blocks(C))
+                @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i], alpha, beta)
+            end
+            return C
+        end
+    (_mul! == :_matmul!) && @eval $_mul!(
+        C::BlocksOfDiagonals{T},
+        A::BlocksOfDiagonals{T},
+        B::BlocksOfDiagonals{T},
+        alpha::Number,
+        beta::Number,
+    ) where {T<:LinearAlgebra.BlasFloat} = begin
+        @assert length(C.blocks) == length(A.blocks) == length(B.blocks)
+        @simd ivdep for i in eachindex(blocks(C))
+            @inbounds $_mul!(C.blocks[i], A.blocks[i], B.blocks[i], alpha, beta)
+        end
+        return C
+    end
+
+    @eval $_mul!(
+        C::BlocksOfDiagonals,
+        A::Adjoint{<:Number,<:BlocksOfDiagonals},
+        B::BlocksOfDiagonals,
+    ) = begin
+        @assert length(C.blocks) == length(A.parent.blocks) == length(B.blocks)
+        @simd ivdep for i in eachindex(blocks(C))
+            @inbounds $_mul!(C.blocks[i], adjoint(A.parent.blocks[i]), B.blocks[i])
+        end
+        return C
+    end
+    (_mul! == :_matmul!) && @eval $_mul!(
+        C::BlocksOfDiagonals{T},
+        A::BlocksOfDiagonals{T},
+        B::Adjoint{T,<:BlocksOfDiagonals{T}},
+    ) where {T<:LinearAlgebra.BlasFloat} = begin
+        @assert length(C.blocks) == length(A.blocks) == length(B.parent.blocks)
+        @simd ivdep for i in eachindex(blocks(C))
+            @inbounds $_mul!(C.blocks[i], A.blocks[i], adjoint(B.parent.blocks[i]))
+        end
+        return C
+    end
+
+    @eval $_mul!(
+        C::BlocksOfDiagonals,
+        A::BlocksOfDiagonals,
+        B::Adjoint{<:Number,<:BlocksOfDiagonals},
+    ) = begin
+        @assert length(C.blocks) == length(A.blocks) == length(B.parent.blocks)
+        @simd ivdep for i in eachindex(blocks(C))
+            @inbounds $_mul!(C.blocks[i], A.blocks[i], adjoint(B.parent.blocks[i]))
+        end
+        return C
+    end
+    (_mul! == :_matmul!) && @eval $_mul!(
+        C::BlocksOfDiagonals{T},
+        A::Adjoint{T,<:BlocksOfDiagonals{T}},
+        B::BlocksOfDiagonals{T},
+    ) where {T<:LinearAlgebra.BlasFloat} = begin
+        @assert length(C.blocks) == length(A.parent.blocks) == length(B.blocks)
+        @simd ivdep for i in eachindex(blocks(C))
+            @inbounds $_mul!(C.blocks[i], adjoint(A.parent.blocks[i]), B.blocks[i])
+        end
+        return C
+    end
+
+    @eval $_mul!(C::BlocksOfDiagonals, A::Number, B::BlocksOfDiagonals) = begin
+        @assert length(C.blocks) == length(B.blocks)
+        @simd ivdep for i in eachindex(blocks(C))
+            @inbounds $_mul!(C.blocks[i], A, B.blocks[i])
+        end
+        return C
+    end
+    @eval $_mul!(C::BlocksOfDiagonals, A::BlocksOfDiagonals, B::Number) = begin
+        @assert length(C.blocks) == length(A.blocks)
+        @simd ivdep for i in eachindex(blocks(C))
+            @inbounds $_mul!(C.blocks[i], A.blocks[i], B)
+        end
+        return C
+    end
+
+    @eval $_mul!(
+        C::AbstractVector,
+        A::BlocksOfDiagonals,
+        B::AbstractVector,
+    ) = begin
+        @assert size(A, 2) == length(B)
+        @assert length(C) == size(A, 1)
+        D = nblocks(A)
+        d1, d2 = size(A.blocks[1])
+        for i in eachindex(blocks(A))
+            @inbounds $_mul!(view(C, i:D:D*d1), A.blocks[i], view(B, i:D:D*d2))
+        end
+        return C
+    end
+    (_mul! == :_matmul!) && @eval $_mul!(
+        C::AbstractVector{T},
+        A::BlocksOfDiagonals{T},
+        B::AbstractVector{T},
+    ) where {T<:LinearAlgebra.BlasFloat} = begin
+        @assert size(A, 2) == length(B)
+        @assert length(C) == size(A, 1)
+        D = nblocks(A)
+        d1, d2 = size(A.blocks[1])
+        for i in eachindex(blocks(A))
+            @inbounds $_mul!(view(C, i:D:D*d1), A.blocks[i], view(B, i:D:D*d2))
+        end
+        return C
+    end
+end
+
+LinearAlgebra.rmul!(B::BlocksOfDiagonals, n::Number) = begin
+    @simd ivdep for i in eachindex(B.blocks)
+        rmul!(B.blocks[i], n)
+    end
+    return B
+end
+
+LinearAlgebra.inv(A::BlocksOfDiagonals) = BlocksOfDiagonals(inv.(blocks(A)))
+
+copy!(A::BlocksOfDiagonals, B::Diagonal) = begin
+    @assert size(A) == size(B)
+    D = nblocks(A)
+    LinearAlgebra.checksquare(blocks(A)[1])
+    for i in eachindex(blocks(A))
+        @views copy!(blocks(A)[i], Diagonal(B.diag[i:D:end]))
+    end
+    return A
+end
+
+Base.:*(D::Diagonal, A::BlocksOfDiagonals) = begin
+    @assert size(D, 2) == size(A, 1)
+    local S = nblocks(A)
+    outblocks = map(enumerate(blocks(A))) do (i, Ai)
+        d = size(Ai, 1)
+        outi = Diagonal(view(D.diag, i:S:S*d)) * Ai
+        outi
+    end
+    return BlocksOfDiagonals(outblocks)
+end
+Base.:*(A::BlocksOfDiagonals, D::Diagonal) = begin
+    local S = nblocks(A)
+    outblocks = map(enumerate(blocks(A))) do (i, Ai)
+        d = size(Ai, 2)
+        outi = Ai * Diagonal(view(D.diag, i:S:S*d))
+        outi
+    end
+    return BlocksOfDiagonals(outblocks)
+end
+for _mul! in (:mul!, :_matmul!)
+    @eval $_mul!(C::BlocksOfDiagonals, A::BlocksOfDiagonals, B::Diagonal) = begin
+        @assert nblocks(C) == nblocks(A)
+        D = nblocks(A)
+        for i in eachindex(blocks(C))
+            Ci, Ai = blocks(C)[i], blocks(A)[i]
+            d = size(Ai, 2)
+            $_mul!(Ci, Ai, Diagonal(view(B.diag, i:D:D*d)))
+        end
+        return C
+    end
+    @eval $_mul!(C::BlocksOfDiagonals, A::Diagonal, B::BlocksOfDiagonals) = begin
+        @assert nblocks(C) == nblocks(B)
+        D = nblocks(B)
+        for i in eachindex(blocks(C))
+            Ci, Bi = blocks(C)[i], blocks(B)[i]
+            d = size(Bi, 1)
+            $_mul!(Ci, Diagonal(view(A.diag, i:D:D*d)), Bi)
+        end
+        return C
+    end
+    @eval $_mul!(
+        C::BlocksOfDiagonals,
+        A::BlocksOfDiagonals,
+        B::Diagonal,
+        alpha::Number,
+        beta::Number,
+    ) =
+        begin
+            @assert nblocks(C) == nblocks(A)
+            D = nblocks(A)
+            for i in eachindex(blocks(C))
+                Ci, Ai = blocks(C)[i], blocks(A)[i]
+                d = size(Ai, 2)
+                $_mul!(Ci, Ai, Diagonal(view(B.diag, i:D:D*d)), alpha, beta)
+            end
+            return C
+        end
+    @eval $_mul!(
+        C::BlocksOfDiagonals,
+        A::Diagonal,
+        B::BlocksOfDiagonals,
+        alpha::Number,
+        beta::Number,
+    ) =
+        begin
+            @assert nblocks(C) == nblocks(B)
+            D = nblocks(B)
+            for i in eachindex(blocks(C))
+                Ci, Bi = blocks(C)[i], blocks(B)[i]
+                d = size(Bi, 1)
+                @inbounds $_mul!(Ci, Diagonal(view(A.diag, i:D:D*d)), Bi, alpha, beta)
+            end
+            return C
+        end
+end
+
+Base.isequal(A::BlocksOfDiagonals, B::BlocksOfDiagonals) =
+    length(A.blocks) == length(B.blocks) && all(map(isequal, A.blocks, B.blocks))
+==(A::BlocksOfDiagonals, B::BlocksOfDiagonals) =
+    length(A.blocks) == length(B.blocks) && all(map(==, A.blocks, B.blocks))
+
+function Base.vcat(M1::BlocksOfDiagonals, M2::BlocksOfDiagonals)
+    @assert nblocks(M1) == nblocks(M2)
+    return BlocksOfDiagonals([vcat(B1, B2) for (B1, B2) in zip(blocks(M1), blocks(M2))])
+end
+function Base.hcat(M1::BlocksOfDiagonals, M2::BlocksOfDiagonals)
+    @assert nblocks(M1) == nblocks(M2)
+    return BlocksOfDiagonals([hcat(B1, B2) for (B1, B2) in zip(blocks(M1), blocks(M2))])
+end
diff --git a/src/caches.jl b/src/caches.jl
index ab28e0657..8cd96353f 100644
--- a/src/caches.jl
+++ b/src/caches.jl
@@ -119,7 +119,7 @@ function OrdinaryDiffEq.alg_cache(
     Proj = projection(FAC)
     E0, E1, E2 = Proj(0), Proj(1), Proj(2)
     @assert f isa SciMLBase.AbstractODEFunction
-    SolProj = solution_space_projection(FAC, is_secondorder_ode)
+    SolProj = is_secondorder_ode ? [E1; E0] : copy(E0)
 
     # Prior dynamics
     if !(dim(alg.prior) == 1 || dim(alg.prior) == d)
@@ -182,19 +182,10 @@ function OrdinaryDiffEq.alg_cache(
         !isnothing(f.jac_prototype) ?
         f.jac_prototype : zeros(uElType, length(u), length(u))
     _d = is_secondorder_ode ? 2d : d
-    pu_tmp = if is_secondorder_ode
-        Gaussian(similar(Array{uElType}, 2d),
-            PSDMatrix(
-                if FAC isa IsometricKroneckerCovariance
-                    Kronecker.kronecker(similar(Matrix{uElType}, D ÷ d, _d ÷ d), I(d))
-                elseif FAC isa BlockDiagonalCovariance
-                    error("I have no idea")
-                else
-                    similar(Matrix{uElType}, D, _d)
-                end))
-    else
-        Gaussian(similar(Array{uElType}, d), PSDMatrix(factorized_similar(FAC, D, d)))
-    end
+    pu_tmp = Gaussian(
+        similar(Array{uElType}, _d),
+        PSDMatrix(factorized_similar(FAC, D, _d)),
+    )
 
     K = factorized_similar(FAC, D, d)
     G = factorized_similar(FAC, D, D)
diff --git a/src/callbacks/dataupdate.jl b/src/callbacks/dataupdate.jl
index def5759ac..eac35dace 100644
--- a/src/callbacks/dataupdate.jl
+++ b/src/callbacks/dataupdate.jl
@@ -90,9 +90,9 @@ make_obscov_sqrt(
     H::IsometricKroneckerProduct,
     RR::IsometricKroneckerProduct,
 ) =
-    IsometricKroneckerProduct(PR.ldim, make_obscov_sqrt(PR.B, H.B, RR.B))
-make_obscov_sqrt(PR::BlockDiag, H::BlockDiag, RR::BlockDiag) =
-    BlockDiag([
+    IsometricKroneckerProduct(PR.rdim, make_obscov_sqrt(PR.B, H.B, RR.B))
+make_obscov_sqrt(PR::BlocksOfDiagonals, H::BlocksOfDiagonals, RR::BlocksOfDiagonals) =
+    BlocksOfDiagonals([
         make_obscov_sqrt(blocks(PR)[i], blocks(H)[i], blocks(RR)[i]) for
         i in eachindex(blocks(PR))
     ])
diff --git a/src/covariance_structure.jl b/src/covariance_structure.jl
index 2809a3e42..2e421f493 100644
--- a/src/covariance_structure.jl
+++ b/src/covariance_structure.jl
@@ -34,19 +34,19 @@ factorized_zeros(C::BlockDiagonalCovariance{T}, sizes...) where {T} = begin
     for s in sizes
         @assert s % C.d == 0
     end
-    return BlockDiag([Array{T}(calloc, (s ÷ C.d for s in sizes)...) for _ in 1:C.d])
+    return BlocksOfDiagonals([Array{T}(calloc, (s ÷ C.d for s in sizes)...) for _ in 1:C.d])
 end
 factorized_similar(C::BlockDiagonalCovariance{T}, size1, size2) where {T} = begin
     for s in (size1, size2)
         @assert s % C.d == 0
     end
-    return BlockDiag([similar(Matrix{T}, size1 ÷ C.d, size2 ÷ C.d) for _ in 1:C.d])
+    return BlocksOfDiagonals([similar(Matrix{T}, size1 ÷ C.d, size2 ÷ C.d) for _ in 1:C.d])
 end
 
 to_factorized_matrix(::DenseCovariance, M::AbstractMatrix) = Matrix(M)
 to_factorized_matrix(::IsometricKroneckerCovariance, M::IsometricKroneckerProduct) = M
 to_factorized_matrix(C::BlockDiagonalCovariance, M::IsometricKroneckerProduct) =
-    BlockDiag([copy(M.B) for _ in 1:C.d])
+    BlocksOfDiagonals([copy(M.B) for _ in 1:C.d])
 to_factorized_matrix(C::BlockDiagonalCovariance, M::Diagonal) =
     copy!(factorized_similar(C, size(M)...), M)
 to_factorized_matrix(
diff --git a/src/diffusions/apply_diffusion.jl b/src/diffusions/apply_diffusion.jl
index 9638ba587..3b7e2a9ee 100644
--- a/src/diffusions/apply_diffusion.jl
+++ b/src/diffusions/apply_diffusion.jl
@@ -12,17 +12,19 @@ apply_diffusion(
     diffusion::Diagonal{T,<:FillArrays.Fill},
 ) where {T} = apply_diffusion(Q, diffusion.diag.value)
 apply_diffusion(
-    Q::PSDMatrix{T,<:BlockDiag},
+    Q::PSDMatrix{T,<:BlocksOfDiagonals},
     diffusion::Diagonal{T,<:Vector},
 ) where {T} = PSDMatrix(
-    BlockDiag([blocks(Q.R)[i] * sqrt.(diffusion.diag[i]) for i in eachindex(blocks(Q.R))]))
+    BlocksOfDiagonals([
+        blocks(Q.R)[i] * sqrt.(diffusion.diag[i]) for i in eachindex(blocks(Q.R))
+    ]))
 apply_diffusion(
     Q::PSDMatrix{T,<:Matrix},
     diffusion::Diagonal{T,<:Vector},
 ) where {T} = begin
     d = size(diffusion, 1)
     q = size(Q, 1) ÷ d - 1
-    return PSDMatrix(Q.R * sqrt.(Kronecker.kronecker(diffusion, Eye(q + 1))))
+    return PSDMatrix(Q.R * sqrt.(Kronecker.kronecker(Eye(q + 1), diffusion)))
 end
 
 """
@@ -38,7 +40,7 @@ apply_diffusion!(
     return Q
 end
 apply_diffusion!(
-    Q::PSDMatrix{T,<:BlockDiag},
+    Q::PSDMatrix{T,<:BlocksOfDiagonals},
     diffusion::Diagonal{T,<:Vector},
 ) where {T} = begin
     @simd ivdep for i in eachindex(blocks(Q.R))
@@ -55,7 +57,7 @@ apply_diffusion!(
     D = size(Q, 1)
     q = D ÷ d - 1
     # _matmul!(Q.R, Q.R, Kronecker.kronecker(sqrt.(diffusion), Eye(q + 1)))
-    _matmul!(Q.R, Q.R, kron(sqrt.(diffusion), Eye(q + 1)))
+    _matmul!(Q.R, Q.R, kron(Eye(q + 1), sqrt.(diffusion)))
     return Q
 end
 
@@ -78,8 +80,8 @@ apply_diffusion!(
     diffusion::Diagonal{<:Number,<:FillArrays.Fill},
 ) = apply_diffusion!(out, Q, diffusion.diag.value)
 apply_diffusion!(
-    out::PSDMatrix{T,<:BlockDiag},
-    Q::PSDMatrix{T,<:BlockDiag},
+    out::PSDMatrix{T,<:BlocksOfDiagonals},
+    Q::PSDMatrix{T,<:BlocksOfDiagonals},
     diffusion::Diagonal{<:T,<:Vector},
 ) where {T} = begin
     @simd ivdep for i in eachindex(blocks(Q.R))
@@ -97,6 +99,6 @@ apply_diffusion!(
     D = size(Q, 1)
     q = D ÷ d - 1
     # _matmul!(out.R, Q.R, Kronecker.kronecker(sqrt.(diffusion), Eye(q + 1)))
-    _matmul!(out.R, Q.R, kron(sqrt.(diffusion), Eye(q + 1)))
+    _matmul!(out.R, Q.R, kron(Eye(q + 1), sqrt.(diffusion)))
     return out
 end
diff --git a/src/diffusions/calibration.jl b/src/diffusions/calibration.jl
index 6b22b3da3..4e1e9b1e9 100644
--- a/src/diffusions/calibration.jl
+++ b/src/diffusions/calibration.jl
@@ -17,7 +17,7 @@ invquad(v, M::IsometricKroneckerProduct; v_cache, M_cache=nothing) = begin
     @assert length(M.B) == 1
     return dot(v, v_cache) / M.B[1]
 end
-invquad(v, M::BlockDiag; v_cache, M_cache=nothing) = begin
+invquad(v, M::BlocksOfDiagonals; v_cache, M_cache=nothing) = begin
     v_cache .= v
     @assert length(M.blocks) == length(v) == length(v_cache)
     @simd ivdep for i in eachindex(v)
@@ -148,9 +148,10 @@ For more background information
 * [bosch20capos](@cite) Bosch et al, "Calibrated Adaptive Probabilistic ODE Solvers", AISTATS (2021)
 """
 function local_diagonal_diffusion(cache)
-    @unpack d, q, H, Qh, measurement, m_tmp, tmp = cache
+    @unpack d, q, H, Qh, measurement, m_tmp = cache
+    tmp = m_tmp.μ
     @unpack local_diffusion = cache
-    @assert H == cache.E1
+    @assert (H == cache.E1) || (H == cache.E2)
 
     z = measurement.μ
     # HQH = H * unfactorize(Qh) * H'
@@ -158,7 +159,7 @@ function local_diagonal_diffusion(cache)
     # c1 = view(_matmul!(cache.C_Dxd, Qh.R, H'), :, 1)
     # Q_11 = dot(c1, c1)
 
-    Q_11 = if Qh.R isa BlockDiag
+    Q_11 = if Qh.R isa BlocksOfDiagonals
         for i in 1:d
             c1 = _matmul!(
                 view(cache.C_Dxd.blocks[i], :, 1:1),
diff --git a/src/filtering/markov_kernel.jl b/src/filtering/markov_kernel.jl
index 07873ede1..5e9acf22a 100644
--- a/src/filtering/markov_kernel.jl
+++ b/src/filtering/markov_kernel.jl
@@ -121,12 +121,12 @@ function marginalize_cov!(
 end
 
 function marginalize_cov!(
-    Σ_out::PSDMatrix{T,<:BlockDiag},
-    Σ_curr::PSDMatrix{T,<:BlockDiag},
+    Σ_out::PSDMatrix{T,<:BlocksOfDiagonals},
+    Σ_curr::PSDMatrix{T,<:BlocksOfDiagonals},
     K::AffineNormalKernel{
         <:AbstractMatrix,
         <:Any,
-        <:PSDMatrix{S,<:BlockDiag},
+        <:PSDMatrix{S,<:BlocksOfDiagonals},
     };
     C_DxD::AbstractMatrix,
     C_3DxD::AbstractMatrix,
@@ -253,13 +253,13 @@ function compute_backward_kernel!(
     },
 }
     D = length(x.μ)  # full_state_dim
-    d = K.A.ldim     # ode_dimension_dim
+    d = K.A.rdim     # ode_dimension_dim
     Q = D ÷ d        # n_derivatives_dim
     _Kout =
-        AffineNormalKernel(Kout.A.B, reshape_no_alloc(Kout.b, Q, d), PSDMatrix(Kout.C.R.B))
-    _x_pred = Gaussian(reshape_no_alloc(xpred.μ, Q, d), PSDMatrix(xpred.Σ.R.B))
-    _x = Gaussian(reshape_no_alloc(x.μ, Q, d), PSDMatrix(x.Σ.R.B))
-    _K = AffineNormalKernel(K.A.B, reshape_no_alloc(K.b, Q, d), PSDMatrix(K.C.R.B))
+        AffineNormalKernel(Kout.A.B, reshape_no_alloc(Kout.b, d, Q)', PSDMatrix(Kout.C.R.B))
+    _x_pred = Gaussian(reshape_no_alloc(xpred.μ, d, Q)', PSDMatrix(xpred.Σ.R.B))
+    _x = Gaussian(reshape_no_alloc(x.μ, d, Q)', PSDMatrix(x.Σ.R.B))
+    _K = AffineNormalKernel(K.A.B, reshape_no_alloc(K.b, d, Q)', PSDMatrix(K.C.R.B))
     _C_DxD = C_DxD.B
     _diffusion =
         diffusion isa Number ? diffusion :
@@ -271,44 +271,44 @@ end
 
 function compute_backward_kernel!(
     Kout::KT1,
-    xpred::SRGaussian{T,<:BlockDiag},
-    x::SRGaussian{T,<:BlockDiag},
+    xpred::SRGaussian{T,<:BlocksOfDiagonals},
+    x::SRGaussian{T,<:BlocksOfDiagonals},
     K::KT2;
     C_DxD::AbstractMatrix,
     diffusion::Union{Number,Diagonal}=1,
 ) where {
     T,
     KT1<:AffineNormalKernel{
-        <:BlockDiag,
+        <:BlocksOfDiagonals,
         <:AbstractVector,
-        <:PSDMatrix{T,<:BlockDiag},
+        <:PSDMatrix{T,<:BlocksOfDiagonals},
     },
     KT2<:AffineNormalKernel{
-        <:BlockDiag,
+        <:BlocksOfDiagonals,
         <:Any,
-        <:PSDMatrix{T,<:BlockDiag},
+        <:PSDMatrix{T,<:BlocksOfDiagonals},
     },
 }
     d = length(blocks(xpred.Σ.R))
     q = size(blocks(xpred.Σ.R)[1], 1) - 1
 
-    @simd ivdep for i in eachindex(blocks(xpred.Σ.R))
+    @views @simd ivdep for i in eachindex(blocks(xpred.Σ.R))
         _Kout = AffineNormalKernel(
             Kout.A.blocks[i],
-            view(Kout.b, (i-1)*(q+1)+1:i*(q+1)),
+            Kout.b[i:d:end],
             PSDMatrix(Kout.C.R.blocks[i]),
         )
         _xpred = Gaussian(
-            view(xpred.μ, (i-1)*(q+1)+1:i*(q+1)),
+            xpred.μ[i:d:end],
             PSDMatrix(xpred.Σ.R.blocks[i]),
         )
         _x = Gaussian(
-            view(x.μ, (i-1)*(q+1)+1:i*(q+1)),
+            x.μ[i:d:end],
             PSDMatrix(x.Σ.R.blocks[i]),
         )
         _K = AffineNormalKernel(
             K.A.blocks[i],
-            ismissing(K.b) ? missing : view(K.b, (i-1)*(q+1)+1:i*(q+1)),
+            ismissing(K.b) ? missing : K.b[i:d:end],
             PSDMatrix(K.C.R.blocks[i]),
         )
         _C_DxD = C_DxD.blocks[i]
diff --git a/src/filtering/predict.jl b/src/filtering/predict.jl
index b37fc5bd3..dcf352ade 100644
--- a/src/filtering/predict.jl
+++ b/src/filtering/predict.jl
@@ -20,7 +20,7 @@ predict_cov(
     Q::PSDMatrix{T,<:IsometricKroneckerProduct},
 ) where {T} = begin
     P_pred_breve = predict_cov(PSDMatrix(Σ.R.B), A.B, PSDMatrix(Q.R.B))
-    return PSDMatrix(IsometricKroneckerProduct(Σ.R.ldim, P_pred_breve.R))
+    return PSDMatrix(IsometricKroneckerProduct(Σ.R.rdim, P_pred_breve.R))
 end
 
 """
@@ -116,14 +116,14 @@ function predict_cov!(
     return predict_cov!(_Σ_out, _Σ_curr, _Ah, _Qh, _C_DxD, _C_2DxD, _diffusion)
 end
 
-# BlockDiagonal version
+# BlocksOfDiagonalsonal version
 function predict_cov!(
-    Σ_out::PSDMatrix{T,<:BlockDiag},
-    Σ_curr::PSDMatrix{T,<:BlockDiag},
-    Ah::BlockDiag,
-    Qh::PSDMatrix{S,<:BlockDiag},
-    C_DxD::BlockDiag,
-    C_2DxD::BlockDiag,
+    Σ_out::PSDMatrix{T,<:BlocksOfDiagonals},
+    Σ_curr::PSDMatrix{T,<:BlocksOfDiagonals},
+    Ah::BlocksOfDiagonals,
+    Qh::PSDMatrix{S,<:BlocksOfDiagonals},
+    C_DxD::BlocksOfDiagonals,
+    C_2DxD::BlocksOfDiagonals,
     diffusion::Union{Number,Diagonal},
 ) where {T,S}
     @simd ivdep for i in eachindex(blocks(Σ_out.R))
diff --git a/src/filtering/update.jl b/src/filtering/update.jl
index 4e4468480..c7f8dcc54 100644
--- a/src/filtering/update.jl
+++ b/src/filtering/update.jl
@@ -163,12 +163,12 @@ function update!(
     R::Union{Nothing,PSDMatrix{T,<:IsometricKroneckerProduct}}=nothing,
 ) where {T}
     D = length(x_out.μ)  # full_state_dim
-    d = H.ldim           # ode_dimension_dim
+    d = H.rdim           # ode_dimension_dim
     Q = D ÷ d            # n_derivatives_dim
-    _x_out = Gaussian(reshape_no_alloc(x_out.μ, Q, d), PSDMatrix(x_out.Σ.R.B))
-    _x_pred = Gaussian(reshape_no_alloc(x_pred.μ, Q, d), PSDMatrix(x_pred.Σ.R.B))
+    _x_out = Gaussian(reshape_no_alloc(x_out.μ, d, Q)', PSDMatrix(x_out.Σ.R.B))
+    _x_pred = Gaussian(reshape_no_alloc(x_pred.μ, d, Q)', PSDMatrix(x_pred.Σ.R.B))
     _measurement = Gaussian(
-        reshape_no_alloc(measurement.μ, 1, d),
+        reshape_no_alloc(measurement.μ, d, 1)',
         measurement.Σ isa PSDMatrix ? PSDMatrix(measurement.Σ.R.B) : measurement.Σ.B,
     )
     _H = H.B
@@ -194,31 +194,31 @@ function update!(
 end
 
 function update!(
-    x_out::SRGaussian{T,<:BlockDiag},
-    x_pred::SRGaussian{T,<:BlockDiag},
+    x_out::SRGaussian{T,<:BlocksOfDiagonals},
+    x_pred::SRGaussian{T,<:BlocksOfDiagonals},
     measurement::Gaussian{
         <:AbstractVector,
-        <:Union{<:PSDMatrix{T,<:BlockDiag},<:BlockDiag},
+        <:Union{<:PSDMatrix{T,<:BlocksOfDiagonals},<:BlocksOfDiagonals},
     },
-    H::BlockDiag,
-    K1_cache::BlockDiag,
-    K2_cache::BlockDiag,
-    M_cache::BlockDiag,
-    C_dxd::BlockDiag,
+    H::BlocksOfDiagonals,
+    K1_cache::BlocksOfDiagonals,
+    K2_cache::BlocksOfDiagonals,
+    M_cache::BlocksOfDiagonals,
+    C_dxd::BlocksOfDiagonals,
     C_d::AbstractVector;
-    R::Union{Nothing,PSDMatrix{T,<:BlockDiag}}=nothing,
+    R::Union{Nothing,PSDMatrix{T,<:BlocksOfDiagonals}}=nothing,
 ) where {T}
     d = length(blocks(x_out.Σ.R))
     q = size(blocks(x_out.Σ.R)[1], 1) - 1
 
     ll = zero(eltype(x_out.μ))
-    for i in eachindex(blocks(x_out.Σ.R))
+    @views for i in eachindex(blocks(x_out.Σ.R))
         _, _ll = update!(
-            Gaussian(view(x_out.μ, (i-1)*(q+1)+1:i*(q+1)),
+            Gaussian(x_out.μ[i:d:end],
                 PSDMatrix(x_out.Σ.R.blocks[i])),
-            Gaussian(view(x_pred.μ, (i-1)*(q+1)+1:i*(q+1)),
+            Gaussian(x_pred.μ[i:d:end],
                 PSDMatrix(x_pred.Σ.R.blocks[i])),
-            Gaussian(view(measurement.μ, i:i),
+            Gaussian(measurement.μ[i:d:end],
                 if measurement.Σ isa PSDMatrix
                     PSDMatrix(measurement.Σ.R.blocks[i])
                 else
diff --git a/src/kronecker.jl b/src/kronecker.jl
index f18bffd74..cb3459179 100644
--- a/src/kronecker.jl
+++ b/src/kronecker.jl
@@ -1,72 +1,73 @@
 copy(K::Kronecker.KroneckerProduct) = Kronecker.KroneckerProduct(copy(K.A), copy(K.B))
 @doc raw"""
-    IsometricKroneckerProduct(left_factor_dim::Int64, right_factor::AbstractMatrix)
+    IsometricKroneckerProduct(left_factor_dim::Int64, left_factor::AbstractMatrix)
 
 Kronecker product of an identity and a generic matrix:
 ```math
 \begin{aligned}
-K = I_d \otimes B
+K = B \otimes I_d
 \end{aligned}
 ```
 
 # Arguments
-- `left_factor_dim::Int64`: Dimension `d` of the left identity kronecker factor.
-- `right_factor::AbstractMatrix`: Right Kronecker factor.
+- `right_factor_dim::Int64`: Dimension `d` of the left identity kronecker factor.
+- `left_factor::AbstractMatrix`: Right Kronecker factor.
 """
-struct IsometricKroneckerProduct{T<:Number,TB<:AbstractMatrix} <:
+struct RightIsometricKroneckerProduct{T<:Number,TB<:AbstractMatrix} <:
        Kronecker.AbstractKroneckerProduct{T}
-    ldim::Int64
+    rdim::Int64
     B::TB
-    function IsometricKroneckerProduct(
-        left_factor_dim::Int64,
-        right_factor::AbstractMatrix{T},
+    function RightIsometricKroneckerProduct(
+        right_factor_dim::Int64,
+        left_factor::AbstractMatrix{T},
     ) where {T}
-        return new{T,typeof(right_factor)}(left_factor_dim, right_factor)
+        return new{T,typeof(left_factor)}(right_factor_dim, left_factor)
     end
 end
-IsometricKroneckerProduct(ldim::Integer, B::AbstractVector) =
-    IsometricKroneckerProduct(ldim, reshape(B, :, 1))
-IsometricKroneckerProduct(M::AbstractMatrix) = throw(
+RightIsometricKroneckerProduct(rdim::Integer, B::AbstractVector) =
+    RightIsometricKroneckerProduct(rdim, reshape(B, :, 1))
+RightIsometricKroneckerProduct(M::AbstractMatrix) = throw(
     ArgumentError(
-        "Can not create IsometricKroneckerProduct from the provided matrix of type $(typeof(M))",
+        "Can not create RightIsometricKroneckerProduct from the provided matrix of type $(typeof(M))",
     ),
 )
 
+const IsometricKroneckerProduct = RightIsometricKroneckerProduct
 const IKP = IsometricKroneckerProduct
 
-Kronecker.getmatrices(K::IKP) = (I(K.ldim), K.B)
-Kronecker.getallfactors(K::IKP) = (I(K.ldim), K.B)
+Kronecker.getmatrices(K::IKP) = (K.B, Eye(K.rdim))
+Kronecker.getallfactors(K::IKP) = (K.B, Eye(K.rdim))
 
-Base.zero(A::IKP) = IsometricKroneckerProduct(A.ldim, zero(A.B))
-Base.one(A::IKP) = IsometricKroneckerProduct(A.ldim, one(A.B))
+Base.zero(A::IKP) = IsometricKroneckerProduct(A.rdim, zero(A.B))
+Base.one(A::IKP) = IsometricKroneckerProduct(A.rdim, one(A.B))
 copy!(A::IKP, B::IKP) = begin
     check_same_size(A, B)
     copy!(A.B, B.B)
     return A
 end
-copy(A::IKP) = IsometricKroneckerProduct(A.ldim, copy(A.B))
-similar(A::IKP) = IsometricKroneckerProduct(A.ldim, similar(A.B))
-Base.size(K::IKP) = (K.ldim * size(K.B, 1), K.ldim * size(K.B, 2))
+copy(A::IKP) = IsometricKroneckerProduct(A.rdim, copy(A.B))
+similar(A::IKP) = IsometricKroneckerProduct(A.rdim, similar(A.B))
+Base.size(K::IKP) = (K.rdim * size(K.B, 1), K.rdim * size(K.B, 2))
 
 # conversion
 Base.convert(::Type{T}, K::IKP) where {T<:IKP} =
     K isa T ? K : T(K)
 function IKP{T,TB}(K::IKP) where {T,TB}
-    IKP(K.ldim, convert(TB, K.B))
+    IKP(K.rdim, convert(TB, K.B))
 end
 
 function Base.:*(A::IKP, B::IKP)
-    @assert A.ldim == B.ldim
-    return IsometricKroneckerProduct(A.ldim, A.B * B.B)
+    @assert A.rdim == B.rdim
+    return IsometricKroneckerProduct(A.rdim, A.B * B.B)
 end
-Base.:*(K::IKP, a::Number) = IsometricKroneckerProduct(K.ldim, K.B * a)
-Base.:*(a::Number, K::IKP) = IsometricKroneckerProduct(K.ldim, a * K.B)
-LinearAlgebra.adjoint(A::IKP) = IsometricKroneckerProduct(A.ldim, A.B')
-LinearAlgebra.rmul!(A::IKP, b::Number) = IsometricKroneckerProduct(A.ldim, rmul!(A.B, b))
+Base.:*(K::IKP, a::Number) = IsometricKroneckerProduct(K.rdim, K.B * a)
+Base.:*(a::Number, K::IKP) = IsometricKroneckerProduct(K.rdim, a * K.B)
+LinearAlgebra.adjoint(A::IKP) = IsometricKroneckerProduct(A.rdim, A.B')
+LinearAlgebra.rmul!(A::IKP, b::Number) = IsometricKroneckerProduct(A.rdim, rmul!(A.B, b))
 
 function check_same_size(A::IKP, B::IKP)
-    if A.ldim != B.ldim || size(A.B) != size(B.B)
-        Ad, An, Am, Bd, Bn, Bm = A.ldim, size(A)..., B.ldim, size(B)...
+    if A.rdim != B.rdim || size(A.B) != size(B.B)
+        Ad, An, Am, Bd, Bn, Bm = A.rdim, size(A)..., B.rdim, size(B)...
         throw(
             DimensionMismatch("A has size ($Ad⋅$An,$Ad⋅$Am), B has size ($Bd⋅$Bn,$Bd⋅$Bm)"),
         )
@@ -74,9 +75,9 @@ function check_same_size(A::IKP, B::IKP)
 end
 function check_matmul_sizes(A::IKP, B::IKP)
     # For A * B
-    Ad, Bd = A.ldim, B.ldim
+    Ad, Bd = A.rdim, B.rdim
     An, Am, Bn, Bm = size(A)..., size(B)...
-    if !(A.ldim == B.ldim) || !(Am == Bn)
+    if !(A.rdim == B.rdim) || !(Am == Bn)
         throw(
             DimensionMismatch(
                 "Matrix multiplication not compatible: A has size ($Ad⋅$An,$Ad⋅$Am), B has size ($Bd⋅$Bn,$Bd⋅$Bm)",
@@ -86,9 +87,9 @@ function check_matmul_sizes(A::IKP, B::IKP)
 end
 function check_matmul_sizes(C::IKP, A::IKP, B::IKP)
     # For C = A * B
-    Ad, Bd, Cd = A.ldim, B.ldim, C.ldim
+    Ad, Bd, Cd = A.rdim, B.rdim, C.rdim
     An, Am, Bn, Bm, Cn, Cm = size(A)..., size(B)..., size(C)...
-    if !(A.ldim == B.ldim == C.ldim) || !(Am == Bn && An == Cn && Bm == Cm)
+    if !(A.rdim == B.rdim == C.rdim) || !(Am == Bn && An == Cn && Bm == Cm)
         throw(
             DimensionMismatch(
                 "Matrix multiplication not compatible: A has size ($Ad⋅$An,$Ad⋅$Am), B has size ($Bd⋅$Bn,$Bd⋅$Bm), C has size ($Cd⋅$Cn,$Cd⋅$Cm)",
@@ -99,26 +100,26 @@ end
 
 Base.:+(A::IKP, B::IKP) = begin
     check_same_size(A, B)
-    return IsometricKroneckerProduct(A.ldim, A.B + B.B)
+    return IsometricKroneckerProduct(A.rdim, A.B + B.B)
 end
-Base.:+(U::UniformScaling, K::IKP) = IsometricKroneckerProduct(K.ldim, U + K.B)
-Base.:+(K::IKP, U::UniformScaling) = IsometricKroneckerProduct(K.ldim, U + K.B)
+Base.:+(U::UniformScaling, K::IKP) = IsometricKroneckerProduct(K.rdim, U + K.B)
+Base.:+(K::IKP, U::UniformScaling) = IsometricKroneckerProduct(K.rdim, U + K.B)
 
 add!(out::IsometricKroneckerProduct, toadd::IsometricKroneckerProduct) = begin
-    @assert out.ldim == toadd.ldim
+    @assert out.rdim == toadd.rdim
     add!(out.B, toadd.B)
     return out
 end
 
-Base.:-(U::UniformScaling, K::IKP) = IsometricKroneckerProduct(K.ldim, U - K.B)
-LinearAlgebra.inv(K::IKP) = IsometricKroneckerProduct(K.ldim, inv(K.B))
+Base.:-(U::UniformScaling, K::IKP) = IsometricKroneckerProduct(K.rdim, U - K.B)
+LinearAlgebra.inv(K::IKP) = IsometricKroneckerProduct(K.rdim, inv(K.B))
 Base.:/(A::IKP, B::IKP) = begin
-    @assert A.ldim == B.ldim
-    return IsometricKroneckerProduct(A.ldim, A.B / B.B)
+    @assert A.rdim == B.rdim
+    return IsometricKroneckerProduct(A.rdim, A.B / B.B)
 end
 Base.:\(A::IKP, B::IKP) = begin
-    @assert A.ldim == B.ldim
-    return IsometricKroneckerProduct(A.ldim, A.B \ B.B)
+    @assert A.rdim == B.rdim
+    return IsometricKroneckerProduct(A.rdim, A.B \ B.B)
 end
 
 mul!(A::IKP, B::IKP, C::IKP) = begin
@@ -190,42 +191,34 @@ reshape_no_alloc(a, dims::Tuple) =
 reshape_no_alloc(a, dims...) = reshape_no_alloc(a, Tuple(dims))
 reshape_no_alloc(a::Missing, dims::Tuple) = missing
 
-function mul_vectrick!(x::AbstractVecOrMat, A::IKP, v::AbstractVecOrMat)
-    N = A.B
-    c, d = size(N)
+function _prepare_inputs_for_vectrick(A, x, v)
+    M = A.B
+    a, b = size(M)
+    V = reshape_no_alloc(transpose(v), (length(v) ÷ b, b)) |> transpose
+    X = reshape_no_alloc(transpose(x), (length(x) ÷ a, a)) |> transpose
+    return A.B, X, V
+end
 
-    V = reshape_no_alloc(v, (d, length(v) ÷ d))
-    X = reshape_no_alloc(x, (c, length(x) ÷ c))
+function mul_vectrick!(x::AbstractVecOrMat, A::IKP, v::AbstractVecOrMat)
+    N, X, V = _prepare_inputs_for_vectrick(A, x, v)
     mul!(X, N, V)
     return x
 end
 function mul_vectrick!(
     x::AbstractVecOrMat, A::IKP, v::AbstractVecOrMat, alpha::Number, beta::Number)
-    N = A.B
-    c, d = size(N)
-
-    V = reshape_no_alloc(v, (d, length(v) ÷ d))
-    X = reshape_no_alloc(x, (c, length(x) ÷ c))
+    N, X, V = _prepare_inputs_for_vectrick(A, x, v)
     mul!(X, N, V, alpha, beta)
     return x
 end
 
 function _matmul_vectrick!(x::AbstractVecOrMat, A::IKP, v::AbstractVecOrMat)
-    N = A.B
-    c, d = size(N)
-
-    V = reshape_no_alloc(v, (d, length(v) ÷ d))
-    X = reshape_no_alloc(x, (c, length(x) ÷ c))
+    N, X, V = _prepare_inputs_for_vectrick(A, x, v)
     _matmul!(X, N, V)
     return x
 end
 function _matmul_vectrick!(
     x::AbstractVecOrMat, A::IKP, v::AbstractVecOrMat, alpha::Number, beta::Number)
-    N = A.B
-    c, d = size(N)
-
-    V = reshape_no_alloc(v, (d, length(v) ÷ d))
-    X = reshape_no_alloc(x, (c, length(x) ÷ c))
+    N, X, V = _prepare_inputs_for_vectrick(A, x, v)
     _matmul!(X, N, V, alpha, beta)
     return x
 end
@@ -274,11 +267,19 @@ for TC in [:AbstractVector, :AbstractMatrix]
 end
 
 function Kronecker.ldiv_vec_trick!(x::AbstractVector, A::IKP, v::AbstractVector)
-    N = A.B
-    c, d = size(N)
-
-    V = reshape_no_alloc(v, (c, length(v) ÷ c))
-    X = reshape_no_alloc(x, (d, length(x) ÷ d))
-    copyto!(X, N \ V)
+    M = A.B
+    a, b = size(M)
+    V = reshape_no_alloc(transpose(v), (length(v) ÷ a, a)) |> transpose
+    X = reshape_no_alloc(transpose(x), (length(x) ÷ b, b)) |> transpose
+    copyto!(X, M \ V)
     return x
 end
+
+function Base.vcat(K1::IKP, K2::IKP)
+    @assert K1.rdim == K2.rdim
+    return IKP(K1.rdim, vcat(K1.B, K2.B))
+end
+function Base.hcat(K1::IKP, K2::IKP)
+    @assert K1.rdim == K2.rdim
+    return IKP(K1.rdim, hcat(K1.B, K2.B))
+end
diff --git a/src/measurement_models.jl b/src/measurement_models.jl
index a13443d5e..db0279501 100644
--- a/src/measurement_models.jl
+++ b/src/measurement_models.jl
@@ -12,8 +12,8 @@ function (m::StandardODEMeasurementModel{true})(z, x, p, t)
     d = length(z)
     D = length(x)
     q = D ÷ d - 1
-    E0_x = @view x[1:(q+1):end]
-    E1_x = @view x[2:(q+1):end]
+    E0_x = @view x[1:d]
+    E1_x = @view x[d+1:2d]
     m.f(z, E0_x, p, t)
     mul!(z, m.f.mass_matrix, E1_x, 1, -1)
     return z
@@ -38,9 +38,9 @@ function (m::SecondOrderODEMeasurementModel{true})(z, x, p, t)
     d = length(z)
     D = length(x)
     q = D ÷ d - 1
-    E0_x = @view x[1:(q+1):end]
-    E1_x = @view x[2:(q+1):end]
-    E2_x = @view x[3:(q+1):end]
+    E0_x = @view x[1:d]
+    E1_x = @view x[d+1:2d]
+    E2_x = @view x[2d+1:3d]
 
     m.f1(z, E1_x, E0_x, p, t)
 
diff --git a/src/perform_step.jl b/src/perform_step.jl
index d1c834284..f27cc6de3 100644
--- a/src/perform_step.jl
+++ b/src/perform_step.jl
@@ -229,7 +229,7 @@ end
 diag!(v::AbstractVector, M::PSDMatrix) = (sum!(abs2, v', M.R); v)
 diag!(v::AbstractVector, M::PSDMatrix{<:Number,<:IsometricKroneckerProduct}) =
     v .= sum(abs2, M.R.B)
-diag!(v::AbstractVector, M::PSDMatrix{<:Number,<:BlockDiag}) = begin
+diag!(v::AbstractVector, M::PSDMatrix{<:Number,<:BlocksOfDiagonals}) = begin
     @assert length(v) == nblocks(M.R)
     @assert size(blocks(M.R)[1], 2) == 1 # assumes all of them have the same shape
     @simd ivdep for i in eachindex(blocks(M.R))
diff --git a/src/preconditioning.jl b/src/preconditioning.jl
index ad0e494a4..acfcfa42d 100644
--- a/src/preconditioning.jl
+++ b/src/preconditioning.jl
@@ -4,15 +4,15 @@ function init_preconditioner(C::IsometricKroneckerCovariance{elType}) where {elT
     return P, PI
 end
 function init_preconditioner(C::DenseCovariance{elType}) where {elType}
-    P = kron(I(C.d), Diagonal(ones(elType, C.q + 1)))
-    PI = kron(I(C.d), Diagonal(ones(elType, C.q + 1)))
+    P = kron(Diagonal(ones(elType, C.q + 1)), Eye(C.d))
+    PI = kron(Diagonal(ones(elType, C.q + 1)), Eye(C.d))
     return P, PI
 end
 function init_preconditioner(C::BlockDiagonalCovariance{elType}) where {elType}
     B = Diagonal(ones(elType, C.q + 1))
-    P = BlockDiag([B for _ in 1:C.d])
+    P = BlocksOfDiagonals([B for _ in 1:C.d])
     BI = Diagonal(ones(elType, C.q + 1))
-    PI = BlockDiag([BI for _ in 1:C.d])
+    PI = BlocksOfDiagonals([BI for _ in 1:C.d])
     return P, PI
 end
 
@@ -27,12 +27,11 @@ function make_preconditioners!(P, PI, d, q, dt)
     return nothing
 end
 
-@fastmath @inbounds function make_preconditioner!(P::Diagonal, h, d, q)
+@fastmath function make_preconditioner!(P::Diagonal, h, d, q)
     val = factorial(q) / h^(q + 1 / 2)
     for j in 0:q
-        @simd ivdep for i in 0:d-1
-            # P[j+i*(q+1)+1, j+i*(q+1)+1] = val
-            P.diag[j+i*(q+1)+1] = val
+        @simd ivdep for i in 1:d
+            P.diag[j*d+i] = val
         end
         val /= (q - j) / h
     end
@@ -40,15 +39,14 @@ end
 end
 make_preconditioner!(P::IsometricKroneckerProduct, h, d, q) =
     (make_preconditioner!(P.B, h, 1, q); P)
-make_preconditioner!(P::BlockDiag, h, d, q) =
+make_preconditioner!(P::BlocksOfDiagonals, h, d, q) =
     (make_preconditioner!(blocks(P)[1], h, 1, q); P)
 
 @fastmath @inbounds function make_preconditioner_inv!(PI::Diagonal, h, d, q)
     val = h^(q + 1 / 2) / factorial(q)
     for j in 0:q
-        @simd ivdep for i in 0:d-1
-            # PI[j+i*(q+1)+1, j+i*(q+1)+1] = val
-            PI.diag[j+i*(q+1)+1] = val
+        @simd ivdep for i in 1:d
+            PI.diag[j*d+i] = val
         end
         val *= (q - j) / h
     end
@@ -56,5 +54,5 @@ make_preconditioner!(P::BlockDiag, h, d, q) =
 end
 make_preconditioner_inv!(PI::IsometricKroneckerProduct, h, d, q) =
     (make_preconditioner_inv!(PI.B, h, 1, q); PI)
-make_preconditioner_inv!(PI::BlockDiag, h, d, q) =
+make_preconditioner_inv!(PI::BlocksOfDiagonals, h, d, q) =
     (make_preconditioner_inv!(blocks(PI)[1], h, 1, q); PI)
diff --git a/src/priors/ioup.jl b/src/priors/ioup.jl
index 61f1d25c7..3d7bb99df 100644
--- a/src/priors/ioup.jl
+++ b/src/priors/ioup.jl
@@ -90,30 +90,30 @@ function to_sde(p::IOUP)
 
     F_breve = diagm(1 => ones(q))
     # F_breve[end, end] = r
-    F = kron(I(d), F_breve)
-    F[q+1:q+1:end, q+1:q+1:end] = R
+    F = kron(F_breve, Eye(d))
+    F[end-d+1:end, end-d+1:end] = R
 
     L_breve = zeros(q + 1)
     L_breve[end] = 1.0
-    L = kron(I(d), L_breve)
+    L = kron(L_breve, Eye(d))
 
     return LTISDE(F, L)
 end
 
 function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:AbstractMatrix})
-    q = num_derivatives(prior)
+    d = dim(prior)
     r = prior.rate_parameter
-    F[q+1:q+1:end, q+1:q+1:end] = r
+    F[end-d+1:end, end-d+1:end] = r
 end
 function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:AbstractVector})
-    q = num_derivatives(prior)
+    d = dim(prior)
     r = prior.rate_parameter
-    F[q+1:q+1:end, q+1:q+1:end] = Diagonal(r)
+    F[end-d+1:end, end-d+1:end] = Diagonal(r)
 end
 function update_sde_drift!(F::AbstractMatrix, prior::IOUP{<:Any,<:Number})
     d, q = dim(prior), num_derivatives(prior)
     r = prior.rate_parameter
-    F[q+1:q+1:end, q+1:q+1:end] = Diagonal(Fill(r, d))
+    F[end-d+1:end, end-d+1:end] = Diagonal(Fill(r, d))
 end
 
 function make_transition_matrices!(cache, prior::IOUP, dt)
diff --git a/src/priors/ltisde.jl b/src/priors/ltisde.jl
index 059788ac0..204d70969 100644
--- a/src/priors/ltisde.jl
+++ b/src/priors/ltisde.jl
@@ -38,8 +38,8 @@ end
 discretize(F::IsometricKroneckerProduct, L::IsometricKroneckerProduct, dt::Real) = begin
     method = FiniteHorizonGramians.ExpAndGram{eltype(F.B),13}()
     A_breve, QR_breve = FiniteHorizonGramians.exp_and_gram_chol(F.B, L.B, dt, method)
-    A = IsometricKroneckerProduct(F.ldim, A_breve)
-    Q = PSDMatrix(IsometricKroneckerProduct(F.ldim, QR_breve))
+    A = IsometricKroneckerProduct(F.rdim, A_breve)
+    Q = PSDMatrix(IsometricKroneckerProduct(F.rdim, QR_breve))
     return A, Q
 end
 
@@ -48,7 +48,7 @@ function matrix_fraction_decomposition(
     dispersion::IsometricKroneckerProduct,
     dt::Real,
 )
-    d = drift.ldim
+    d = drift.rdim
     A_breve, Q_breve = matrix_fraction_decomposition(drift.B, dispersion.B, dt)
     return IsometricKroneckerProduct(d, A_breve), IsometricKroneckerProduct(d, Q_breve)
 end
diff --git a/src/projection.jl b/src/projection.jl
index ba08e65e0..6ced99e63 100644
--- a/src/projection.jl
+++ b/src/projection.jl
@@ -5,13 +5,11 @@ function projection(
 ) where {elType}
     D = d * (q + 1)
     Proj(deriv) = begin
-        P = zeros(elType, d, D)
+        e_i = zeros(elType, q + 1, 1)
         if deriv <= q
-            @simd ivdep for i in deriv*d+1:D+1:d*D
-                @inbounds P[i] = 1
-            end
+            e_i[deriv+1] = 1
         end
-        return P
+        return kron(e_i', Eye(d))
     end
     return Proj
 end
@@ -36,65 +34,7 @@ function projection(C::BlockDiagonalCovariance{elType}) where {elType}
         if deriv <= C.q
             e_i[deriv+1] = 1
         end
-        return BlockDiag([copy(e_i)' for _ in 1:C.d])
+        return BlocksOfDiagonals([copy(e_i)' for _ in 1:C.d])
     end
     return Proj
 end
-
-function solution_space_projection(C::CovarianceStructure, is_secondorder_ode)
-    Proj = projection(C)
-    if is_secondorder_ode
-        return [Proj(1); Proj(0)]
-    else
-        return Proj(0)
-    end
-end
-
-function solution_space_projection(C::IsometricKroneckerCovariance, is_secondorder_ode)
-    Proj = projection(C)
-    if is_secondorder_ode
-        return KroneckerSecondOrderODESolutionProjector(C)
-    else
-        return Proj(0)
-    end
-end
-function solution_space_projection(C::BlockDiagonalCovariance, is_secondorder_ode)
-    Proj = projection(C)
-    if is_secondorder_ode
-        error("Not yet implemented!")
-    else
-        return Proj(0)
-    end
-end
-
-struct KroneckerSecondOrderODESolutionProjector{T,FAC,M,M2} <: AbstractMatrix{T}
-    covariance_structure::FAC
-    E0::M
-    E1::M
-    SolProjB::M2
-end
-function KroneckerSecondOrderODESolutionProjector(
-    C::IsometricKroneckerCovariance{T},
-) where {T}
-    Proj = projection(C)
-    E0, E1 = Proj(0), Proj(1)
-    SolProjB = [E1.B; E0.B]
-    return KroneckerSecondOrderODESolutionProjector{
-        T,typeof(C),typeof(E0),typeof(SolProjB),
-    }(
-        C, E0, E1, SolProjB,
-    )
-end
-function _gaussian_mul!(
-    g_out::SRGaussian, M::KroneckerSecondOrderODESolutionProjector, g_in::SRGaussian)
-    d = M.covariance_structure.d
-    _matmul!(view(g_out.μ, 1:d), M.E1, g_in.μ)
-    _matmul!(view(g_out.μ, d+1:2d), M.E0, g_in.μ)
-    _matmul!(g_out.Σ.R.A, g_in.Σ.R.B, M.SolProjB')
-    return g_out
-end
-function Base.:*(M::KroneckerSecondOrderODESolutionProjector, x::SRGaussian)
-    μ = [M.E1 * x.μ; M.E0 * x.μ]
-    Σ = PSDMatrix([x.Σ.R * M.E1' x.Σ.R * M.E0'])
-    return Gaussian(μ, Σ)
-end
diff --git a/test/Project.toml b/test/Project.toml
index 4705022bd..1dbccbcc3 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,5 +1,6 @@
 [deps]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
 BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
 DiffEqDevTools = "f3b72e0c-5b89-59e1-b016-84e28bfd966d"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
diff --git a/test/core/blockdiagonals.jl b/test/core/blocksofdiagonals.jl
similarity index 54%
rename from test/core/blockdiagonals.jl
rename to test/core/blocksofdiagonals.jl
index c0969571b..6de4de000 100644
--- a/test/core/blockdiagonals.jl
+++ b/test/core/blocksofdiagonals.jl
@@ -1,43 +1,44 @@
 using ProbNumDiffEq
 import ProbNumDiffEq as PNDE
-import ProbNumDiffEq: BlockDiag, _matmul!
+import ProbNumDiffEq: BlocksOfDiagonals, _matmul!, nblocks
 using LinearAlgebra
-using BlockDiagonals
+import BlockArrays
 using Test
 
 d1, d2 = 2, 3
 D = d1 * d2
+
 @testset "T=$T" for T in (Float64, BigFloat)
-    A = BlockDiag([randn(T, d1, d1) for _ in 1:d2])
-    B = BlockDiag([randn(T, d1, d1) for _ in 1:d2])
-    C = BlockDiag([randn(T, d1, d1) for _ in 1:d2])
+    A = BlocksOfDiagonals([randn(T, d1, d1) for _ in 1:d2])
+    B = BlocksOfDiagonals([randn(T, d1, d1) for _ in 1:d2])
+    C = BlocksOfDiagonals([randn(T, d1, d1) for _ in 1:d2])
     alpha = rand(T)
     beta = rand(T)
 
     AM, BM, CM = @test_nowarn Matrix.((A, B, C))
 
-    @test Matrix(BlockDiagonal(A)) == AM
-    @test Matrix(BlockDiagonal(B)) == BM
-    @test Matrix(BlockDiagonal(C)) == CM
+    @test Matrix(BlockArrays.BlockArray(A)) == AM
+    @test Matrix(BlockArrays.BlockArray(B)) == BM
+    @test Matrix(BlockArrays.BlockArray(C)) == CM
 
     _A = @test_nowarn copy(A)
-    @test _A isa BlockDiag
+    @test _A isa BlocksOfDiagonals
 
     _B = @test_nowarn copy!(_A, B)
     @test _B === _A
     @test _B == B
 
     _A = @test_nowarn similar(A)
-    @test _A isa BlockDiag
+    @test _A isa BlocksOfDiagonals
     @test size(_A) == size(A)
 
     _Z = @test_nowarn zero(A)
-    @test _Z isa BlockDiag
+    @test _Z isa BlocksOfDiagonals
     @test size(_Z) == size(A)
     @test all(_Z .== 0)
 
     function tttm(M) # quick type test and to matrix
-        @test M isa BlockDiag
+        @test M isa BlocksOfDiagonals
         return Matrix(M)
     end
 
@@ -72,9 +73,23 @@ D = d1 * d2
     @test tttm(_matmul!(_A, A, alpha * I(D))) ≈ alpha * AM
     @test tttm(_matmul!(_A, alpha * I(D), A)) ≈ alpha * AM
 
+    # Actual Diagonals
+    DI = Diagonal(rand(T, D))
+    @test tttm(DI * A) ≈ DI * AM
+    @test tttm(A * DI) ≈ AM * DI
+    @test tttm(mul!(copy(A), DI, A)) ≈ DI * AM
+    @test tttm(mul!(copy(A), A, DI)) ≈ AM * DI
+    @test tttm(_matmul!(copy(A), DI, A)) ≈ DI * AM
+    @test tttm(_matmul!(copy(A), A, DI)) ≈ AM * DI
+    @test tttm(mul!(copy(A), DI, A, alpha, beta)) ≈ mul!(copy(AM), DI, AM, alpha, beta)
+    @test tttm(mul!(copy(A), A, DI, alpha, beta)) ≈ mul!(copy(AM), AM, DI, alpha, beta)
+    @test tttm(_matmul!(copy(A), DI, A, alpha, beta)) ≈ mul!(copy(AM), DI, AM, alpha, beta)
+    @test tttm(_matmul!(copy(A), A, DI, alpha, beta)) ≈ mul!(copy(AM), AM, DI, alpha, beta)
+
     @test_throws ErrorException view(A, 1:2, 1:2)
 
-    tttm(copy!(copy(A), Diagonal(A)))
+    _D = Diagonal(rand(T, D))
+    @test tttm(copy!(copy(A), _D)) == _D
 
     @test tttm(A + A) ≈ AM + AM
     @test tttm(A - A) ≈ AM - AM
@@ -82,4 +97,17 @@ D = d1 * d2
     _A = copy(A)
     @test tttm(PNDE.add!(_A, A)) == AM + AM
     @test Matrix(_A) == AM + AM
+
+    # Matrix-Vector Products
+    v = rand(T, size(A, 2))
+    @test A * v == AM * v
+    x = rand(T, size(A, 1))
+    @test mul!(x, A, v) == mul!(x, AM, v)
+    @test mul!(x, A, v, alpha, beta) == mul!(x, AM, v, alpha, beta)
+    @test _matmul!(x, A, v) == _matmul!(x, AM, v)
+    @test _matmul!(x, A, v, alpha, beta) == _matmul!(x, AM, v, alpha, beta)
+
+    @test tttm([A; B]) == [AM; BM]
+    @test tttm([A B]) == [AM BM]
+    @test_broken [A B; B A] isa PNDE.BlocksOfDiagonals
 end
diff --git a/test/core/filtering.jl b/test/core/filtering.jl
index 9da066b22..f9a574ec9 100644
--- a/test/core/filtering.jl
+++ b/test/core/filtering.jl
@@ -5,9 +5,8 @@ Check the correctness of the filtering implementations vs. basic readable math c
 using Test
 using ProbNumDiffEq
 using LinearAlgebra
-import ProbNumDiffEq: IsometricKroneckerProduct, BlockDiag
+import ProbNumDiffEq: IsometricKroneckerProduct, BlocksOfDiagonals
 import ProbNumDiffEq as PNDE
-using BlockDiagonals
 using FillArrays
 import ProbNumDiffEq.GaussianDistributions: logpdf
 
@@ -79,7 +78,7 @@ import ProbNumDiffEq.GaussianDistributions: logpdf
                 end
                 _diffusions = diffusion isa Number ? diffusion * Ones(d) : diffusion.diag
 
-                QM_diff = Matrix(BlockDiagonal([σ² * _Q.B for σ² in _diffusions]))
+                QM_diff = Matrix(BlocksOfDiagonals([σ² * _Q.B for σ² in _diffusions]))
                 P_p_diff = AM * PM * AM' + QM_diff
 
                 x_curr = Gaussian(m, PSDMatrix(P_R))
@@ -104,9 +103,9 @@ import ProbNumDiffEq.GaussianDistributions: logpdf
             x_out = copy(x_curr)
             # marginalize! needs tall square-roots:
             Q_SR = if Q_R isa IsometricKroneckerProduct
-                PSDMatrix(IsometricKroneckerProduct(Q_R.ldim, [Q_R.B; zero(Q_R.B)]))
-            elseif Q_R isa BlockDiag
-                PSDMatrix(BlockDiag([[B; zero(B)] for B in Q_R.blocks]))
+                PSDMatrix(IsometricKroneckerProduct(Q_R.rdim, [Q_R.B; zero(Q_R.B)]))
+            elseif Q_R isa BlocksOfDiagonals
+                PSDMatrix(BlocksOfDiagonals([[B; zero(B)] for B in Q_R.blocks]))
             else
                 PSDMatrix([Q_R; zero(Q_R)])
             end
@@ -494,7 +493,7 @@ end
                 end
                 _diffusions =
                     diffusion isa Number ? diffusion * Ones(d) : diffusion.diag
-                QM_diff = Matrix(BlockDiagonal([σ² * _Q.B for σ² in _diffusions]))
+                QM_diff = Matrix(BlocksOfDiagonals([σ² * _Q.B for σ² in _diffusions]))
 
                 ProbNumDiffEq.compute_backward_kernel!(
                     K_backward, x_next_pred, x_curr, K_forward; C_DxD, diffusion)
diff --git a/test/core/kronecker.jl b/test/core/kronecker.jl
index 5e8ad085c..1b89755be 100644
--- a/test/core/kronecker.jl
+++ b/test/core/kronecker.jl
@@ -9,10 +9,14 @@ q = 2
 @testset "$T" for T in (Float64, BigFloat)
     R1 = rand(T, q + 1, q + 1)
     K1 = PNDE.IsometricKroneckerProduct(d, R1)
-    M1 = Matrix(K1)
+    M1 = kron(R1, I(d))
     R2 = rand(T, q + 1, q + 1)
     K2 = PNDE.IsometricKroneckerProduct(d, R2)
-    M2 = Matrix(K2)
+    M2 = kron(R2, I(d))
+
+    # Definition
+    @test Matrix(K1) == M1
+    @test Matrix(K2) == M2
 
     # Base
     K3 = PNDE.IsometricKroneckerProduct(d, copy(R2))
@@ -112,19 +116,21 @@ q = 2
     @test PNDE._matmul!(copy(v), K1, v, α, β) ≈ PNDE._matmul!(copy(v), M1, v, α, β)
     @test PNDE._matmul!(copy(A), K1, A, α, β) ≈ PNDE._matmul!(copy(A), M1, A, α, β)
 
-    if T == Float64
-        # Octavian has issues
-        @test_broken PNDE._matmul!(copy(A'), copy(A'), K1') ≈
-                     PNDE._matmul!(copy(A'), A', M1')
-        @test_broken PNDE._matmul!(copy(A'), copy(A'), K1', α, β) ≈
-                     PNDE._matmul!(copy(A'), A', M1', α, β)
-    else
-        # Uses LinearAlgebra
-        @test PNDE._matmul!(copy(A'), copy(A'), K1') ≈ PNDE._matmul!(copy(A'), A', M1')
-        @test PNDE._matmul!(copy(A'), copy(A'), K1', α, β) ≈
-              PNDE._matmul!(copy(A'), A', M1', α, β)
-    end
+    @test PNDE._matmul!(copy(A'), copy(A'), K1') ≈ PNDE._matmul!(copy(A'), A', M1')
+    @test PNDE._matmul!(copy(A'), copy(A'), K1', α, β) ≈
+          PNDE._matmul!(copy(A'), A', M1', α, β)
+
     # But it always works if all matrices are actual adjoints
     @test PNDE._matmul!(copy(A)', A', K1') ≈ PNDE._matmul!(copy(A'), A', M1')
     @test PNDE._matmul!(copy(A)', A', K1', α, β) ≈ PNDE._matmul!(copy(A'), A', M1', α, β)
+
+    # Div!
+    @test K1 \ v ≈ M1 \ v
+    _K = PNDE.IsometricKroneckerProduct(d, rand(2, 3))
+    _v = rand(2d)
+    @test _K \ _v ≈ Matrix(_K) \ _v
+
+    @test tttm([K1; K2]) == [M1; M2]
+    @test tttm([K1 K2]) == [M1 M2]
+    @test_broken [K1 K2; K2 K1] isa PNDE.IsometricKroneckerProduct
 end
diff --git a/test/core/preconditioning.jl b/test/core/preconditioning.jl
index 27c02cd62..84af017f1 100644
--- a/test/core/preconditioning.jl
+++ b/test/core/preconditioning.jl
@@ -46,6 +46,8 @@ end
     PD, PID = make_preconditioners(PNDE.DenseCovariance)
     PB, PIB = make_preconditioners(PNDE.BlockDiagonalCovariance)
 
-    @test PK == PD == PB
-    @test PIK == PID == PIB
+    @test PK == PD
+    @test PB == PD
+    @test PIK == PID
+    @test PIB == PID
 end
diff --git a/test/core/priors.jl b/test/core/priors.jl
index 688374876..d236b1567 100644
--- a/test/core/priors.jl
+++ b/test/core/priors.jl
@@ -53,6 +53,15 @@ end
 
     prior = PNDE.IWP(dim=d, num_derivatives=q)
 
+    PERM = [
+        1 0 0 0 0 0
+        0 0 0 1 0 0
+        0 1 0 0 0 0
+        0 0 0 0 1 0
+        0 0 1 0 0 0
+        0 0 0 0 0 1
+    ]
+
     # true sde parameters
     F = [
         0 1 0 0 0 0
@@ -62,6 +71,7 @@ end
         0 0 0 0 0 1
         0 0 0 0 0 0
     ]
+    F = PERM * F * PERM'
     L = [
         0 0
         0 0
@@ -70,6 +80,7 @@ end
         0 0
         0 1
     ]
+    L = PERM * L
 
     # true transition matrices
     AH_22_IBM = [
@@ -80,6 +91,7 @@ end
         0 0 0 0 1 h
         0 0 0 0 0 1
     ]
+    AH_22_IBM = PERM * AH_22_IBM * PERM'
 
     QH_22_IBM =
         σ^2 .* [
@@ -90,6 +102,7 @@ end
             0 0 0 h^4/8 h^3/3 h^2/2
             0 0 0 h^3/6 h^2/2 h
         ]
+    QH_22_IBM = PERM * QH_22_IBM * PERM'
 
     # true preconditioned transition matrices
     AH_22_PRE = [
@@ -100,6 +113,7 @@ end
         0 0 0 0 1 1
         0 0 0 0 0 1
     ]
+    AH_22_PRE = PERM * AH_22_PRE * PERM'
 
     QH_22_PRE =
         σ^2 * [
@@ -110,6 +124,7 @@ end
             0 0 0 1/4 1/3 1/2
             0 0 0 1/3 1/2 1/1
         ]
+    QH_22_PRE = PERM * QH_22_PRE * PERM'
 
     @testset "Test SDE" begin
         sde = PNDE.to_sde(prior)
@@ -136,15 +151,21 @@ end
     end
 
     @testset "Test `make_transition_matrices!`" begin
-        for FAC in (PNDE.IsometricKroneckerCovariance, PNDE.BlockDiagonalCovariance)
+        for FAC in (
+            PNDE.IsometricKroneckerCovariance,
+            PNDE.BlockDiagonalCovariance,
+        )
             A, Q, Ah, Qh, P, PI = PNDE.initialize_transition_matrices(
-                PNDE.BlockDiagonalCovariance{Float64}(d, q), prior, h)
+                FAC{Float64}(d, q), prior, h)
 
             @test AH_22_PRE ≈ A
 
-            for Γ in (σ^2, σ^2 * Eye(d), σ^2 * I(d))
+            for Γ in (σ^2, σ^2 * Eye(d))
                 @test QH_22_PRE ≈ Matrix(PNDE.apply_diffusion(Q, Γ))
             end
+            if FAC != PNDE.IsometricKroneckerCovariance
+                @test QH_22_PRE ≈ Matrix(PNDE.apply_diffusion(Q, σ^2 * I(d)))
+            end
 
             cache = (
                 d=d,
@@ -160,7 +181,7 @@ end
             make_transition_matrices!(cache, prior, h)
             @test AH_22_IBM ≈ cache.Ah
 
-            for Γ in (σ^2, σ^2 * Eye(d), σ^2 * I(d))
+            for Γ in (σ^2, σ^2 * Eye(d))
                 @test QH_22_IBM ≈ Matrix(PNDE.apply_diffusion(cache.Qh, Γ))
             end
             if FAC != PNDE.IsometricKroneckerCovariance
@@ -211,6 +232,15 @@ end
 
     prior = PNDE.IOUP(dim=d, num_derivatives=q, rate_parameter=r)
 
+    PERM = [
+        1 0 0 0 0 0
+        0 0 0 1 0 0
+        0 1 0 0 0 0
+        0 0 0 0 1 0
+        0 0 1 0 0 0
+        0 0 0 0 0 1
+    ]
+
     sde = PNDE.to_sde(prior)
     F = [
         0 1 0 0 0 0
@@ -220,6 +250,7 @@ end
         0 0 0 0 0 1
         0 0 r[2, 1] 0 0 r[2, 2]
     ]
+    F = PERM * F * PERM'
     L = [
         0 0
         0 0
@@ -228,6 +259,7 @@ end
         0 0
         0 1
     ]
+    L = PERM * L
     @test sde.F ≈ F
     @test sde.L ≈ L
 
@@ -250,6 +282,15 @@ end
 
     prior = PNDE.Matern(dim=d, num_derivatives=q, lengthscale=l)
 
+    PERM = [
+        1 0 0 0 0 0
+        0 0 0 1 0 0
+        0 1 0 0 0 0
+        0 0 0 0 1 0
+        0 0 1 0 0 0
+        0 0 0 0 0 1
+    ]
+
     sde = PNDE.to_sde(prior)
     F = [
         0 1 0 0 0 0
@@ -259,6 +300,7 @@ end
         0 0 0 0 0 1
         0 0 0 -a(1)*λ^3 -a(2)*λ^2 -a(3)*λ
     ]
+    F = PERM * F * PERM'
     L = [
         0 0
         0 0
@@ -267,6 +309,7 @@ end
         0 0
         0 1
     ]
+    L = PERM * L
     @test sde.F ≈ F
     @test sde.L ≈ L
 
diff --git a/test/runtests.jl b/test/runtests.jl
index b39c91e9a..10f2c7683 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -19,8 +19,8 @@ const GROUP = get(ENV, "GROUP", "All")
 @testset "ProbNumDiffEq" begin
     if GROUP == "All" || GROUP == "Core"
         @timedtestset "Core" begin
-            @timedsafetestset "BlockDiagonals" begin
-                include("core/blockdiagonals.jl")
+            @timedsafetestset "BlocksOfDiagonals" begin
+                include("core/blocksofdiagonals.jl")
             end
             @timedsafetestset "FastLinalg (`_matmul!`)" begin
                 include("core/fast_linalg.jl")
diff --git a/test/secondorderode.jl b/test/secondorderode.jl
index 468a174b5..a45bea6df 100644
--- a/test/secondorderode.jl
+++ b/test/secondorderode.jl
@@ -34,8 +34,13 @@ appxsol = solve(prob_base, Vern9(), abstol=1e-9, reltol=1e-6)
         EK1(initialization=ForwardDiffInit(2)),
         # EK1(initialization=ClassicSolverInit()), # unstable for this problem
         EK1(diffusionmodel=FixedDiffusion()),
-        # EK0(diffusionmodel=FixedMVDiffusion()),
-        # EK0(diffusionmodel=DynamicMVDiffusion()),
+        EK0(diffusionmodel=FixedMVDiffusion()),
+        EK0(diffusionmodel=DynamicMVDiffusion()),
+        DiagonalEK1(),
+        EK1(initialization=ForwardDiffInit(2)),
+        DiagonalEK1(diffusionmodel=FixedDiffusion()),
+        DiagonalEK1(diffusionmodel=FixedMVDiffusion()),
+        DiagonalEK1(diffusionmodel=DynamicMVDiffusion()),
     )
         sol = solve(_prob, alg)
 
diff --git a/test/state_init.jl b/test/state_init.jl
index bf30c0082..a2c02f953 100644
--- a/test/state_init.jl
+++ b/test/state_init.jl
@@ -52,7 +52,7 @@ import ODEProblemLibrary: prob_ode_fitzhughnagumo, prob_ode_pleiades
         _q = INIT isa SimpleInit ? 1 : INIT.order
         integ = init(prob, EK0(order=q, initialization=INIT))
         dus = integ.cache.x.μ
-        @test reshape(dus, :, 2)'[1:d*(_q+1)] ≈ true_init_states[1:d*(_q+1)]
+        @test dus[1:d*(_q+1)] ≈ true_init_states[1:d*(_q+1)]
     end
 
     @testset "Low-order exact init via ClassiSolverInit: `initial_update!`" begin
@@ -81,7 +81,7 @@ import ODEProblemLibrary: prob_ode_fitzhughnagumo, prob_ode_pleiades
             init(prob, EK1(order=2, initialization=ClassicSolverInit(init_on_ddu=true)))
         ProbNumDiffEq.initial_update!(integ, integ.cache, integ.alg.initialization)
         x = integ.cache.x
-        @test reshape(x.μ, :, 2)'[:] ≈ true_init_states[1:(2+1)*d]
+        @test x.μ[:] ≈ true_init_states[1:(2+1)*d]
     end
 end