diff --git a/docs/src/graphics/block_nonhermitian_lanczos.png b/docs/src/graphics/block_nonhermitian_lanczos.png new file mode 100644 index 000000000..a18052224 Binary files /dev/null and b/docs/src/graphics/block_nonhermitian_lanczos.png differ diff --git a/docs/src/processes.md b/docs/src/processes.md index 063c8d121..8a607a095 100644 --- a/docs/src/processes.md +++ b/docs/src/processes.md @@ -131,7 +131,7 @@ hermitian_lanczos(::Any, ::AbstractMatrix{FC}, ::Int) where FC <: (Union{Complex After $k$ iterations of the non-Hermitian Lanczos process (also named the Lanczos biorthogonalization process), the situation may be summarized as ```math \begin{align*} - A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\ + A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\ A^H U_k &= U_k T_k^H + \bar{\gamma}_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\ V_k^H U_k &= U_k^H V_k = I_k, \end{align*} @@ -167,7 +167,44 @@ Related methods: [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), With these scaling factors, the non-Hermitian Lanczos process coincides with the Hermitian Lanczos process when $A = A^H$ and $b = c$. ```@docs -nonhermitian_lanczos +nonhermitian_lanczos(::Any, ::AbstractVector{FC}, ::AbstractVector{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) +``` + +If the vectors $b$ and $c$ are replaced by matrices $B$ and $C$ with both $p$ columns, we can derive the block non-Hermitian Lanczos process. + +![block_nonhermitian_lanczos](./graphics/block_nonhermitian_lanczos.png) + +After $k$ iterations of the block non-Hermitian Lanczos process, the situation may be summarized as +```math +\begin{align*} + A V_k &= V_{k+1} T_{k+1,k}, \\ + A^H U_k &= U_{k+1} T_{k,k+1}^H, \\ + V_k^H U_k &= U_k^H V_k = I_{pk}, +\end{align*} +``` +where $V_k$ and $U_k$ are bases of the block Krylov subspaces $\mathcal{K}^{\square}_k(A,B)$ and $\mathcal{K}^{\square}_k (A^H,C)$, respectively, +```math +T_{k,k+1} = +\begin{bmatrix} + \Omega_1 & \Phi_2 & & \\ + \Psi_2 & \Omega_2 & \ddots & \\ + & \ddots & \ddots & \Phi_k \\ + & & \Psi_k & \Omega_k \\ + & & & \Psi_{k+1} +\end{bmatrix} +, \qquad +T_{k,k+1}^H = +\begin{bmatrix} + \Omega_1^H & \Psi_2^H & & \\ + \Phi_2^H & \Omega_2^H & \ddots & \\ + & \ddots & \ddots & \Psi_k^H \\ + & & \Phi_k^H & \Omega_k^H \\ + & & & \Phi_{k+1}^H +\end{bmatrix}. +``` + +```@docs +nonhermitian_lanczos(::Any, ::AbstractMatrix{FC}, ::AbstractMatrix{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) ``` ## Arnoldi @@ -391,7 +428,7 @@ T_{k,k+1} = & & & \Psi_{k+1} \end{bmatrix} , \qquad -T_{k+1,k}^H = +T_{k,k+1}^H = \begin{bmatrix} \Omega_1^H & \Psi_2^H & & \\ \Phi_2^H & \Omega_2^H & \ddots & \\ diff --git a/src/block_krylov_processes.jl b/src/block_krylov_processes.jl index 7351c4899..59c149c27 100644 --- a/src/block_krylov_processes.jl +++ b/src/block_krylov_processes.jl @@ -139,6 +139,81 @@ function hermitian_lanczos(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") return V, T end +""" + V, T, U, Tᴴ = nonhermitian_lanczos(A, B, C, k) + +#### Input arguments + +* `A`: a linear operator that models a square matrix of dimension n; +* `B`: a matrix of size n × p; +* `C`: a matrix of size n × p; +* `k`: the number of iterations of the block non-Hermitian Lanczos process. + +#### Output arguments + +* `V`: a dense n × p(k+1) matrix; +* `T`: a sparse p(k+1) × pk tridiagonal matrix with a bandwidth p; +* `U`: a dense n × p(k+1) matrix; +* `Tᴴ`: a sparse p(k+1) × pk tridiagonal matrix with a bandwidth p. +""" +function nonhermitian_lanczos(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + t, p = size(B) + Aᴴ = A' + pivot = NoPivot() + + V = zeros(FC, n, (k+1)*p) + U = zeros(FC, n, (k+1)*p) + T = zeros(FC, (k+1)*p, k*p) + Tᴴ = zeros(FC, (k+1)*p, k*p) + + for i = 1:k + pos1 = (i-1)*p + 1 + pos2 = i*p + pos3 = pos1 + p + pos4 = pos2 + p + vᵢ = view(V,:,pos1:pos2) + vᵢ₊₁ = view(V,:,pos3:pos4) + uᵢ = view(U,:,pos1:pos2) + uᵢ₊₁ = view(U,:,pos3:pos4) + if i == 1 + D = C' * B + F = lu(D, pivot) + Φᵢ, Ψᵢ = F.L, F.U + # Φᵢ = F.P' * Φᵢ + vᵢ .= (Ψᵢ' \ B')' + uᵢ .= (Φᵢ \ C')' + end + qv = A * vᵢ + qu = Aᴴ * uᵢ + if i ≥ 2 + pos5 = pos1 - p + pos6 = pos2 - p + vᵢ₋₁ = view(V,:,pos5:pos6) + uᵢ₋₁ = view(U,:,pos5:pos6) + Φᵢ = Tᴴ[pos1:pos2,pos5:pos6]' + T[pos5:pos6,pos1:pos2] = Φᵢ + qv = qv - vᵢ₋₁ * Φᵢ + TΨᵢ = T[pos1:pos2,pos5:pos6]' + Tᴴ[pos5:pos6,pos1:pos2] = TΨᵢ + qu = qu - uᵢ₋₁ * TΨᵢ + end + Ωᵢ = uᵢ' * qv + T[pos1:pos2,pos1:pos2] .= Ωᵢ + Tᴴ[pos1:pos2,pos1:pos2] .= Ωᵢ' + qv = qv - vᵢ * Ωᵢ + qu = qu - uᵢ * Ωᵢ' + D = qu' * qv + F = lu(D, pivot) + Φᵢ₊₁, Ψᵢ₊₁ = F.L, F.U + # Φᵢ₊₁ = F.P' * Φᵢ₊₁ + vᵢ₊₁ .= (Ψᵢ₊₁' \ qv')' + T[pos3:pos4,pos1:pos2] .= Ψᵢ₊₁ + uᵢ₊₁ .= (Φᵢ₊₁ \ qu')' + Tᴴ[pos3:pos4,pos1:pos2] .= Φᵢ₊₁' + end + return V, T, U, Tᴴ +end """ V, H = arnoldi(A, B, k; reorthogonalization=false) diff --git a/test/test_processes.jl b/test/test_processes.jl index 4e6502a49..c3d7fc902 100644 --- a/test/test_processes.jl +++ b/test/test_processes.jl @@ -21,7 +21,7 @@ end k = 20 p = 4 s = 4 - + for FC in (Float64, ComplexF64) R = real(FC) nbits_FC = sizeof(FC) @@ -78,6 +78,25 @@ end @test expected_nonhermitian_lanczos_bytes ≤ actual_nonhermitian_lanczos_bytes ≤ 1.02 * expected_nonhermitian_lanczos_bytes end + @testset "Block Non-Hermitian Lanczos" begin + A = rand(FC, n, n) + B = rand(FC, n, p) + C = rand(FC, n, p) + V, T, U, Tᴴ = nonhermitian_lanczos(A, B, C, k) + + @test norm(V[:,1:p*s]' * U[:,1:p*s] - I) ≤ 1e-4 + @test norm(U[:,1:p*s]' * V[:,1:p*s] - I) ≤ 1e-4 + @test T[1:k*p,1:k*p] ≈ Tᴴ[1:k*p,1:k*p]' + @test A * V[:,1:k*p] ≈ V * T + @test A' * U[:,1:k*p] ≈ U * Tᴴ + + # storage_block_nonhermitian_lanczos_bytes(n, p, k) = ... + + # expected_block_nonhermitian_lanczos_bytes = storage_block_nonhermitian_lanczos_bytes(n, p, k) + # actual_block_nonhermitian_lanczos_bytes = @allocated nonhermitian_lanczos(A, B, C, k) + # @test expected_block_nonhermitian_lanczos_bytes ≤ actual_block_nonhermitian_lanczos_bytes ≤ 1.02 * expected_block_nonhermitian_lanczos_bytes + end + @testset "Arnoldi" begin for reorthogonalization in (false, true) A, b = nonsymmetric_indefinite(n, FC=FC)