From e37022a3ddb22f50c70a4f40dc16e6faca2f87d8 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Sat, 23 Sep 2023 23:29:48 -0500 Subject: [PATCH] More modifications on block-Krylov processes --- docs/src/block_processes.md | 12 ++ src/Krylov.jl | 1 + src/block_krylov_processes.jl | 257 ++++++---------------------------- src/krylov_utils.jl | 25 ++++ src/processes_utils.jl | 196 ++++++++++++++++++++++++++ test/test_block_processes.jl | 24 +++- 6 files changed, 290 insertions(+), 225 deletions(-) create mode 100644 src/processes_utils.jl diff --git a/docs/src/block_processes.md b/docs/src/block_processes.md index d7977c7f3..eb617ab1b 100644 --- a/docs/src/block_processes.md +++ b/docs/src/block_processes.md @@ -25,6 +25,8 @@ T_{k+1,k} = \end{bmatrix}. ``` +The function [`hermitian_lanczos`](@ref hermitian_lanczos) returns $V_{k+1}$, $\Psi_1$ and $T_{k+1,k}$. + ```@docs hermitian_lanczos(::Any, ::AbstractMatrix{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) ``` @@ -64,6 +66,8 @@ T_{k,k+1}^H = \end{bmatrix}. ``` +The function [`nonhermitian_lanczos`](@ref nonhermitian_lanczos) returns $V_{k+1}$, $\Phi_1$, $T_{k+1,k}$, $U_{k+1}$ $\Phi_1^H$ and $T_{k,k+1}^H$. + ```@docs nonhermitian_lanczos(::Any, ::AbstractMatrix{FC}, ::AbstractMatrix{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) ``` @@ -93,6 +97,8 @@ H_{k+1,k} = \end{bmatrix}. ``` +The function [`arnoldi`](@ref arnoldi) returns $V_{k+1}$, $\Gamma$, and $H_{k+1,k}$. + ```@docs arnoldi(::Any, ::AbstractMatrix{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) ``` @@ -132,6 +138,8 @@ L_{k+1}^H = \end{bmatrix}. ``` +The function [`golub_kahan`](@ref golub_kahan) returns $V_{k+1}$, $U_{k+1}$, $\Psi_1$ and $L_{k+1}$. + ```@docs golub_kahan(::Any, ::AbstractMatrix{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) ``` @@ -171,6 +179,8 @@ T_{k,k+1}^H = \end{bmatrix}. ``` +The function [`saunders_simon_yip`](@ref saunders_simon_yip) returns $V_{k+1}$, $\Psi_1$, $T_{k+1,k}$, $U_{k+1}$, $\Phi_1^H$ and $T_{k,k+1}^H$. + ```@docs saunders_simon_yip(::Any, ::AbstractMatrix{FC}, ::AbstractMatrix{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) ``` @@ -210,6 +220,8 @@ F_{k+1,k} = \end{bmatrix}. ``` +The function [`montoison_orban`](@ref montoison_orban) returns $V_{k+1}$, $\Gamma$, $H_{k+1,k}$, $U_{k+1}$, $\Lambda$, and $F_{k+1,k}$. + ```@docs montoison_orban(::Any, ::Any, ::AbstractMatrix{FC}, ::AbstractMatrix{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) ``` diff --git a/src/Krylov.jl b/src/Krylov.jl index 56ca58f38..e2eb70952 100644 --- a/src/Krylov.jl +++ b/src/Krylov.jl @@ -6,6 +6,7 @@ include("krylov_utils.jl") include("krylov_stats.jl") include("krylov_solvers.jl") +include("processes_utils.jl") include("krylov_processes.jl") include("block_krylov_processes.jl") diff --git a/src/block_krylov_processes.jl b/src/block_krylov_processes.jl index 47aa6c566..2978142e8 100644 --- a/src/block_krylov_processes.jl +++ b/src/block_krylov_processes.jl @@ -1,202 +1,5 @@ -# """ -# Gram-Schmidt orthogonalization for a reduced QR decomposition: -# Q, R = gs(A) -# -# Input : -# A an n-by-k matrix, n ≥ k -# -# Output : -# Q an n-by-k orthonormal matrix: QᴴQ = Iₖ -# R an k-by-k upper triangular matrix: QR = A -# """ -function gs(A::AbstractMatrix{FC}) where FC <: FloatOrComplex - n, k = size(A) - Q = copy(A) - R = zeros(FC, k, k) - v = zeros(FC, n) - gs!(Q, R, v) -end - -function gs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, v::AbstractVector{FC}) where FC <: FloatOrComplex - n, k = size(Q) - aⱼ = v - for j = 1:k - qⱼ = view(Q,:,j) - aⱼ .= qⱼ - for i = 1:j-1 - qᵢ = view(Q,:,i) - R[i,j] = @kdot(n, qᵢ, aⱼ) # rᵢⱼ = ⟨qᵢ , aⱼ⟩ - @kaxpy!(n, -R[i,j], qᵢ, qⱼ) # qⱼ = qⱼ - rᵢⱼqᵢ - end - R[j,j] = @knrm2(n, qⱼ) # rⱼⱼ = ‖qⱼ‖ - qⱼ ./= R[j,j] # qⱼ = qⱼ / rⱼⱼ - end - return Q, R -end - -# """ -# Modified Gram-Schmidt orthogonalization for a reduced QR decomposition: -# Q, R = mgs(A) -# -# Input : -# A an n-by-k matrix, n ≥ k -# -# # Q an n-by-k orthonormal matrix: QᴴQ = Iₖ -# # R an k-by-k upper triangular matrix: QR = A -# """ -function mgs(A::AbstractMatrix{FC}) where FC <: FloatOrComplex - n, k = size(A) - Q = copy(A) - R = zeros(FC, k, k) - mgs!(Q, R) -end - -function mgs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}) where FC <: FloatOrComplex - n, k = size(Q) - for i = 1:k - qᵢ = view(Q,:,i) - R[i,i] = @knrm2(n, qᵢ) # rᵢᵢ = ‖qᵢ‖ - qᵢ ./= R[i,i] # qᵢ = qᵢ / rᵢᵢ - for j = i+1:k - qⱼ = view(Q,:,j) - R[i,j] = @kdot(n, qᵢ, qⱼ) # rᵢⱼ = ⟨qᵢ , qⱼ⟩ - @kaxpy!(n, -R[i,j], qᵢ, qⱼ) # qⱼ = qⱼ - rᵢⱼqᵢ - end - end - return Q, R -end - -# Reduced QR factorization with Householder reflections: -# Q, R = householder(A) -# -# Input : -# A an n-by-k matrix, n ≥ k -# -# Output : -# Q an n-by-k orthonormal matrix: QᴴQ = Iₖ -# R an k-by-k upper triangular matrix: QR = A -function householder(A::AbstractMatrix{FC}) where FC <: FloatOrComplex - n, k = size(A) - Q = copy(A) - τ = zeros(FC, k) - R = zeros(FC, k, k) - householder!(Q, R, τ) -end - -function householder!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, τ::AbstractVector{FC}) where FC <: FloatOrComplex - n, k = size(Q) - @kgeqrf!(Q, τ) - for i = 1:k - for j = i:k - R[i,j] = Q[i,j] - end - end - @korgqr!(Q, τ) - return Q, R -end - -# Reduced QR factorization with Givens reflections: -# Q, R = givens(A) -# -# Input : -# A an n-by-k matrix, n ≥ k -# -# # Q an n-by-k orthonormal matrix: QᴴQ = Iₖ -# # R an k-by-k upper triangular matrix: QR = A -# """ -function givens(A::AbstractMatrix{FC}) where FC <: FloatOrComplex - n, k = size(A) - nr = n*k - div(k*(k+1), 2) - T = real(FC) - Q = copy(A) - R = zeros(FC, k, k) - C = zeros(T, nr) - S = zeros(FC, nr) - givens!(Q, R, C, S) -end - -function givens!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, C::AbstractVector{T}, S::AbstractVector{FC}) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} - n, k = size(Q) - pos = 0 - for j = 1:k - for i = n-1:-1:j - pos += 1 - C[pos], S[pos], Q[i,j] = sym_givens(Q[i,j], Q[i+1,j]) - if j < k - reflect!(view(Q, i, j+1:k), view(Q, i+1, j+1:k), C[pos], S[pos]) - end - end - end - for j = 1:k - for i = 1:j - R[i,j] = Q[i,j] - end - end - Q .= zero(FC) - for i = 1:k - Q[i,i] = one(FC) - end - for j = k:-1:1 - for i = j:n-1 - reflect!(view(Q, i, j:k), view(Q, i+1, j:k), C[pos], S[pos]) - pos -= 1 - end - end - return Q, R -end - -function reduced_qr!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, algo::String) where FC <: FloatOrComplex - n, k = size(Q) - T = real(FC) - if algo == "gs" - v = zeros(FC, n) - gs!(Q, R, v) - elseif algo == "mgs" - mgs!(Q, R) - elseif algo == "givens" - nr = n*k - div(k*(k+1), 2) - C = zeros(T, nr) - S = zeros(FC, nr) - givens!(Q, R, C, S) - elseif algo == "householder" - τ = zeros(FC, k) - householder!(Q, R, τ) - else - error("$algo is not a supported method to perform a reduced QR.") - end - return Q, R -end - -function reduced_qr(A::AbstractMatrix{FC}, algo::String) where FC <: FloatOrComplex - if algo == "gs" - Q, R = gs(A) - elseif algo == "mgs" - Q, R = mgs(A) - elseif algo == "givens" - Q, R = givens(A) - elseif algo == "householder" - Q, R = householder(A) - else - error("$algo is not a supported method to perform a reduced QR.") - end - return Q, R -end - -function add!(A, B, i1, i2, j1, j2) - i = 0 - for ii = i1:i2 - i += 1 - j = 0 - for jj = j1:j2 - j += 1 - A[ii,jj] += B[i,j] - end - end - return A -end - """ - V, T = hermitian_lanczos(A, B, k) + V, Ψ, T = hermitian_lanczos(A, B, k) #### Input arguments @@ -207,6 +10,7 @@ end #### Output arguments * `V`: a dense n × p(k+1) matrix; +* `Ψ`: a dense p × p upper triangular matrix; * `T`: a sparse p(k+1) × pk block tridiagonal matrix with a bandwidth p. """ function hermitian_lanczos(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") where FC <: FloatOrComplex @@ -219,7 +23,8 @@ function hermitian_lanczos(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") α = -one(FC) β = one(FC) q = zeros(FC, n, p) - ψ₁ = Ωᵢ = zeros(FC, p, p) + ψ₁ = zeros(FC, p, p) + Ωᵢ = zeros(FC, p, p) for i = 1:k pos1 = (i-1)*p + 1 @@ -245,11 +50,11 @@ function hermitian_lanczos(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") reduced_qr!(q, Ψᵢ₊₁, algo) vᵢ₊₁ .= q end - return V, T + return V, ψ₁, T end """ - V, T, U, Tᴴ = nonhermitian_lanczos(A, B, C, k) + V, Ψ, T, U, Φᴴ, Tᴴ = nonhermitian_lanczos(A, B, C, k) #### Input arguments @@ -261,9 +66,11 @@ end #### Output arguments * `V`: a dense n × p(k+1) matrix; -* `T`: a sparse p(k+1) × pk tridiagonal matrix with a bandwidth p; +* `Ψ`: a dense p × p upper triangular matrix; +* `T`: a sparse p(k+1) × pk block tridiagonal matrix with a bandwidth p; * `U`: a dense n × p(k+1) matrix; -* `Tᴴ`: a sparse p(k+1) × pk tridiagonal matrix with a bandwidth p. +* `Φᴴ`: a dense p × p upper triangular matrix; +* `Tᴴ`: a sparse p(k+1) × pk block tridiagonal matrix with a bandwidth p. """ function nonhermitian_lanczos(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k::Int) where FC <: FloatOrComplex m, n = size(A) @@ -282,6 +89,8 @@ function nonhermitian_lanczos(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k qᵤ = zeros(FC, n, p) Ωᵢ = zeros(FC, p, p) D = zeros(FC, p, p) + Ψ₁ = zeros(FC, p, p) + Φ₁ᴴ = zeros(FC, p, p) for i = 1:k pos1 = (i-1)*p + 1 @@ -296,6 +105,8 @@ function nonhermitian_lanczos(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k mul!(D, C', B) # D = Cᴴ * B F = lu(D, pivoting) Φᵢ, Ψᵢ = F.L, F.U + Ψ₁ .= Ψᵢ + Φ₁ᴴ .= Φᵢ' # Φᵢ = F.P' * Φᵢ vᵢ .= (Ψᵢ' \ B')' uᵢ .= (Φᵢ \ C')' @@ -328,11 +139,11 @@ function nonhermitian_lanczos(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k uᵢ₊₁ .= (Φᵢ₊₁ \ qᵤ')' Tᴴ[pos3:pos4,pos1:pos2] .= Φᵢ₊₁' end - return V, T, U, Tᴴ + return V, Ψ₁, T, U, Φ₁ᴴ, Tᴴ end """ - V, H = arnoldi(A, B, k; reorthogonalization=false) + V, Γ, H = arnoldi(A, B, k; reorthogonalization=false) #### Input arguments @@ -347,6 +158,7 @@ end #### Output arguments * `V`: a dense n × p(k+1) matrix; +* `Γ`: a dense p × p upper triangular matrix; * `H`: a dense p(k+1) × pk block upper Hessenberg matrix with a lower bandwidth p. """ function arnoldi(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs", reorthogonalization::Bool=false) where FC <: FloatOrComplex @@ -359,6 +171,7 @@ function arnoldi(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs", reorthogo α = -one(FC) β = one(FC) q = zeros(FC, n, p) + Γ = zeros(FC, p, p) Ψᵢⱼ = Ψtmp = zeros(FC, p, p) for j = 1:k @@ -368,7 +181,6 @@ function arnoldi(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs", reorthogo vⱼ₊₁ = view(V,:,pos1+p:pos2+p) if j == 1 q .= B - Γ = Ψtmp reduced_qr!(q, Γ, algo) vⱼ .= q end @@ -395,11 +207,11 @@ function arnoldi(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs", reorthogo reduced_qr!(q, Ψ, algo) vⱼ₊₁ .= q end - return V, H + return V, Γ, H end """ - V, U, L = golub_kahan(A, B, k) + V, U, Ψ, L = golub_kahan(A, B, k) #### Input arguments @@ -411,6 +223,7 @@ end * `V`: a dense n × p(k+1) matrix; * `U`: a dense m × p(k+1) matrix; +* `Ψ`: a dense p × p upper triangular matrix; * `L`: a sparse p(k+1) × p(k+1) block lower bidiagonal matrix with a lower bandwidth p. """ function golub_kahan(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") where FC <: FloatOrComplex @@ -458,11 +271,11 @@ function golub_kahan(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") where reduced_qr!(qᵥ, Ωᵢ₊₁', algo) vᵢ₊₁ .= qᵥ end - return V, U, L + return V, U, Ψ₁, L end """ - V, T, U, Tᴴ = saunders_simon_yip(A, B, C, k) + V, Ψ, T, U, Φᴴ, Tᴴ = saunders_simon_yip(A, B, C, k) #### Input arguments @@ -474,9 +287,11 @@ end #### Output arguments * `V`: a dense m × p(k+1) matrix; -* `T`: a sparse p(k+1) × pk tridiagonal matrix with a bandwidth p; +* `Ψ`: a dense p × p upper triangular matrix; +* `T`: a sparse p(k+1) × pk block tridiagonal matrix with a bandwidth p; * `U`: a dense n × p(k+1) matrix; -* `Tᴴ`: a sparse p(k+1) × pk tridiagonal matrix with a bandwidth p. +* `Φᴴ`: a dense p × p upper triangular matrix; +* `Tᴴ`: a sparse p(k+1) × pk block tridiagonal matrix with a bandwidth p. """ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k::Int; algo::String="mgs") where FC <: FloatOrComplex m, n = size(A) @@ -492,6 +307,8 @@ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: β = one(FC) qᵥ = zeros(FC, m, p) qᵤ = zeros(FC, n, p) + Ψ₁ = zeros(FC, p, p) + Φ₁ᴴ = zeros(FC, p, p) Ωᵢ = Ψᵢ = TΦᵢ = zeros(FC, p, p) for i = 1:k @@ -505,10 +322,10 @@ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: uᵢ₊₁ = view(U,:,pos3:pos4) if i == 1 qᵥ .= B - reduced_qr!(qᵥ, Ψᵢ, algo) + reduced_qr!(qᵥ, Ψ₁, algo) vᵢ .= qᵥ qᵤ .= C - reduced_qr!(qᵤ, TΦᵢ, algo) + reduced_qr!(qᵤ, Φ₁ᴴ, algo) uᵢ .= qᵤ end mul!(qᵥ, A, uᵢ) @@ -537,11 +354,11 @@ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: reduced_qr!(qᵤ, TΦᵢ₊₁, algo) uᵢ₊₁ .= qᵤ end - return V, T, U, Tᴴ + return V, Ψ₁, T, U, Φ₁ᴴ, Tᴴ end """ - V, H, U, F = montoison_orban(A, B, D, C, k; reorthogonalization=false) + V, Γ, H, U, Λ, F = montoison_orban(A, B, D, C, k; reorthogonalization=false) #### Input arguments @@ -558,8 +375,10 @@ end #### Output arguments * `V`: a dense m × p(k+1) matrix; +* `Γ`: a dense p × p upper triangular matrix; * `H`: a dense p(k+1) × pk block upper Hessenberg matrix with a lower bandwidth p; * `U`: a dense n × p(k+1) matrix; +* `Λ`: a dense p × p upper triangular matrix; * `F`: a dense p(k+1) × pk block upper Hessenberg matrix with a lower bandwidth p. """ function montoison_orban(A, B, D::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k::Int; algo::String="mgs", reorthogonalization::Bool=false) where FC <: FloatOrComplex @@ -575,7 +394,9 @@ function montoison_orban(A, B, D::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: β = one(FC) qᵥ = zeros(FC, m, p) qᵤ = zeros(FC, n, p) - Γ = Λ = Ψᵢⱼ = Φᵢⱼ = Ψtmp = Φtmp = zeros(FC, p, p) + Γ = zeros(FC, p, p) + Λ = zeros(FC, p, p) + Ψᵢⱼ = Φᵢⱼ = Ψtmp = Φtmp = zeros(FC, p, p) for j = 1:k pos1 = (j-1)*p + 1 @@ -627,5 +448,5 @@ function montoison_orban(A, B, D::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: reduced_qr!(qᵤ, Φ, algo) uⱼ₊₁ .= qᵤ end - return V, H, U, F + return V, Γ, H, U, Λ, F end diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl index 58024f671..e06569c4a 100644 --- a/src/krylov_utils.jl +++ b/src/krylov_utils.jl @@ -322,6 +322,31 @@ kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t kcopy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy) kcopy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = copyto!(y, x) +# geqrf -- Computes the QR factorization of a general m-by-n matrix. +# orgqr -- Generates the real orthogonal matrix of the QR factorization formed by geqrf. +# ungqr -- Generates the complex unitary matrix of the QR factorization formed by geqrf. +# ormqr -- Multiplies a real matrix by the orthogonal matrix of the QR factorization formed by geqrf. +# unmqr -- Multiplies a complex matrix by the unitary matrix of the QR factorization formed by geqrf. + +# getrf -- Computes the LU factorization of a general m-by-n matrix. +# getrs -- Solves a system of linear equations with an LU-factored square matrix. +# getri -- Computes the inverse of an LU-factored general matrix. + +# sytrf -- Computes the Bunch-Kaufman factorization of a symmetric matrix. +# sytrs -- Solves a system of linear equations with an LDLᵀ-factored symmetric matrix. +# sytri -- Computes the inverse of a symmetric matrix using LDLᵀ Bunch-Kaufman factorization. + +# hetrf -- Computes the Bunch-Kaufman factorization of a complex Hermitian matrix. +# hetrs -- Solves a system of linear equations with an LDLᴴ-factored Hermitian matrix. +# hetri -- Computes the inverse of a complex Hermitian matrix using LDLᴴ Bunch-Kaufman factorization. + +# potrf -- Computes the Cholesky factorization of a symmetric (Hermitian) positive-definite matrix. +# potrs -- Solves a system of linear equations with a Cholesky-factored symmetric (Hermitian) positive-definite matrix. +# potri -- Computes the inverse of a Cholesky-factored symmetric (Hermitian) positive-definite matrix. + +# trtrs -- Solves a system of linear equations with a triangular matrix. +# trtri -- Computes the inverse of a triangular matrix. + kgeqrf!(A :: Matrix{T}, tau :: Vector{T}) where T <: BLAS.BlasFloat = LAPACK.geqrf!(A, tau) korgqr!(A :: Matrix{T}, tau :: Vector{T}) where T <: BLAS.BlasFloat = LAPACK.orgqr!(A, tau) kormqr!(side :: Char, trans :: Char, A :: Matrix{T}, tau :: Vector{T}, C :: Matrix{T}) where T <: BLAS.BlasFloat = LAPACK.ormqr!(side, trans, A, tau, C) diff --git a/src/processes_utils.jl b/src/processes_utils.jl new file mode 100644 index 000000000..4f133c917 --- /dev/null +++ b/src/processes_utils.jl @@ -0,0 +1,196 @@ +# """ +# Gram-Schmidt orthogonalization for a reduced QR decomposition: +# Q, R = gs(A) +# +# Input : +# A an n-by-k matrix, n ≥ k +# +# Output : +# Q an n-by-k orthonormal matrix: QᴴQ = Iₖ +# R an k-by-k upper triangular matrix: QR = A +# """ +function gs(A::AbstractMatrix{FC}) where FC <: FloatOrComplex + n, k = size(A) + Q = copy(A) + R = zeros(FC, k, k) + v = zeros(FC, n) + gs!(Q, R, v) +end + +function gs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, v::AbstractVector{FC}) where FC <: FloatOrComplex + n, k = size(Q) + aⱼ = v + for j = 1:k + qⱼ = view(Q,:,j) + aⱼ .= qⱼ + for i = 1:j-1 + qᵢ = view(Q,:,i) + R[i,j] = @kdot(n, qᵢ, aⱼ) # rᵢⱼ = ⟨qᵢ , aⱼ⟩ + @kaxpy!(n, -R[i,j], qᵢ, qⱼ) # qⱼ = qⱼ - rᵢⱼqᵢ + end + R[j,j] = @knrm2(n, qⱼ) # rⱼⱼ = ‖qⱼ‖ + qⱼ ./= R[j,j] # qⱼ = qⱼ / rⱼⱼ + end + return Q, R +end + +# """ +# Modified Gram-Schmidt orthogonalization for a reduced QR decomposition: +# Q, R = mgs(A) +# +# Input : +# A an n-by-k matrix, n ≥ k +# +# # Q an n-by-k orthonormal matrix: QᴴQ = Iₖ +# # R an k-by-k upper triangular matrix: QR = A +# """ +function mgs(A::AbstractMatrix{FC}) where FC <: FloatOrComplex + n, k = size(A) + Q = copy(A) + R = zeros(FC, k, k) + mgs!(Q, R) +end + +function mgs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}) where FC <: FloatOrComplex + n, k = size(Q) + for i = 1:k + qᵢ = view(Q,:,i) + R[i,i] = @knrm2(n, qᵢ) # rᵢᵢ = ‖qᵢ‖ + qᵢ ./= R[i,i] # qᵢ = qᵢ / rᵢᵢ + for j = i+1:k + qⱼ = view(Q,:,j) + R[i,j] = @kdot(n, qᵢ, qⱼ) # rᵢⱼ = ⟨qᵢ , qⱼ⟩ + @kaxpy!(n, -R[i,j], qᵢ, qⱼ) # qⱼ = qⱼ - rᵢⱼqᵢ + end + end + return Q, R +end + +# Reduced QR factorization with Householder reflections: +# Q, R = householder(A) +# +# Input : +# A an n-by-k matrix, n ≥ k +# +# Output : +# Q an n-by-k orthonormal matrix: QᴴQ = Iₖ +# R an k-by-k upper triangular matrix: QR = A +function householder(A::AbstractMatrix{FC}) where FC <: FloatOrComplex + n, k = size(A) + Q = copy(A) + τ = zeros(FC, k) + R = zeros(FC, k, k) + householder!(Q, R, τ) +end + +function householder!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, τ::AbstractVector{FC}) where FC <: FloatOrComplex + n, k = size(Q) + @kgeqrf!(Q, τ) + for i = 1:k + for j = i:k + R[i,j] = Q[i,j] + end + end + @korgqr!(Q, τ) + return Q, R +end + +# Reduced QR factorization with Givens reflections: +# Q, R = givens(A) +# +# Input : +# A an n-by-k matrix, n ≥ k +# +# # Q an n-by-k orthonormal matrix: QᴴQ = Iₖ +# # R an k-by-k upper triangular matrix: QR = A +# """ +function givens(A::AbstractMatrix{FC}) where FC <: FloatOrComplex + n, k = size(A) + nr = n*k - div(k*(k+1), 2) + T = real(FC) + Q = copy(A) + R = zeros(FC, k, k) + C = zeros(T, nr) + S = zeros(FC, nr) + givens!(Q, R, C, S) +end + +function givens!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, C::AbstractVector{T}, S::AbstractVector{FC}) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + n, k = size(Q) + pos = 0 + for j = 1:k + for i = n-1:-1:j + pos += 1 + C[pos], S[pos], Q[i,j] = sym_givens(Q[i,j], Q[i+1,j]) + if j < k + reflect!(view(Q, i, j+1:k), view(Q, i+1, j+1:k), C[pos], S[pos]) + end + end + end + for j = 1:k + for i = 1:j + R[i,j] = Q[i,j] + end + end + Q .= zero(FC) + for i = 1:k + Q[i,i] = one(FC) + end + for j = k:-1:1 + for i = j:n-1 + reflect!(view(Q, i, j:k), view(Q, i+1, j:k), C[pos], S[pos]) + pos -= 1 + end + end + return Q, R +end + +function reduced_qr!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, algo::String) where FC <: FloatOrComplex + n, k = size(Q) + T = real(FC) + if algo == "gs" + v = zeros(FC, n) + gs!(Q, R, v) + elseif algo == "mgs" + mgs!(Q, R) + elseif algo == "givens" + nr = n*k - div(k*(k+1), 2) + C = zeros(T, nr) + S = zeros(FC, nr) + givens!(Q, R, C, S) + elseif algo == "householder" + τ = zeros(FC, k) + householder!(Q, R, τ) + else + error("$algo is not a supported method to perform a reduced QR.") + end + return Q, R +end + +function reduced_qr(A::AbstractMatrix{FC}, algo::String) where FC <: FloatOrComplex + if algo == "gs" + Q, R = gs(A) + elseif algo == "mgs" + Q, R = mgs(A) + elseif algo == "givens" + Q, R = givens(A) + elseif algo == "householder" + Q, R = householder(A) + else + error("$algo is not a supported method to perform a reduced QR.") + end + return Q, R +end + +function add!(A, B, i1, i2, j1, j2) + i = 0 + for ii = i1:i2 + i += 1 + j = 0 + for jj = j1:j2 + j += 1 + A[ii,jj] += B[i,j] + end + end + return A +end diff --git a/test/test_block_processes.jl b/test/test_block_processes.jl index 998ccc965..e881bc12c 100644 --- a/test/test_block_processes.jl +++ b/test/test_block_processes.jl @@ -20,9 +20,10 @@ B = rand(FC, n, p) @testset "algo = $algo" for algo in ("householder", "gs", "mgs", "givens") - V, T = hermitian_lanczos(A, B, k; algo) + V, Ψ₁, T = hermitian_lanczos(A, B, k; algo) @test norm(V[:,1:p*s]' * V[:,1:p*s] - I) ≤ 1e-4 + @test V[:,1:p] * Ψ₁ ≈ B @test A * V[:,1:p*k] ≈ V * T # storage_block_hermitian_lanczos_bytes(n, k) = ... @@ -37,10 +38,12 @@ A = rand(FC, n, n) B = rand(FC, n, p) C = rand(FC, n, p) - V, T, U, Tᴴ = nonhermitian_lanczos(A, B, C, k) + V, Ψ₁, T, U, Φ₁ᴴ, Tᴴ = nonhermitian_lanczos(A, B, C, k) @test norm(V[:,1:p*s]' * U[:,1:p*s] - I) ≤ 1e-4 @test norm(U[:,1:p*s]' * V[:,1:p*s] - I) ≤ 1e-4 + @test V[:,1:p] * Ψ₁ ≈ B + @test U[:,1:p] * Φ₁ᴴ ≈ C @test T[1:k*p,1:k*p] ≈ Tᴴ[1:k*p,1:k*p]' @test A * V[:,1:k*p] ≈ V * T @test A' * U[:,1:k*p] ≈ U * Tᴴ @@ -58,10 +61,10 @@ @testset "algo = $algo" for algo in ("householder", "gs", "mgs", "givens") @testset "reorthogonalization = $reorthogonalization" for reorthogonalization in (false, true) - V, H = arnoldi(A, B, k; algo, reorthogonalization) + V, Γ, H = arnoldi(A, B, k; algo, reorthogonalization) @test norm(V[:,1:p*s]' * V[:,1:p*s] - I) ≤ 1e-4 - @test norm(V' * V - I) ≤ 1e-4 + @test V[:,1:p] * Γ ≈ B @test A * V[:,1:p*k] ≈ V * H if algo == "mgs" @@ -70,6 +73,7 @@ memory += p*n * (k+1) * nbits_FC # Vₖ₊₁ memory += n*p * nbits_FC # q memory += p*k * p*(k+1) * nbits_FC # Hₖ₊₁.ₖ + memory += p*p * nbits_FC # Γ memory += p*p * nbits_FC # Ψᵢⱼ return memory end @@ -88,11 +92,12 @@ B = rand(FC, m, p) @testset "algo = $algo" for algo in ("householder", "gs", "mgs", "givens") - V, U, L = golub_kahan(A, B, k; algo) + V, U, Ψ₁, L = golub_kahan(A, B, k; algo) BL = L[1:(k+1)*p,1:k*p] @test norm(V[:,1:p*s]' * V[:,1:p*s] - I) ≤ 1e-4 @test norm(U[:,1:p*s]' * U[:,1:p*s] - I) ≤ 1e-4 + @test U[:,1:p] * Ψ₁ ≈ B @test A * V[:,1:k*p] ≈ U * BL @test A' * U ≈ V * L' @test A' * A * V[:,1:k*p] ≈ V * L' * BL @@ -112,10 +117,12 @@ C = rand(FC, n, p) @testset "algo = $algo" for algo in ("householder", "gs", "mgs", "givens") - V, T, U, Tᴴ = saunders_simon_yip(A, B, C, k; algo) + V, Ψ₁, T, U, Φ₁ᴴ, Tᴴ = saunders_simon_yip(A, B, C, k; algo) @test norm(V[:,1:p*s]' * V[:,1:p*s] - I) ≤ 1e-4 @test norm(U[:,1:p*s]' * U[:,1:p*s] - I) ≤ 1e-4 + @test V[:,1:p] * Ψ₁ ≈ B + @test U[:,1:p] * Φ₁ᴴ ≈ C @test T[1:k*p,1:k*p] ≈ Tᴴ[1:k*p,1:k*p]' @test A * U[:,1:k*p] ≈ V * T @test A' * V[:,1:k*p] ≈ U * Tᴴ @@ -138,10 +145,12 @@ @testset "algo = $algo" for algo in ("householder", "gs", "mgs", "givens") @testset "reorthogonalization = $reorthogonalization" for reorthogonalization in (false, true) - V, H, U, F = montoison_orban(A, B, D, C, k; algo, reorthogonalization) + V, Γ, H, U, Λ, F = montoison_orban(A, B, D, C, k; algo, reorthogonalization) @test norm(V[:,1:p*s]' * V[:,1:p*s] - I) ≤ 1e-4 @test norm(U[:,1:p*s]' * U[:,1:p*s] - I) ≤ 1e-4 + @test V[:,1:p] * Γ ≈ D + @test U[:,1:p] * Λ ≈ C @test A * U[:,1:k*p] ≈ V * H @test B * V[:,1:k*p] ≈ U * F @test B * A * U[:,1:(k-1)*p] ≈ U * F * H[1:k*p,1:(k-1)*p] @@ -152,6 +161,7 @@ memory = 0 memory += p*(n+m) * (k+1) * nbits_FC # Vₖ₊₁ and Uₖ₊₁ memory += 2 * p*k * p*(k+1) * nbits_FC # Hₖ₊₁.ₖ and Fₖ₊₁.ₖ + memory += 2 * p*p * nbits_FC # Γ and Λ memory += p*p * nbits_FC # Ψᵢⱼ and Φᵢⱼ memory += (n+m)*p * nbits_FC # qᵥ and qᵤ return memory