From 21d9c95aac650735e9a6fdb79aa007c7f7096837 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Wed, 4 Oct 2023 00:01:09 -0500 Subject: [PATCH] Almost done! --- src/block_krylov_processes.jl | 274 +++++++++++++++++++++++++++------- src/krylov_processes.jl | 16 +- src/processes_utils.jl | 17 +-- test/runtests.jl | 2 + test/test_block_processes.jl | 158 +++++++++++++------- test/test_cr.jl | 1 - 6 files changed, 339 insertions(+), 129 deletions(-) diff --git a/src/block_krylov_processes.jl b/src/block_krylov_processes.jl index 43fe9179e..140c84937 100644 --- a/src/block_krylov_processes.jl +++ b/src/block_krylov_processes.jl @@ -17,38 +17,77 @@ function hermitian_lanczos(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") m, n = size(A) t, p = size(B) + nnzT = p*p + (k-1)*p*(2*p+1) + div(p*(p+1), 2) + colptr = zeros(Int, p*k+1) + rowval = zeros(Int, nnzT) + nzval = zeros(FC, nnzT) + + colptr[1] = 1 + for j = 1:k*p + pos = colptr[j] + for i = max(1, j-p):j+p + rowval[pos] = i + pos += 1 + end + colptr[j+1] = pos + end + V = zeros(FC, n, (k+1)*p) - T = spzeros(FC, (k+1)*p, k*p) + T = SparseMatrixCSC((k+1)*p, k*p, colptr, rowval, nzval) α = -one(FC) β = one(FC) q = zeros(FC, n, p) ψ₁ = zeros(FC, p, p) - Ωᵢ = zeros(FC, p, p) + Ωᵢ = Ψᵢ = Ψᵢ₊₁ = zeros(FC, p, p) for i = 1:k pos1 = (i-1)*p + 1 pos2 = i*p + pos3 = pos1 + p + pos4 = pos2 + p vᵢ = view(V,:,pos1:pos2) - vᵢ₊₁ = view(V,:,pos1+p:pos2+p) + vᵢ₊₁ = view(V,:,pos3:pos4) + if i == 1 q .= B reduced_qr!(q, ψ₁, algo) vᵢ .= q end + mul!(q, A, vᵢ) + if i ≥ 2 - vᵢ₋₁ = view(V,:,pos1-p:pos2-p) - Ψᵢ = T[pos1:pos2,pos1-p:pos2-p] - T[pos1-p:pos2-p,pos1:pos2] .= Ψᵢ' + pos5 = pos1 - p + pos6 = pos2 - p + vᵢ₋₁ = view(V,:,pos5:pos6) mul!(q, vᵢ₋₁, Ψᵢ', α, β) # q = q - vᵢ₋₁ * Ψᵢᴴ end + mul!(Ωᵢ, vᵢ', q) # Ωᵢ = vᵢᴴ * q mul!(q, vᵢ, Ωᵢ, α, β) # q = q - vᵢ * Ωᵢᴴ - T[pos1:pos2,pos1:pos2] .= Ωᵢ - Ψᵢ₊₁ = view(T,pos1+p:pos2+p,pos1:pos2) + + # Store the block Ωᵢ in Tₖ₊₁.ₖ + for ii = 1:p + indi = pos1+ii-1 + for jj = 1:p + indj = pos1+jj-1 + T[indi,indj] = Ωᵢ[ii,jj] + end + end + reduced_qr!(q, Ψᵢ₊₁, algo) vᵢ₊₁ .= q + + # Store the block Ψᵢ₊₁ in Tₖ₊₁.ₖ + for ii = 1:p + indi = pos3+ii-1 + for jj = 1:p + indj = pos1+jj-1 + (ii ≤ jj) && (T[indi,indj] = Ψᵢ₊₁[ii,jj]) + (ii ≤ jj) && (i < k) && (T[indj,indi] = conj(Ψᵢ₊₁[ii,jj])) + end + end end return V, ψ₁, T end @@ -58,7 +97,7 @@ end #### Input arguments -* `A`: a linear operator that models a sqᵤare matrix of dimension n; +* `A`: a linear operator that models a square matrix of dimension n; * `B`: a matrix of size n × p; * `C`: a matrix of size n × p; * `k`: the number of iterations of the block non-Hermitian Lanczos process. @@ -147,7 +186,7 @@ end #### Input arguments -* `A`: a linear operator that models a sqᵤare matrix of dimension n; +* `A`: a linear operator that models a square matrix of dimension n; * `B`: a matrix of size n × p; * `k`: the number of iterations of the block Arnoldi process. @@ -177,33 +216,41 @@ function arnoldi(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs", reorthogo for j = 1:k pos1 = (j-1)*p + 1 pos2 = j*p + pos3 = pos1 + p + pos4 = pos2 + p vⱼ = view(V,:,pos1:pos2) - vⱼ₊₁ = view(V,:,pos1+p:pos2+p) + vⱼ₊₁ = view(V,:,pos3:pos4) + if j == 1 q .= B reduced_qr!(q, Γ, algo) vⱼ .= q end + mul!(q, A, vⱼ) + for i = 1:j - pos3 = (i-1)*p + 1 - pos4 = i*p - vᵢ = view(V,:,pos3:pos4) + pos5 = (i-1)*p + 1 + pos6 = i*p + vᵢ = view(V,:,pos5:pos6) mul!(Ψᵢⱼ, vᵢ', q) # Ψᵢⱼ = vᵢᴴ * q mul!(q, vᵢ, Ψᵢⱼ, α, β) # q = q - vᵢ * Ψᵢⱼ - H[pos3:pos4,pos1:pos2] .= Ψᵢⱼ + H[pos5:pos6,pos1:pos2] .= Ψᵢⱼ end + if reorthogonalization for i = 1:j - pos3 = (i-1)*p + 1 - pos4 = i*p - vᵢ = view(V,:,pos3:pos4) + pos5 = (i-1)*p + 1 + pos6 = i*p + vᵢ = view(V,:,pos5:pos6) mul!(Ψtmp, vᵢ', q) # Ψtmp = vᵢᴴ * q mul!(q, vᵢ, Ψtmp, α, β) # q = q - vᵢ * Ψtmp - add!(H, Ψtmp, pos3, pos4, pos1, pos2) + Hᵢⱼ = view(H,pos5:pos6,pos1:pos2) + Hᵢⱼ .= Hᵢⱼ .+ Ψtmp end end - Ψ = view(H, pos1+p:pos2+p,pos1:pos2) + + Ψ = view(H, pos3:pos4,pos1:pos2) reduced_qr!(q, Ψ, algo) vⱼ₊₁ .= q end @@ -231,15 +278,31 @@ function golub_kahan(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") where t, p = size(B) Aᴴ = A' + nnzL = p*k*(p+1) + div(p*(p+1), 2) + colptr = zeros(Int, p*(k+1)+1) + rowval = zeros(Int, nnzL) + nzval = zeros(FC, nnzL) + + colptr[1] = 1 + for j = 1:(k+1)*p + pos = colptr[j] + for i = j:min((k+1)*p,j+p) + rowval[pos] = i + pos += 1 + end + colptr[j+1] = pos + end + V = zeros(FC, n, (k+1)*p) U = zeros(FC, m, (k+1)*p) - L = spzeros(FC, (k+1)*p, (k+1)*p) + L = SparseMatrixCSC((k+1)*p, (k+1)*p, colptr, rowval, nzval) α = -one(FC) β = one(FC) qᵥ = zeros(FC, n, p) qᵤ = zeros(FC, m, p) Ψ₁ = zeros(FC, p, p) + Ψᵢ₊₁ = TΩᵢ = TΩᵢ₊₁ = zeros(FC, p, p) for i = 1:k pos1 = (i-1)*p + 1 @@ -250,26 +313,55 @@ function golub_kahan(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") where vᵢ₊₁ = view(V,:,pos3:pos4) uᵢ = view(U,:,pos1:pos2) uᵢ₊₁ = view(U,:,pos3:pos4) + if i == 1 qᵤ .= B reduced_qr!(qᵤ, Ψ₁, algo) uᵢ .= qᵤ + mul!(qᵥ, Aᴴ, uᵢ) - Ωᵢ = view(L,pos1:pos2,pos1:pos2) - reduced_qr!(qᵥ, Ωᵢ', algo) + reduced_qr!(qᵥ, TΩᵢ, algo) vᵢ .= qᵥ + + # Store the block Ω₁ in Lₖ₊₁.ₖ₊₁ + for ii = 1:p + indi = pos1+ii-1 + for jj = 1:p + indj = pos1+jj-1 + (ii ≤ jj) && (L[indj,indi] = conj(TΩᵢ[ii,jj])) + end + end end - Ωᵢ = view(L,pos1:pos2,pos1:pos2) + mul!(qᵤ, A, vᵢ) - mul!(qᵤ, uᵢ, Ωᵢ, α, β) # qᵤ = qᵤ - uᵢ * Ωᵢ - Ψᵢ₊₁ = view(L,pos3:pos4,pos1:pos2) + mul!(qᵤ, uᵢ, TΩᵢ', α, β) # qᵤ = qᵤ - uᵢ * Ωᵢ + reduced_qr!(qᵤ, Ψᵢ₊₁, algo) uᵢ₊₁ .= qᵤ + + # Store the block Ψᵢ₊₁ in Lₖ₊₁.ₖ₊₁ + for ii = 1:p + indi = pos3+ii-1 + for jj = 1:p + indj = pos1+jj-1 + (ii ≤ jj) && (L[indi,indj] = Ψᵢ₊₁[ii,jj]) + end + end + mul!(qᵥ, Aᴴ, uᵢ₊₁) mul!(qᵥ, vᵢ, Ψᵢ₊₁', α, β) # qᵥ = qᵥ - vᵢ * Ψᵢ₊₁ᴴ - Ωᵢ₊₁ = view(L,pos3:pos4,pos3:pos4) - reduced_qr!(qᵥ, Ωᵢ₊₁', algo) + + reduced_qr!(qᵥ, TΩᵢ₊₁, algo) vᵢ₊₁ .= qᵥ + + # Store the block Ωᵢ₊₁ in Lₖ₊₁.ₖ₊₁ + for ii = 1:p + indi = pos3+ii-1 + for jj = 1:p + indj = pos3+jj-1 + (ii ≤ jj) && (L[indj,indi] = conj(TΩᵢ₊₁[ii,jj])) + end + end end return V, U, Ψ₁, L end @@ -298,10 +390,26 @@ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: t, p = size(B) Aᴴ = A' + nnzT = p*p + (k-1)*p*(2*p+1) + div(p*(p+1), 2) + colptr = zeros(Int, p*k+1) + rowval = zeros(Int, nnzT) + nzval_T = zeros(FC, nnzT) + nzval_Tᴴ = zeros(FC, nnzT) + + colptr[1] = 1 + for j = 1:k*p + pos = colptr[j] + for i = max(1, j-p):j+p + rowval[pos] = i + pos += 1 + end + colptr[j+1] = pos + end + V = zeros(FC, m, (k+1)*p) U = zeros(FC, n, (k+1)*p) - T = spzeros(FC, (k+1)*p, k*p) - Tᴴ = spzeros(FC, (k+1)*p, k*p) + T = SparseMatrixCSC((k+1)*p, k*p, colptr, rowval, nzval_T) + Tᴴ = SparseMatrixCSC((k+1)*p, k*p, colptr, rowval, nzval_Tᴴ) α = -one(FC) β = one(FC) @@ -309,7 +417,7 @@ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: qᵤ = zeros(FC, n, p) Ψ₁ = zeros(FC, p, p) Φ₁ᴴ = zeros(FC, p, p) - Ωᵢ = Ψᵢ = TΦᵢ = zeros(FC, p, p) + Ωᵢ = Ψᵢ = Ψᵢ₊₁ = TΦᵢ = TΦᵢ₊₁ = zeros(FC, p, p) for i = 1:k pos1 = (i-1)*p + 1 @@ -320,6 +428,7 @@ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: vᵢ₊₁ = view(V,:,pos3:pos4) uᵢ = view(U,:,pos1:pos2) uᵢ₊₁ = view(U,:,pos3:pos4) + if i == 1 qᵥ .= B reduced_qr!(qᵥ, Ψ₁, algo) @@ -328,31 +437,66 @@ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: reduced_qr!(qᵤ, Φ₁ᴴ, algo) uᵢ .= qᵤ end + mul!(qᵥ, A, uᵢ) mul!(qᵤ, Aᴴ, vᵢ) + if i ≥ 2 pos5 = pos1 - p pos6 = pos2 - p vᵢ₋₁ = view(V,:,pos5:pos6) uᵢ₋₁ = view(U,:,pos5:pos6) - Φᵢ = view(Tᴴ,pos1:pos2,pos5:pos6)' - view(T,pos5:pos6,pos1:pos2) .= Φᵢ - mul!(qᵥ, vᵢ₋₁, Φᵢ, α, β) # qᵥ = qᵥ - vᵢ₋₁ * Φᵢ - TΨᵢ = view(T,pos1:pos2,pos5:pos6)' - view(Tᴴ,pos5:pos6,pos1:pos2) .= TΨᵢ - mul!(qᵤ, uᵢ₋₁, TΨᵢ, α, β) # qᵤ = qᵤ - uᵢ₋₁ * Ψᵢᴴ + + mul!(qᵥ, vᵢ₋₁, TΦᵢ', α, β) # qᵥ = qᵥ - vᵢ₋₁ * Φᵢ + for ii = 1:p + indi = pos1+ii-1 + for jj = 1:p + indj = pos5+jj-1 + Ψᵢ[ii,jj] = T[indi,indj] + end + end + mul!(qᵤ, uᵢ₋₁, Ψᵢ', α, β) # qᵤ = qᵤ - uᵢ₋₁ * Ψᵢᴴ end + mul!(Ωᵢ, vᵢ', qᵥ) - view(T,pos1:pos2,pos1:pos2) .= Ωᵢ - view(Tᴴ,pos1:pos2,pos1:pos2) .= Ωᵢ' mul!(qᵥ, vᵢ, Ωᵢ, α, β) # qᵥ = qᵥ - vᵢ * Ωᵢ mul!(qᵤ, uᵢ, Ωᵢ', α, β) # qᵤ = qᵤ - uᵢ * Ωᵢᴴ - Ψᵢ₊₁ = view(T,pos3:pos4,pos1:pos2) + + # Store the block Ωᵢ in Tₖ₊₁.ₖ + for ii = 1:p + indi = pos1+ii-1 + for jj = 1:p + indj = pos1+jj-1 + T[indi,indj] = Ωᵢ[ii,jj] + Tᴴ[indi,indj] = conj(Ωᵢ[jj,ii]) + end + end + reduced_qr!(qᵥ, Ψᵢ₊₁, algo) vᵢ₊₁ .= qᵥ - TΦᵢ₊₁ = view(Tᴴ,pos3:pos4,pos1:pos2) + + # Store the block Ψᵢ₊₁ in Tₖ₊₁.ₖ + for ii = 1:p + indi = pos3+ii-1 + for jj = 1:p + indj = pos1+jj-1 + (ii ≤ jj) && (T[indi,indj] = Ψᵢ₊₁[ii,jj]) + (ii ≤ jj) && (i < k) && (Tᴴ[indj,indi] = conj(Ψᵢ₊₁[ii,jj])) + end + end + reduced_qr!(qᵤ, TΦᵢ₊₁, algo) uᵢ₊₁ .= qᵤ + + # Store the block Φᵢ₊₁ in Tₖ₊₁.ₖ + for ii = 1:p + indi = pos3+ii-1 + for jj = 1:p + indj = pos1+jj-1 + (ii ≤ jj) && (Tᴴ[indi,indj] = TΦᵢ₊₁[ii,jj]) + (ii ≤ jj) && (i < k) && (T[indj,indi] = conj(TΦᵢ₊₁[ii,jj])) + end + end end return V, Ψ₁, T, U, Φ₁ᴴ, Tᴴ end @@ -401,50 +545,66 @@ function montoison_orban(A, B, D::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k:: for j = 1:k pos1 = (j-1)*p + 1 pos2 = j*p + pos3 = pos1 + p + pos4 = pos2 + p vⱼ = view(V,:,pos1:pos2) - vⱼ₊₁ = view(V,:,pos1+p:pos2+p) + vⱼ₊₁ = view(V,:,pos3:pos4) uⱼ = view(U,:,pos1:pos2) - uⱼ₊₁ = view(U,:,pos1+p:pos2+p) + uⱼ₊₁ = view(U,:,pos3:pos4) + if j == 1 qᵥ .= D reduced_qr!(qᵥ, Γ, algo) vⱼ .= qᵥ + qᵤ .= C reduced_qr!(qᵤ, Λ, algo) uⱼ .= qᵤ end + mul!(qᵥ, A, uⱼ) mul!(qᵤ, B, vⱼ) + for i = 1:j - pos3 = (i-1)*p + 1 - pos4 = i*p - vᵢ = view(V,:,pos3:pos4) - uᵢ = view(U,:,pos3:pos4) + pos5 = (i-1)*p + 1 + pos6 = i*p + vᵢ = view(V,:,pos5:pos6) + uᵢ = view(U,:,pos5:pos6) + mul!(Ψᵢⱼ, vᵢ', qᵥ) # Ψᵢⱼ = vᵢᴴ * qᵥ mul!(qᵥ, vᵢ, Ψᵢⱼ, α, β) # qᵥ = qᵥ - vᵢ * Ψᵢⱼ - H[pos3:pos4,pos1:pos2] .= Ψᵢⱼ + H[pos5:pos6,pos1:pos2] .= Ψᵢⱼ + mul!(Φᵢⱼ, uᵢ', qᵤ) # Φᵢⱼ = uᵢᴴ * qᵤ mul!(qᵤ, uᵢ, Φᵢⱼ, α, β) # qᵤ = qᵤ - uᵢ * Φᵢⱼ - F[pos3:pos4,pos1:pos2] .= Φᵢⱼ + F[pos5:pos6,pos1:pos2] .= Φᵢⱼ end + if reorthogonalization for i = 1:j - pos3 = (i-1)*p + 1 - pos4 = i*p - vᵢ = view(V,:,pos3:pos4) - uᵢ = view(U,:,pos3:pos4) + pos5 = (i-1)*p + 1 + pos6 = i*p + vᵢ = view(V,:,pos5:pos6) + uᵢ = view(U,:,pos5:pos6) + mul!(Ψtmp, vᵢ', qᵥ) # Ψtmp = vᵢᴴ * qᵥ mul!(qᵥ, vᵢ, Ψtmp, α, β) # qᵥ = qᵥ - vᵢ * Ψtmp - add!(H, Ψtmp, pos3, pos4, pos1, pos2) + Hᵢⱼ = view(H,pos5:pos6,pos1:pos2) + Hᵢⱼ .= Hᵢⱼ .+ Ψtmp + mul!(Φtmp, uᵢ', qᵤ) # Φtmp = uᵢᴴ * qᵤ mul!(qᵤ, uᵢ, Φtmp, α, β) # qᵤ = qᵤ - uᵢ * Φtmp - add!(F, Φtmp, pos3, pos4, pos1, pos2) + Fᵢⱼ = view(F,pos5:pos6,pos1:pos2) + Fᵢⱼ .= Fᵢⱼ .+ Φtmp + # add!(F, Φtmp, pos5, pos6, pos1, pos2) end end - Ψ = view(H, pos1+p:pos2+p,pos1:pos2) + + Ψ = view(H, pos3:pos4,pos1:pos2) reduced_qr!(qᵥ, Ψ, algo) vⱼ₊₁ .= qᵥ - Φ = view(F, pos1+p:pos2+p,pos1:pos2) + + Φ = view(F, pos3:pos4,pos1:pos2) reduced_qr!(qᵤ, Φ, algo) uⱼ₊₁ .= qᵤ end diff --git a/src/krylov_processes.jl b/src/krylov_processes.jl index 357e4cd41..0e9b64cd2 100644 --- a/src/krylov_processes.jl +++ b/src/krylov_processes.jl @@ -43,7 +43,7 @@ function hermitian_lanczos(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOr end end - local β₁ + β₁ = zero(R) V = M(undef, n, k+1) T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval) @@ -99,6 +99,7 @@ end function nonhermitian_lanczos(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex m, n = size(A) Aᴴ = A' + R = real(FC) S = ktypeof(b) M = vector_to_matrix(S) @@ -121,7 +122,7 @@ function nonhermitian_lanczos(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k end end - local β₁, γ₁ᴴ + β₁ = γ₁ᴴ = zero(R) V = M(undef, n, k+1) U = M(undef, n, k+1) T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T) @@ -196,10 +197,11 @@ end """ function arnoldi(A, b::AbstractVector{FC}, k::Int; reorthogonalization::Bool=false) where FC <: FloatOrComplex m, n = size(A) + R = real(FC) S = ktypeof(b) M = vector_to_matrix(S) - local β + β = zero(R) V = M(undef, n, k+1) H = zeros(FC, k+1, k) @@ -275,7 +277,7 @@ function golub_kahan(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComple end end - local β₁ + β₁ = zero(R) V = M(undef, n, k+1) U = M(undef, m, k+1) L = SparseMatrixCSC(k+1, k+1, colptr, rowval, nzval) @@ -337,6 +339,7 @@ end function saunders_simon_yip(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex m, n = size(A) Aᴴ = A' + R = real(FC) S = ktypeof(b) M = vector_to_matrix(S) @@ -359,7 +362,7 @@ function saunders_simon_yip(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k:: end end - local β₁, γ₁ᴴ + β₁ = γ₁ᴴ = zero(R) V = M(undef, m, k+1) U = M(undef, n, k+1) T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T) @@ -437,10 +440,11 @@ end """ function montoison_orban(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int; reorthogonalization::Bool=false) where FC <: FloatOrComplex m, n = size(A) + R = real(FC) S = ktypeof(b) M = vector_to_matrix(S) - local β, γ + β = γ = zero(R) V = M(undef, m, k+1) U = M(undef, n, k+1) H = zeros(FC, k+1, k) diff --git a/src/processes_utils.jl b/src/processes_utils.jl index 4f133c917..057cafb9d 100644 --- a/src/processes_utils.jl +++ b/src/processes_utils.jl @@ -20,6 +20,7 @@ end function gs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, v::AbstractVector{FC}) where FC <: FloatOrComplex n, k = size(Q) aⱼ = v + R .= zero(FC) for j = 1:k qⱼ = view(Q,:,j) aⱼ .= qⱼ @@ -53,6 +54,7 @@ end function mgs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}) where FC <: FloatOrComplex n, k = size(Q) + R .= zero(FC) for i = 1:k qᵢ = view(Q,:,i) R[i,i] = @knrm2(n, qᵢ) # rᵢᵢ = ‖qᵢ‖ @@ -85,6 +87,7 @@ end function householder!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, τ::AbstractVector{FC}) where FC <: FloatOrComplex n, k = size(Q) + R .= zero(FC) @kgeqrf!(Q, τ) for i = 1:k for j = i:k @@ -117,6 +120,7 @@ end function givens!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, C::AbstractVector{T}, S::AbstractVector{FC}) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} n, k = size(Q) + R .= zero(FC) pos = 0 for j = 1:k for i = n-1:-1:j @@ -181,16 +185,3 @@ function reduced_qr(A::AbstractMatrix{FC}, algo::String) where FC <: FloatOrComp end return Q, R end - -function add!(A, B, i1, i2, j1, j2) - i = 0 - for ii = i1:i2 - i += 1 - j = 0 - for jj = j1:j2 - j += 1 - A[ii,jj] += B[i,j] - end - end - return A -end diff --git a/test/runtests.jl b/test/runtests.jl index 6698e90f6..040d2edd1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,8 @@ using Krylov, LinearAlgebra, SparseArrays, Printf, Random, Test import Krylov.KRYLOV_SOLVERS +Random.seed!(666) + include("test_utils.jl") include("test_aux.jl") include("test_stats.jl") diff --git a/test/test_block_processes.jl b/test/test_block_processes.jl index e881bc12c..03629d78e 100644 --- a/test/test_block_processes.jl +++ b/test/test_block_processes.jl @@ -1,15 +1,13 @@ - @testset "Block processes" begin m = 250 n = 500 k = 20 p = 4 s = 4 + verbose = true for FC in (Float64, ComplexF64) - R = real(FC) nbits_FC = sizeof(FC) - nbits_R = sizeof(R) nbits_I = sizeof(Int) @testset "Data Type: $FC" begin @@ -25,12 +23,28 @@ @test norm(V[:,1:p*s]' * V[:,1:p*s] - I) ≤ 1e-4 @test V[:,1:p] * Ψ₁ ≈ B @test A * V[:,1:p*k] ≈ V * T + end - # storage_block_hermitian_lanczos_bytes(n, k) = ... + @testset "memory" begin + function storage_block_hermitian_lanczos_bytes(n, p, k) + nnzT = p*p + (k-1)*p*(2*p+1) + div(p*(p+1), 2) + memory = 0 + memory += (p*k+1) * nbits_I # Tₖ₊₁.ₖ -- colptr + memory += nnzT * nbits_I # Tₖ₊₁.ₖ -- rowval + memory += nnzT * nbits_FC # Tₖ₊₁.ₖ -- nzval + memory += p*p * nbits_FC # Ψ₁ + memory += p*p * nbits_FC # Ωᵢ, Ψᵢ and Ψᵢ₊₁ + memory += n*p * (k+1) * nbits_FC # Vₖ₊₁ + memory += p*n * nbits_FC # q + return memory + end - # expected_block_hermitian_lanczos_bytes = storage_block_hermitian_lanczos_bytes(n, p, k) - # actual_block_hermitian_lanczos_bytes = @allocated hermitian_lanczos(A, B, k; algo) - # @test expected_block_hermitian_lanczos_bytes ≤ actual_block_hermitian_lanczos_bytes ≤ 1.02 * expected_block_hermitian_lanczos_bytes + expected_block_hermitian_lanczos_bytes = storage_block_hermitian_lanczos_bytes(n, p, k) + actual_block_hermitian_lanczos_bytes = @allocated hermitian_lanczos(A, B, k; algo="mgs") + verbose && println("Block Hermitian Lanczos | $FC") + verbose && println(expected_block_hermitian_lanczos_bytes, " ≤ ", actual_block_hermitian_lanczos_bytes, " ≤ ", 1.02 * expected_block_hermitian_lanczos_bytes, " ?") + verbose && println() + @test expected_block_hermitian_lanczos_bytes ≤ actual_block_hermitian_lanczos_bytes ≤ 1.02 * expected_block_hermitian_lanczos_bytes end end @@ -48,11 +62,16 @@ @test A * V[:,1:k*p] ≈ V * T @test A' * U[:,1:k*p] ≈ U * Tᴴ - # storage_block_nonhermitian_lanczos_bytes(n, p, k) = ... - - # expected_block_nonhermitian_lanczos_bytes = storage_block_nonhermitian_lanczos_bytes(n, p, k) - # actual_block_nonhermitian_lanczos_bytes = @allocated nonhermitian_lanczos(A, B, C, k) - # @test expected_block_nonhermitian_lanczos_bytes ≤ actual_block_nonhermitian_lanczos_bytes ≤ 1.02 * expected_block_nonhermitian_lanczos_bytes + @testset "memory" begin + # storage_block_nonhermitian_lanczos_bytes(n, p, k) = ... + # + # expected_block_nonhermitian_lanczos_bytes = storage_block_nonhermitian_lanczos_bytes(n, p, k) + # actual_block_nonhermitian_lanczos_bytes = @allocated nonhermitian_lanczos(A, B, C, k) + # verbose && println("Block non-Hermitian Lanczos") + # verbose && println(expected_block_nonhermitian_lanczos_bytes, " ≤ ", actual_block_nonhermitian_lanczos_bytes, " ≤ ", 1.02 * expected_block_nonhermitian_lanczos_bytes, " ?") + # verbose && println() + # @test expected_block_nonhermitian_lanczos_bytes ≤ actual_block_nonhermitian_lanczos_bytes ≤ 1.02 * expected_block_nonhermitian_lanczos_bytes + end end @testset "Block Arnoldi" begin @@ -66,24 +85,26 @@ @test norm(V[:,1:p*s]' * V[:,1:p*s] - I) ≤ 1e-4 @test V[:,1:p] * Γ ≈ B @test A * V[:,1:p*k] ≈ V * H + end + end - if algo == "mgs" - function storage_block_arnoldi_bytes(n, p, k) - memory = 0 - memory += p*n * (k+1) * nbits_FC # Vₖ₊₁ - memory += n*p * nbits_FC # q - memory += p*k * p*(k+1) * nbits_FC # Hₖ₊₁.ₖ - memory += p*p * nbits_FC # Γ - memory += p*p * nbits_FC # Ψᵢⱼ - return memory - end - - expected_block_arnoldi_bytes = storage_block_arnoldi_bytes(n, p, k) - actual_block_arnoldi_bytes = @allocated arnoldi(A, B, k; algo, reorthogonalization) - println(expected_block_arnoldi_bytes, " ≤ ", actual_block_arnoldi_bytes, " ≤ ", 1.02 * expected_block_arnoldi_bytes, " ?") - @test expected_block_arnoldi_bytes ≤ actual_block_arnoldi_bytes ≤ 1.02 * expected_block_arnoldi_bytes - end + @testset "memory -- reorthogonalization = $reorthogonalization" for reorthogonalization in (false, true) + function storage_block_arnoldi_bytes(n, p, k) + memory = 0 + memory += p*n * (k+1) * nbits_FC # Vₖ₊₁ + memory += n*p * nbits_FC # q + memory += p*k * p*(k+1) * nbits_FC # Hₖ₊₁.ₖ + memory += p*p * nbits_FC # Γ + memory += p*p * nbits_FC # Ψᵢⱼ and Ψtmp + return memory end + + expected_block_arnoldi_bytes = storage_block_arnoldi_bytes(n, p, k) + actual_block_arnoldi_bytes = @allocated arnoldi(A, B, k; algo="mgs", reorthogonalization) + verbose && println("Block Arnoldi | $FC") + verbose && println(expected_block_arnoldi_bytes, " ≤ ", actual_block_arnoldi_bytes, " ≤ ", 1.02 * expected_block_arnoldi_bytes, " ?") + verbose && println() + @test expected_block_arnoldi_bytes ≤ actual_block_arnoldi_bytes ≤ 1.02 * expected_block_arnoldi_bytes end end @@ -102,12 +123,28 @@ @test A' * U ≈ V * L' @test A' * A * V[:,1:k*p] ≈ V * L' * BL @test A * A' * U[:,1:k*p] ≈ U * BL * L[1:k*p,1:k*p]' + end - # storage_block_golub_kahan_bytes(m, n, p, k) = ... + @testset "memory" begin + function storage_block_golub_kahan_bytes(m, n, p, k) + nnzL = p*k*(p+1) + div(p*(p+1), 2) + memory = 0 + memory += (p*(k+1)+1) * nbits_I # Lₖ₊₁ -- colptr + memory += nnzL * nbits_I # Lₖ₊₁ -- rowval + memory += nnzL * nbits_FC # Lₖ₊₁ -- nzval + memory += p*p * nbits_FC # Ψ₁ + memory += p*p * nbits_FC # Ψᵢ₊₁, TΩᵢ and TΩᵢ₊₁ + memory += p*(n+m) * (k+1) * nbits_FC # Vₖ₊₁ and Uₖ₊₁ + memory += p*(n+m) * nbits_FC # qᵥ and qᵤ + return memory + end - # expected_block_golub_kahan_bytes = storage_block_golub_kahan_bytes(m, n, p, k) - # actual_block_golub_kahan_bytes = @allocated golub_kahan(A, B, k; algo) - # @test expected_block_golub_kahan_bytes ≤ actual_block_golub_kahan_bytes ≤ 1.02 * expected_block_golub_kahan_bytes + expected_block_golub_kahan_bytes = storage_block_golub_kahan_bytes(m, n, p, k) + actual_block_golub_kahan_bytes = @allocated golub_kahan(A, B, k; algo="mgs") + verbose && println("Block Golub-Kahan | $FC") + verbose && println(expected_block_golub_kahan_bytes, " ≤ ", actual_block_golub_kahan_bytes, " ≤ ", 1.02 * expected_block_golub_kahan_bytes, " ?") + verbose && println() + @test expected_block_golub_kahan_bytes ≤ actual_block_golub_kahan_bytes ≤ 1.02 * expected_block_golub_kahan_bytes end end @@ -128,12 +165,27 @@ @test A' * V[:,1:k*p] ≈ U * Tᴴ @test A' * A * U[:,1:(k-1)*p] ≈ U * Tᴴ * T[1:k*p,1:(k-1)*p] @test A * A' * V[:,1:(k-1)*p] ≈ V * T * Tᴴ[1:k*p,1:(k-1)*p] + end - # storage_block_saunders_simon_yip_bytes(m, n, p, k) = ... + @testset "memory" begin + function storage_block_saunders_simon_yip_bytes(m, n, p, k) + nnzT = p*p + (k-1)*p*(2*p+1) + div(p*(p+1), 2) + memory = 0 + memory += (p*k+1) * nbits_I # Tₖ₊₁.ₖ and (Tₖ.ₖ₊₁)ᴴ -- colptr + memory += nnzT * nbits_I # Tₖ₊₁.ₖ and (Tₖ.ₖ₊₁)ᴴ -- rowval + memory += 2 * nnzT * nbits_FC # Tₖ₊₁.ₖ and (Tₖ.ₖ₊₁)ᴴ -- nzval + memory += 2 * p*p * nbits_FC # Ψ₁ and Φ₁ᴴ + memory += p*p * nbits_FC # Ωᵢ, Ψᵢ, Ψᵢ₊₁, TΦᵢ and TΦᵢ₊₁ + memory += p*(n+m) * (k+1) * nbits_FC # Vₖ₊₁ and Uₖ₊₁ + memory += p*(n+m) * nbits_FC # qᵥ and qᵤ + end - # expected_block_saunders_simon_yip_bytes = storage_block_saunders_simon_yip_bytes(m, n, p, k) - # actual_block_saunders_simon_yip_bytes = @allocated saunders_simon_yip(A, B, C, k; algo) - # @test expected_block_saunders_simon_yip_bytes ≤ actual_block_saunders_simon_yip_bytes ≤ 1.02 * expected_block_saunders_simon_yip_bytes + expected_block_saunders_simon_yip_bytes = storage_block_saunders_simon_yip_bytes(m, n, p, k) + actual_block_saunders_simon_yip_bytes = @allocated saunders_simon_yip(A, B, C, k; algo="mgs") + verbose && println("Block Saunders-Simon-Yip") + verbose && println(expected_block_saunders_simon_yip_bytes, " ≤ ", actual_block_saunders_simon_yip_bytes, " ≤ ", 1.02 * expected_block_saunders_simon_yip_bytes, " ?") + verbose && println() + @test expected_block_saunders_simon_yip_bytes ≤ actual_block_saunders_simon_yip_bytes ≤ 1.02 * expected_block_saunders_simon_yip_bytes end end @@ -155,24 +207,26 @@ @test B * V[:,1:k*p] ≈ U * F @test B * A * U[:,1:(k-1)*p] ≈ U * F * H[1:k*p,1:(k-1)*p] @test A * B * V[:,1:(k-1)*p] ≈ V * H * F[1:k*p,1:(k-1)*p] + end + end - if algo == "mgs" - function storage_block_montoison_orban_bytes(m, n, p, k) - memory = 0 - memory += p*(n+m) * (k+1) * nbits_FC # Vₖ₊₁ and Uₖ₊₁ - memory += 2 * p*k * p*(k+1) * nbits_FC # Hₖ₊₁.ₖ and Fₖ₊₁.ₖ - memory += 2 * p*p * nbits_FC # Γ and Λ - memory += p*p * nbits_FC # Ψᵢⱼ and Φᵢⱼ - memory += (n+m)*p * nbits_FC # qᵥ and qᵤ - return memory - end - - expected_block_montoison_orban_bytes = storage_block_montoison_orban_bytes(m, n, p, k) - actual_block_montoison_orban_bytes = @allocated montoison_orban(A, B, D, C, k; algo, reorthogonalization) - println(expected_block_montoison_orban_bytes, " ≤ ", actual_block_montoison_orban_bytes, " ≤ ", 1.02 * expected_block_montoison_orban_bytes, " ?") - @test expected_block_montoison_orban_bytes ≤ actual_block_montoison_orban_bytes ≤ 1.02 * expected_block_montoison_orban_bytes - end + @testset "memory -- reorthogonalization = $reorthogonalization" for reorthogonalization in (false, true) + function storage_block_montoison_orban_bytes(m, n, p, k) + memory = 0 + memory += 2 * p*p * nbits_FC # Γ and Λ + memory += p*p * nbits_FC # Ψᵢⱼ, Φᵢⱼ, Ψtmp and Φtmp + memory += 2 * p*k * p*(k+1) * nbits_FC # Hₖ₊₁.ₖ and Fₖ₊₁.ₖ + memory += p*(n+m) * (k+1) * nbits_FC # Vₖ₊₁ and Uₖ₊₁ + memory += p*(n+m)* nbits_FC # qᵥ and qᵤ + return memory end + + expected_block_montoison_orban_bytes = storage_block_montoison_orban_bytes(m, n, p, k) + actual_block_montoison_orban_bytes = @allocated montoison_orban(A, B, D, C, k; algo="mgs", reorthogonalization) + verbose && println("Block Montoison-Orban | $FC") + verbose && println(expected_block_montoison_orban_bytes, " ≤ ", actual_block_montoison_orban_bytes, " ≤ ", 1.02 * expected_block_montoison_orban_bytes, " ?") + verbose && println() + @test expected_block_montoison_orban_bytes ≤ actual_block_montoison_orban_bytes ≤ 1.02 * expected_block_montoison_orban_bytes end end end diff --git a/test/test_cr.jl b/test/test_cr.jl index 5ae0774bd..45366ce45 100644 --- a/test/test_cr.jl +++ b/test/test_cr.jl @@ -23,7 +23,6 @@ # Sparse Laplacian A, _ = sparse_laplacian(FC=FC) - Random.seed!(0) b = randn(size(A, 1)) itmax = 0 # case: ‖x*‖ > Δ