From bfa8573da47b7846458d9085b2c30ba2b35e5159 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Fri, 25 Jun 2021 14:47:38 -0400 Subject: [PATCH] Use 5-arguments mul! in BILQ and QMR --- src/bilq.jl | 36 +++++++++++++++++------------------- src/krylov_solvers.jl | 12 ++---------- src/qmr.jl | 36 +++++++++++++++++------------------- test/test_alloc.jl | 8 ++++---- test/test_show.jl | 2 +- 5 files changed, 41 insertions(+), 53 deletions(-) diff --git a/src/bilq.jl b/src/bilq.jl index a2de6cb69..1bedb28fd 100644 --- a/src/bilq.jl +++ b/src/bilq.jl @@ -52,7 +52,7 @@ function bilq!(solver :: BilqSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstra Aᵀ = A' # Set up workspace. - uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, x, d̅ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p, solver.x, solver.d̅ + uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.d̅ # Initial solution x₀ and residual norm ‖r₀‖. x .= zero(T) @@ -100,20 +100,23 @@ function bilq!(solver :: BilqSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstra # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ # AᵀUₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ + mul!(vₖ₋₁, A , vₖ, one(T), -γₖ) # Forms vₖ₊₁ : vₖ₋₁ ← Avₖ - γₖvₖ₋₁ + mul!(uₖ₋₁, Aᵀ, uₖ, one(T), -βₖ) # Forms uₖ₊₁ : uₖ₋₁ ← Aᵀuₖ - βₖuₖ₋₁ - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + αₖ = @kdot(n, vₖ₋₁, uₖ) # αₖ = (Avₖ- γₖvₖ₋₁)ᵀuₖ - αₖ = @kdot(n, q, uₖ) # αₖ = qᵀuₖ + @kaxpy!(n, -αₖ, vₖ, vₖ₋₁) # vₖ₋₁ ← vₖ₋₁ - αₖ * vₖ + @kaxpy!(n, -αₖ, uₖ, uₖ₋₁) # uₖ₋₁ ← uₖ₋₁ - αₖ * uₖ - @kaxpy!(n, -αₖ, vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -αₖ, uₖ, p) # p ← p - αₖ * uₖ + qᵗp = @kdot(n, uₖ₋₁, vₖ₋₁) # qᵗp = ⟨Avₖ - γₖvₖ₋₁ - αₖvₖ, Aᵀuₖ - βₖuₖ₋₁ -αₖuₖ⟩ + βₖ₊₁ = √(abs(qᵗp)) # βₖ₊₁ = √(|qᵗp|) + γₖ₊₁ = qᵗp / βₖ₊₁ # γₖ₊₁ = qᵗp / βₖ₊₁ - qᵗp = @kdot(n, p, q) # qᵗp = ⟨q,p⟩ - βₖ₊₁ = √(abs(qᵗp)) # βₖ₊₁ = √(|qᵗp|) - γₖ₊₁ = qᵗp / βₖ₊₁ # γₖ₊₁ = qᵗp / βₖ₊₁ + # Compute vₖ₊₁ and uₖ₊₁. + if qᵗp ≠ zero(T) + @. vₖ₋₁ = vₖ₋₁ / βₖ₊₁ # βₖ₊₁vₖ₊₁ = Avₖ - γₖvₖ₋₁ - αₖvₖ + @. uₖ₋₁ = uₖ₋₁ / γₖ₊₁ # γₖ₊₁uₖ₊₁ = Aᵀuₖ - βₖuₖ₋₁ - αₖuₖ + end # Update the LQ factorization of Tₖ = L̅ₖQₖ. # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] @@ -187,14 +190,9 @@ function bilq!(solver :: BilqSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstra @kaxpby!(n, -cₖ, vₖ, sₖ, d̅) end - # Compute vₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if qᵗp ≠ 0 - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p - end + # Update uₖ₋₁, vₖ₋₁, uₖ and vₖ. + @kswap(uₖ₋₁, uₖ) + @kswap(vₖ₋₁, vₖ) # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ vₖᵀvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ) diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl index 2ce7ab985..8b27588cc 100644 --- a/src/krylov_solvers.jl +++ b/src/krylov_solvers.jl @@ -710,10 +710,8 @@ may be used in order to create these vectors. mutable struct BilqSolver{T,S} <: KrylovSolver{T,S} uₖ₋₁ :: S uₖ :: S - q :: S vₖ₋₁ :: S vₖ :: S - p :: S x :: S d̅ :: S @@ -721,13 +719,11 @@ mutable struct BilqSolver{T,S} <: KrylovSolver{T,S} T = eltype(S) uₖ₋₁ = S(undef, n) uₖ = S(undef, n) - q = S(undef, n) vₖ₋₁ = S(undef, n) vₖ = S(undef, n) - p = S(undef, n) x = S(undef, n) d̅ = S(undef, n) - solver = new{T,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, x, d̅) + solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅) return solver end @@ -751,10 +747,8 @@ may be used in order to create these vectors. mutable struct QmrSolver{T,S} <: KrylovSolver{T,S} uₖ₋₁ :: S uₖ :: S - q :: S vₖ₋₁ :: S vₖ :: S - p :: S x :: S wₖ₋₂ :: S wₖ₋₁ :: S @@ -763,14 +757,12 @@ mutable struct QmrSolver{T,S} <: KrylovSolver{T,S} T = eltype(S) uₖ₋₁ = S(undef, n) uₖ = S(undef, n) - q = S(undef, n) vₖ₋₁ = S(undef, n) vₖ = S(undef, n) - p = S(undef, n) x = S(undef, n) wₖ₋₂ = S(undef, n) wₖ₋₁ = S(undef, n) - solver = new{T,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, x, wₖ₋₂, wₖ₋₁) + solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁) return solver end diff --git a/src/qmr.jl b/src/qmr.jl index d4dfebedb..e223b35e8 100644 --- a/src/qmr.jl +++ b/src/qmr.jl @@ -59,7 +59,7 @@ function qmr!(solver :: QmrSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstract Aᵀ = A' # Set up workspace. - uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, x, wₖ₋₂, wₖ₋₁ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p, solver.x, solver.wₖ₋₂, solver.wₖ₋₁ + uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.wₖ₋₂, solver.wₖ₋₁ # Initial solution x₀ and residual norm ‖r₀‖. x .= zero(T) @@ -105,20 +105,23 @@ function qmr!(solver :: QmrSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstract # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ # AᵀUₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ - mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ + mul!(vₖ₋₁, A , vₖ, one(T), -γₖ) # Forms vₖ₊₁ : vₖ₋₁ ← Avₖ - γₖvₖ₋₁ + mul!(uₖ₋₁, Aᵀ, uₖ, one(T), -βₖ) # Forms uₖ₊₁ : uₖ₋₁ ← Aᵀuₖ - βₖuₖ₋₁ - @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ - @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ + αₖ = @kdot(n, vₖ₋₁, uₖ) # αₖ = (Avₖ- γₖvₖ₋₁)ᵀuₖ - αₖ = @kdot(n, q, uₖ) # αₖ = qᵀuₖ + @kaxpy!(n, -αₖ, vₖ, vₖ₋₁) # vₖ₋₁ ← vₖ₋₁ - αₖ * vₖ + @kaxpy!(n, -αₖ, uₖ, uₖ₋₁) # uₖ₋₁ ← uₖ₋₁ - αₖ * uₖ - @kaxpy!(n, -αₖ, vₖ, q) # q ← q - αₖ * vₖ - @kaxpy!(n, -αₖ, uₖ, p) # p ← p - αₖ * uₖ + qᵗp = @kdot(n, uₖ₋₁, vₖ₋₁) # qᵗp = ⟨Avₖ - γₖvₖ₋₁ - αₖvₖ, Aᵀuₖ - βₖuₖ₋₁ -αₖuₖ⟩ + βₖ₊₁ = √(abs(qᵗp)) # βₖ₊₁ = √(|qᵗp|) + γₖ₊₁ = qᵗp / βₖ₊₁ # γₖ₊₁ = qᵗp / βₖ₊₁ - qᵗp = @kdot(n, p, q) # qᵗp = ⟨q,p⟩ - βₖ₊₁ = √(abs(qᵗp)) # βₖ₊₁ = √(|qᵗp|) - γₖ₊₁ = qᵗp / βₖ₊₁ # γₖ₊₁ = qᵗp / βₖ₊₁ + # Compute vₖ₊₁ and uₖ₊₁. + if qᵗp ≠ zero(T) + @. vₖ₋₁ = vₖ₋₁ / βₖ₊₁ # βₖ₊₁vₖ₊₁ = Avₖ - γₖvₖ₋₁ - αₖvₖ + @. uₖ₋₁ = uₖ₋₁ / γₖ₊₁ # γₖ₊₁uₖ₊₁ = Aᵀuₖ - βₖuₖ₋₁ - αₖuₖ + end # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. # [ Oᵀ ] @@ -201,14 +204,9 @@ function qmr!(solver :: QmrSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstract # xₖ ← xₖ₋₁ + ζₖ * wₖ @kaxpy!(n, ζₖ, wₖ, x) - # Compute vₖ₊₁ and uₖ₊₁. - @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ - @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - - if qᵗp ≠ zero(T) - @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q - @. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p - end + # Update uₖ₋₁, vₖ₋₁, uₖ and vₖ. + @kswap(uₖ₋₁, uₖ) + @kswap(vₖ₋₁, vₖ) # Compute τₖ₊₁ = τₖ + ‖vₖ₊₁‖² τₖ₊₁ = τₖ + @kdot(n, vₖ, vₖ) diff --git a/test/test_alloc.jl b/test/test_alloc.jl index ee6250b3a..7c99b3577 100644 --- a/test/test_alloc.jl +++ b/test/test_alloc.jl @@ -364,8 +364,8 @@ function test_alloc() @test (VERSION < v"1.5") || (inplace_trilqr_bytes == 208) # BILQ needs: - # - 8 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅, p, q - storage_bilq(n) = 8 * n + # - 6 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅ + storage_bilq(n) = 6 * n storage_bilq_bytes(n) = 8 * storage_bilq(n) expected_bilq_bytes = storage_bilq_bytes(n) @@ -409,8 +409,8 @@ function test_alloc() @test (VERSION < v"1.5") || (inplace_minres_qlp_bytes == 208) # QMR needs: - # - 9 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p, q - storage_qmr(n) = 9 * n + # - 7 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ + storage_qmr(n) = 7 * n storage_qmr_bytes(n) = 8 * storage_qmr(n) expected_qmr_bytes = storage_qmr_bytes(n) diff --git a/test/test_show.jl b/test/test_show.jl index 0116f4512..0b5c45884 100644 --- a/test/test_show.jl +++ b/test/test_show.jl @@ -1,4 +1,4 @@ -@testset "Increase coverage of show" begin +@testset "show" begin solver = Krylov.SimpleStats(true, true, Float64[], Float64[], "t") io = IOBuffer() show(io, solver)