Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use 5-arguments mul! in BILQ and QMR #356

Merged
merged 1 commit into from
Jun 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 17 additions & 19 deletions src/bilq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ function bilq!(solver :: BilqSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstra
Aᵀ = A'

# Set up workspace.
uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, x, d̅ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p, solver.x, solver.d̅
uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.d̅

# Initial solution x₀ and residual norm ‖r₀‖.
x .= zero(T)
Expand Down Expand Up @@ -100,20 +100,23 @@ function bilq!(solver :: BilqSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstra
# AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
# AᵀUₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ

mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
mul!(vₖ₋₁, A , vₖ, one(T), -γₖ) # Forms vₖ₊₁ : vₖ₋₁ ← Avₖ - γₖvₖ₋₁
mul!(uₖ₋₁, Aᵀ, uₖ, one(T), -βₖ) # Forms uₖ₊₁ : uₖ₋₁ ← Aᵀuₖ - βₖuₖ₋₁

@kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
αₖ = @kdot(n, vₖ₋₁, uₖ) # αₖ = (Avₖ- γₖvₖ₋₁)ᵀuₖ

αₖ = @kdot(n, q, uₖ) # αₖ = qᵀuₖ
@kaxpy!(n, -αₖ, vₖ, vₖ₋₁) # vₖ₋₁ ← vₖ₋₁ - αₖ * vₖ
@kaxpy!(n, -αₖ, uₖ, uₖ₋₁) # uₖ₋₁ ← uₖ₋₁ - αₖ * uₖ

@kaxpy!(n, -αₖ, vₖ, q) # q ← q - αₖ * vₖ
@kaxpy!(n, -αₖ, uₖ, p) # p ← p - αₖ * uₖ
qᵗp = @kdot(n, uₖ₋₁, vₖ₋₁) # qᵗp = ⟨Avₖ - γₖvₖ₋₁ - αₖvₖ, Aᵀuₖ - βₖuₖ₋₁ -αₖuₖ⟩
βₖ₊₁ = √(abs(qᵗp)) # βₖ₊₁ = √(|qᵗp|)
γₖ₊₁ = qᵗp / βₖ₊₁ # γₖ₊₁ = qᵗp / βₖ₊₁

qᵗp = @kdot(n, p, q) # qᵗp = ⟨q,p⟩
βₖ₊₁ = √(abs(qᵗp)) # βₖ₊₁ = √(|qᵗp|)
γₖ₊₁ = qᵗp / βₖ₊₁ # γₖ₊₁ = qᵗp / βₖ₊₁
# Compute vₖ₊₁ and uₖ₊₁.
if qᵗp ≠ zero(T)
@. vₖ₋₁ = vₖ₋₁ / βₖ₊₁ # βₖ₊₁vₖ₊₁ = Avₖ - γₖvₖ₋₁ - αₖvₖ
@. uₖ₋₁ = uₖ₋₁ / γₖ₊₁ # γₖ₊₁uₖ₊₁ = Aᵀuₖ - βₖuₖ₋₁ - αₖuₖ
end

# Update the LQ factorization of Tₖ = L̅ₖQₖ.
# [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
Expand Down Expand Up @@ -187,14 +190,9 @@ function bilq!(solver :: BilqSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstra
@kaxpby!(n, -cₖ, vₖ, sₖ, d̅)
end

# Compute vₖ₊₁ and uₖ₊₁.
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ

if qᵗp ≠ 0
@. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
@. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p
end
# Update uₖ₋₁, vₖ₋₁, uₖ and vₖ.
@kswap(uₖ₋₁, uₖ)
@kswap(vₖ₋₁, vₖ)

# Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖
vₖᵀvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ)
Expand Down
12 changes: 2 additions & 10 deletions src/krylov_solvers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -708,24 +708,20 @@ may be used in order to create these vectors.
mutable struct BilqSolver{T,S} <: KrylovSolver{T,S}
uₖ₋₁ :: S
uₖ :: S
q :: S
vₖ₋₁ :: S
vₖ :: S
p :: S
x :: S
d̅ :: S

function BilqSolver(n, m, S)
T = eltype(S)
uₖ₋₁ = S(undef, n)
uₖ = S(undef, n)
q = S(undef, n)
vₖ₋₁ = S(undef, n)
vₖ = S(undef, n)
p = S(undef, n)
x = S(undef, n)
d̅ = S(undef, n)
solver = new{T,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, x, d̅)
solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅)
return solver
end

Expand All @@ -749,10 +745,8 @@ may be used in order to create these vectors.
mutable struct QmrSolver{T,S} <: KrylovSolver{T,S}
uₖ₋₁ :: S
uₖ :: S
q :: S
vₖ₋₁ :: S
vₖ :: S
p :: S
x :: S
wₖ₋₂ :: S
wₖ₋₁ :: S
Expand All @@ -761,14 +755,12 @@ mutable struct QmrSolver{T,S} <: KrylovSolver{T,S}
T = eltype(S)
uₖ₋₁ = S(undef, n)
uₖ = S(undef, n)
q = S(undef, n)
vₖ₋₁ = S(undef, n)
vₖ = S(undef, n)
p = S(undef, n)
x = S(undef, n)
wₖ₋₂ = S(undef, n)
wₖ₋₁ = S(undef, n)
solver = new{T,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, x, wₖ₋₂, wₖ₋₁)
solver = new{T,S}(uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁)
return solver
end

Expand Down
36 changes: 17 additions & 19 deletions src/qmr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ function qmr!(solver :: QmrSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstract
Aᵀ = A'

# Set up workspace.
uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, x, wₖ₋₂, wₖ₋₁ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p, solver.x, solver.wₖ₋₂, solver.wₖ₋₁
uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₂, wₖ₋₁ = solver.uₖ₋₁, solver.uₖ, solver.vₖ₋₁, solver.vₖ, solver.x, solver.wₖ₋₂, solver.wₖ₋₁

# Initial solution x₀ and residual norm ‖r₀‖.
x .= zero(T)
Expand Down Expand Up @@ -105,20 +105,23 @@ function qmr!(solver :: QmrSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstract
# AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
# AᵀUₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ

mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
mul!(vₖ₋₁, A , vₖ, one(T), -γₖ) # Forms vₖ₊₁ : vₖ₋₁ ← Avₖ - γₖvₖ₋₁
mul!(uₖ₋₁, Aᵀ, uₖ, one(T), -βₖ) # Forms uₖ₊₁ : uₖ₋₁ ← Aᵀuₖ - βₖuₖ₋₁

@kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
αₖ = @kdot(n, vₖ₋₁, uₖ) # αₖ = (Avₖ- γₖvₖ₋₁)ᵀuₖ

αₖ = @kdot(n, q, uₖ) # αₖ = qᵀuₖ
@kaxpy!(n, -αₖ, vₖ, vₖ₋₁) # vₖ₋₁ ← vₖ₋₁ - αₖ * vₖ
@kaxpy!(n, -αₖ, uₖ, uₖ₋₁) # uₖ₋₁ ← uₖ₋₁ - αₖ * uₖ

@kaxpy!(n, -αₖ, vₖ, q) # q ← q - αₖ * vₖ
@kaxpy!(n, -αₖ, uₖ, p) # p ← p - αₖ * uₖ
qᵗp = @kdot(n, uₖ₋₁, vₖ₋₁) # qᵗp = ⟨Avₖ - γₖvₖ₋₁ - αₖvₖ, Aᵀuₖ - βₖuₖ₋₁ -αₖuₖ⟩
βₖ₊₁ = √(abs(qᵗp)) # βₖ₊₁ = √(|qᵗp|)
γₖ₊₁ = qᵗp / βₖ₊₁ # γₖ₊₁ = qᵗp / βₖ₊₁

qᵗp = @kdot(n, p, q) # qᵗp = ⟨q,p⟩
βₖ₊₁ = √(abs(qᵗp)) # βₖ₊₁ = √(|qᵗp|)
γₖ₊₁ = qᵗp / βₖ₊₁ # γₖ₊₁ = qᵗp / βₖ₊₁
# Compute vₖ₊₁ and uₖ₊₁.
if qᵗp ≠ zero(T)
@. vₖ₋₁ = vₖ₋₁ / βₖ₊₁ # βₖ₊₁vₖ₊₁ = Avₖ - γₖvₖ₋₁ - αₖvₖ
@. uₖ₋₁ = uₖ₋₁ / γₖ₊₁ # γₖ₊₁uₖ₊₁ = Aᵀuₖ - βₖuₖ₋₁ - αₖuₖ
end

# Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ].
# [ Oᵀ ]
Expand Down Expand Up @@ -201,14 +204,9 @@ function qmr!(solver :: QmrSolver{T,S}, A, b :: AbstractVector{T}; c :: Abstract
# xₖ ← xₖ₋₁ + ζₖ * wₖ
@kaxpy!(n, ζₖ, wₖ, x)

# Compute vₖ₊₁ and uₖ₊₁.
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ

if qᵗp ≠ zero(T)
@. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
@. uₖ = p / γₖ₊₁ # γₖ₊₁uₖ₊₁ = p
end
# Update uₖ₋₁, vₖ₋₁, uₖ and vₖ.
@kswap(uₖ₋₁, uₖ)
@kswap(vₖ₋₁, vₖ)

# Compute τₖ₊₁ = τₖ + ‖vₖ₊₁‖²
τₖ₊₁ = τₖ + @kdot(n, vₖ, vₖ)
Expand Down
8 changes: 4 additions & 4 deletions test/test_alloc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,8 @@ function test_alloc()
@test (VERSION < v"1.5") || (inplace_trilqr_bytes == 208)

# BILQ needs:
# - 8 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅, p, q
storage_bilq(n) = 8 * n
# - 6 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅
storage_bilq(n) = 6 * n
storage_bilq_bytes(n) = 8 * storage_bilq(n)

expected_bilq_bytes = storage_bilq_bytes(n)
Expand Down Expand Up @@ -409,8 +409,8 @@ function test_alloc()
@test (VERSION < v"1.5") || (inplace_minres_qlp_bytes == 208)

# QMR needs:
# - 9 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p, q
storage_qmr(n) = 9 * n
# - 7 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ
storage_qmr(n) = 7 * n
storage_qmr_bytes(n) = 8 * storage_qmr(n)

expected_qmr_bytes = storage_qmr_bytes(n)
Expand Down
2 changes: 1 addition & 1 deletion test/test_show.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@testset "Increase coverage of show" begin
@testset "show" begin
solver = Krylov.SimpleStats(true, true, Float64[], Float64[], "t")
io = IOBuffer()
show(io, solver)
Expand Down