From ad1fab9ac6b07ab90897844c375e725600856fab Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Wed, 7 Sep 2022 01:01:16 -0400
Subject: [PATCH 001/132] [KrylovSolvers] inner constructors -> outer
constructors
---
src/krylov_solvers.jl | 1420 ++++++++++++++++++++---------------------
1 file changed, 710 insertions(+), 710 deletions(-)
diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl
index 8a109a2be..a6da85bd5 100644
--- a/src/krylov_solvers.jl
+++ b/src/krylov_solvers.jl
@@ -68,29 +68,29 @@ mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
err_vec :: Vector{T}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function MinresSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r1 = S(undef, n)
- r2 = S(undef, n)
- w1 = S(undef, n)
- w2 = S(undef, n)
- y = S(undef, n)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats)
- return solver
- end
+function MinresSolver(n, m, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r1 = S(undef, n)
+ r2 = S(undef, n)
+ w1 = S(undef, n)
+ w2 = S(undef, n)
+ y = S(undef, n)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = MinresSolver{T,FC,S}(Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats)
+ return solver
+end
- function MinresSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- MinresSolver(n, m, S, window=window)
- end
+function MinresSolver(A, b; window :: Int=5)
+ n, m = size(A)
+ S = ktypeof(b)
+ MinresSolver(n, m, S, window=window)
end
"""
@@ -112,26 +112,26 @@ mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
z :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function CgSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- p = S(undef, n)
- Ap = S(undef, n)
- z = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, p, Ap, z, false, stats)
- return solver
- end
+function CgSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ p = S(undef, n)
+ Ap = S(undef, n)
+ z = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CgSolver{T,FC,S}(Δx, x, r, p, Ap, z, false, stats)
+ return solver
+end
- function CgSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgSolver(n, m, S)
- end
+function CgSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CgSolver(n, m, S)
end
"""
@@ -154,27 +154,27 @@ mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
Mq :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function CrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- p = S(undef, n)
- q = S(undef, n)
- Ar = S(undef, n)
- Mq = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, p, q, Ar, Mq, false, stats)
- return solver
- end
+function CrSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ p = S(undef, n)
+ q = S(undef, n)
+ Ar = S(undef, n)
+ Mq = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CrSolver{T,FC,S}(Δx, x, r, p, q, Ar, Mq, false, stats)
+ return solver
+end
- function CrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CrSolver(n, m, S)
- end
+function CrSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CrSolver(n, m, S)
end
"""
@@ -200,30 +200,30 @@ mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
sprod :: Vector{T}
warm_start :: Bool
stats :: SymmlqStats{T}
+end
- function SymmlqSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- Mvold = S(undef, n)
- Mv = S(undef, n)
- Mv_next = S(undef, n)
- w̅ = S(undef, n)
- v = S(undef, 0)
- clist = zeros(T, window)
- zlist = zeros(T, window)
- sprod = ones(T, window)
- stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), "unknown")
- solver = new{T,FC,S}(Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats)
- return solver
- end
+function SymmlqSolver(n, m, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ Mvold = S(undef, n)
+ Mv = S(undef, n)
+ Mv_next = S(undef, n)
+ w̅ = S(undef, n)
+ v = S(undef, 0)
+ clist = zeros(T, window)
+ zlist = zeros(T, window)
+ sprod = ones(T, window)
+ stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), "unknown")
+ solver = SymmlqSolver{T,FC,S}(Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats)
+ return solver
+end
- function SymmlqSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- SymmlqSolver(n, m, S, window=window)
- end
+function SymmlqSolver(A, b; window :: Int=5)
+ n, m = size(A)
+ S = ktypeof(b)
+ SymmlqSolver(n, m, S, window=window)
end
"""
@@ -246,27 +246,27 @@ mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
warm_start :: Bool
stats :: LanczosStats{T}
+end
- function CgLanczosSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- Mv = S(undef, n)
- Mv_prev = S(undef, n)
- p = S(undef, n)
- Mv_next = S(undef, n)
- v = S(undef, 0)
- stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), "unknown")
- solver = new{T,FC,S}(Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats)
- return solver
- end
+function CgLanczosSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ Mv = S(undef, n)
+ Mv_prev = S(undef, n)
+ p = S(undef, n)
+ Mv_next = S(undef, n)
+ v = S(undef, 0)
+ stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), "unknown")
+ solver = CgLanczosSolver{T,FC,S}(Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats)
+ return solver
+end
- function CgLanczosSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgLanczosSolver(n, m, S)
- end
+function CgLanczosSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CgLanczosSolver(n, m, S)
end
"""
@@ -294,34 +294,34 @@ mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S}
converged :: BitVector
not_cv :: BitVector
stats :: LanczosShiftStats{T}
+end
- function CgLanczosShiftSolver(n, m, nshifts, S)
- FC = eltype(S)
- T = real(FC)
- Mv = S(undef, n)
- Mv_prev = S(undef, n)
- Mv_next = S(undef, n)
- v = S(undef, 0)
- x = [S(undef, n) for i = 1 : nshifts]
- p = [S(undef, n) for i = 1 : nshifts]
- σ = Vector{T}(undef, nshifts)
- δhat = Vector{T}(undef, nshifts)
- ω = Vector{T}(undef, nshifts)
- γ = Vector{T}(undef, nshifts)
- rNorms = Vector{T}(undef, nshifts)
- indefinite = BitVector(undef, nshifts)
- converged = BitVector(undef, nshifts)
- not_cv = BitVector(undef, nshifts)
- stats = LanczosShiftStats(0, false, [T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), "unknown")
- solver = new{T,FC,S}(Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats)
- return solver
- end
+function CgLanczosShiftSolver(n, m, nshifts, S)
+ FC = eltype(S)
+ T = real(FC)
+ Mv = S(undef, n)
+ Mv_prev = S(undef, n)
+ Mv_next = S(undef, n)
+ v = S(undef, 0)
+ x = [S(undef, n) for i = 1 : nshifts]
+ p = [S(undef, n) for i = 1 : nshifts]
+ σ = Vector{T}(undef, nshifts)
+ δhat = Vector{T}(undef, nshifts)
+ ω = Vector{T}(undef, nshifts)
+ γ = Vector{T}(undef, nshifts)
+ rNorms = Vector{T}(undef, nshifts)
+ indefinite = BitVector(undef, nshifts)
+ converged = BitVector(undef, nshifts)
+ not_cv = BitVector(undef, nshifts)
+ stats = LanczosShiftStats(0, false, [T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), "unknown")
+ solver = CgLanczosShiftSolver{T,FC,S}(Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats)
+ return solver
+end
- function CgLanczosShiftSolver(A, b, nshifts)
- n, m = size(A)
- S = ktypeof(b)
- CgLanczosShiftSolver(n, m, nshifts, S)
- end
+function CgLanczosShiftSolver(A, b, nshifts)
+ n, m = size(A)
+ S = ktypeof(b)
+ CgLanczosShiftSolver(n, m, nshifts, S)
end
"""
@@ -345,28 +345,28 @@ mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vₖ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function MinresQlpSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- wₖ₋₁ = S(undef, n)
- wₖ = S(undef, n)
- M⁻¹vₖ₋₁ = S(undef, n)
- M⁻¹vₖ = S(undef, n)
- x = S(undef, n)
- p = S(undef, n)
- vₖ = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats)
- return solver
- end
+function MinresQlpSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ wₖ₋₁ = S(undef, n)
+ wₖ = S(undef, n)
+ M⁻¹vₖ₋₁ = S(undef, n)
+ M⁻¹vₖ = S(undef, n)
+ x = S(undef, n)
+ p = S(undef, n)
+ vₖ = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = MinresQlpSolver{T,FC,S}(Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats)
+ return solver
+end
- function MinresQlpSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- MinresQlpSolver(n, m, S)
- end
+function MinresQlpSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ MinresQlpSolver(n, m, S)
end
"""
@@ -393,31 +393,31 @@ mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
H :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function DqgmresSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- t = S(undef, n)
- z = S(undef, 0)
- w = S(undef, 0)
- P = [S(undef, n) for i = 1 : memory]
- V = [S(undef, n) for i = 1 : memory]
- c = Vector{T}(undef, memory)
- s = Vector{FC}(undef, memory)
- H = Vector{FC}(undef, memory+2)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, t, z, w, P, V, c, s, H, false, stats)
- return solver
- end
+function DqgmresSolver(n, m, memory, S)
+ memory = min(n, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ t = S(undef, n)
+ z = S(undef, 0)
+ w = S(undef, 0)
+ P = [S(undef, n) for i = 1 : memory]
+ V = [S(undef, n) for i = 1 : memory]
+ c = Vector{T}(undef, memory)
+ s = Vector{FC}(undef, memory)
+ H = Vector{FC}(undef, memory+2)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = DqgmresSolver{T,FC,S}(Δx, x, t, z, w, P, V, c, s, H, false, stats)
+ return solver
+end
- function DqgmresSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- DqgmresSolver(n, m, memory, S)
- end
+function DqgmresSolver(A, b, memory = 20)
+ n, m = size(A)
+ S = ktypeof(b)
+ DqgmresSolver(n, m, memory, S)
end
"""
@@ -443,30 +443,30 @@ mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
H :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function DiomSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- t = S(undef, n)
- z = S(undef, 0)
- w = S(undef, 0)
- P = [S(undef, n) for i = 1 : memory]
- V = [S(undef, n) for i = 1 : memory]
- L = Vector{FC}(undef, memory)
- H = Vector{FC}(undef, memory+2)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, t, z, w, P, V, L, H, false, stats)
- return solver
- end
+function DiomSolver(n, m, memory, S)
+ memory = min(n, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ t = S(undef, n)
+ z = S(undef, 0)
+ w = S(undef, 0)
+ P = [S(undef, n) for i = 1 : memory]
+ V = [S(undef, n) for i = 1 : memory]
+ L = Vector{FC}(undef, memory)
+ H = Vector{FC}(undef, memory+2)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = DiomSolver{T,FC,S}(Δx, x, t, z, w, P, V, L, H, false, stats)
+ return solver
+end
- function DiomSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- DiomSolver(n, m, memory, S)
- end
+function DiomSolver(A, b, memory = 20)
+ n, m = size(A)
+ S = ktypeof(b)
+ DiomSolver(n, m, memory, S)
end
"""
@@ -491,29 +491,29 @@ mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
q :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function UsymlqSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, m)
- uₖ = S(undef, m)
- p = S(undef, m)
- Δx = S(undef, 0)
- x = S(undef, m)
- d̅ = S(undef, m)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- q = S(undef, n)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats)
- return solver
- end
+function UsymlqSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, m)
+ uₖ = S(undef, m)
+ p = S(undef, m)
+ Δx = S(undef, 0)
+ x = S(undef, m)
+ d̅ = S(undef, m)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ q = S(undef, n)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = UsymlqSolver{T,FC,S}(uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats)
+ return solver
+end
- function UsymlqSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- UsymlqSolver(n, m, S)
- end
+function UsymlqSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ UsymlqSolver(n, m, S)
end
"""
@@ -539,30 +539,30 @@ mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
p :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function UsymqrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- q = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, m)
- wₖ₋₂ = S(undef, m)
- wₖ₋₁ = S(undef, m)
- uₖ₋₁ = S(undef, m)
- uₖ = S(undef, m)
- p = S(undef, m)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats)
- return solver
- end
+function UsymqrSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ q = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, m)
+ wₖ₋₂ = S(undef, m)
+ wₖ₋₁ = S(undef, m)
+ uₖ₋₁ = S(undef, m)
+ uₖ = S(undef, m)
+ p = S(undef, m)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = UsymqrSolver{T,FC,S}(vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats)
+ return solver
+end
- function UsymqrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- UsymqrSolver(n, m, S)
- end
+function UsymqrSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ UsymqrSolver(n, m, S)
end
"""
@@ -594,36 +594,36 @@ mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vₖ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function TricgSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- y = S(undef, m)
- N⁻¹uₖ₋₁ = S(undef, m)
- N⁻¹uₖ = S(undef, m)
- p = S(undef, m)
- gy₂ₖ₋₁ = S(undef, m)
- gy₂ₖ = S(undef, m)
- x = S(undef, n)
- M⁻¹vₖ₋₁ = S(undef, n)
- M⁻¹vₖ = S(undef, n)
- q = S(undef, n)
- gx₂ₖ₋₁ = S(undef, n)
- gx₂ₖ = S(undef, n)
- Δx = S(undef, 0)
- Δy = S(undef, 0)
- uₖ = S(undef, 0)
- vₖ = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
- return solver
- end
+function TricgSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ y = S(undef, m)
+ N⁻¹uₖ₋₁ = S(undef, m)
+ N⁻¹uₖ = S(undef, m)
+ p = S(undef, m)
+ gy₂ₖ₋₁ = S(undef, m)
+ gy₂ₖ = S(undef, m)
+ x = S(undef, n)
+ M⁻¹vₖ₋₁ = S(undef, n)
+ M⁻¹vₖ = S(undef, n)
+ q = S(undef, n)
+ gx₂ₖ₋₁ = S(undef, n)
+ gx₂ₖ = S(undef, n)
+ Δx = S(undef, 0)
+ Δy = S(undef, 0)
+ uₖ = S(undef, 0)
+ vₖ = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = TricgSolver{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
+ return solver
+end
- function TricgSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- TricgSolver(n, m, S)
- end
+function TricgSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ TricgSolver(n, m, S)
end
"""
@@ -659,40 +659,40 @@ mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vₖ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function TrimrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- y = S(undef, m)
- N⁻¹uₖ₋₁ = S(undef, m)
- N⁻¹uₖ = S(undef, m)
- p = S(undef, m)
- gy₂ₖ₋₃ = S(undef, m)
- gy₂ₖ₋₂ = S(undef, m)
- gy₂ₖ₋₁ = S(undef, m)
- gy₂ₖ = S(undef, m)
- x = S(undef, n)
- M⁻¹vₖ₋₁ = S(undef, n)
- M⁻¹vₖ = S(undef, n)
- q = S(undef, n)
- gx₂ₖ₋₃ = S(undef, n)
- gx₂ₖ₋₂ = S(undef, n)
- gx₂ₖ₋₁ = S(undef, n)
- gx₂ₖ = S(undef, n)
- Δx = S(undef, 0)
- Δy = S(undef, 0)
- uₖ = S(undef, 0)
- vₖ = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
- return solver
- end
+function TrimrSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ y = S(undef, m)
+ N⁻¹uₖ₋₁ = S(undef, m)
+ N⁻¹uₖ = S(undef, m)
+ p = S(undef, m)
+ gy₂ₖ₋₃ = S(undef, m)
+ gy₂ₖ₋₂ = S(undef, m)
+ gy₂ₖ₋₁ = S(undef, m)
+ gy₂ₖ = S(undef, m)
+ x = S(undef, n)
+ M⁻¹vₖ₋₁ = S(undef, n)
+ M⁻¹vₖ = S(undef, n)
+ q = S(undef, n)
+ gx₂ₖ₋₃ = S(undef, n)
+ gx₂ₖ₋₂ = S(undef, n)
+ gx₂ₖ₋₁ = S(undef, n)
+ gx₂ₖ = S(undef, n)
+ Δx = S(undef, 0)
+ Δy = S(undef, 0)
+ uₖ = S(undef, 0)
+ vₖ = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = TrimrSolver{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
+ return solver
+end
- function TrimrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- TrimrSolver(n, m, S)
- end
+function TrimrSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ TrimrSolver(n, m, S)
end
"""
@@ -721,33 +721,33 @@ mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
wₖ₋₂ :: S
warm_start :: Bool
stats :: AdjointStats{T}
+end
- function TrilqrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, m)
- uₖ = S(undef, m)
- p = S(undef, m)
- d̅ = S(undef, m)
- Δx = S(undef, 0)
- x = S(undef, m)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- q = S(undef, n)
- Δy = S(undef, 0)
- y = S(undef, n)
- wₖ₋₃ = S(undef, n)
- wₖ₋₂ = S(undef, n)
- stats = AdjointStats(0, false, false, T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats)
- return solver
- end
+function TrilqrSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, m)
+ uₖ = S(undef, m)
+ p = S(undef, m)
+ d̅ = S(undef, m)
+ Δx = S(undef, 0)
+ x = S(undef, m)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ q = S(undef, n)
+ Δy = S(undef, 0)
+ y = S(undef, n)
+ wₖ₋₃ = S(undef, n)
+ wₖ₋₂ = S(undef, n)
+ stats = AdjointStats(0, false, false, T[], T[], "unknown")
+ solver = TrilqrSolver{T,FC,S}(uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats)
+ return solver
+end
- function TrilqrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- TrilqrSolver(n, m, S)
- end
+function TrilqrSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ TrilqrSolver(n, m, S)
end
"""
@@ -772,29 +772,29 @@ mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vw :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function CgsSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- u = S(undef, n)
- p = S(undef, n)
- q = S(undef, n)
- ts = S(undef, n)
- yz = S(undef, 0)
- vw = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, u, p, q, ts, yz, vw, false, stats)
- return solver
- end
+function CgsSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ u = S(undef, n)
+ p = S(undef, n)
+ q = S(undef, n)
+ ts = S(undef, n)
+ yz = S(undef, 0)
+ vw = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CgsSolver{T,FC,S}(Δx, x, r, u, p, q, ts, yz, vw, false, stats)
+ return solver
+end
- function CgsSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgsSolver(n, m, S)
- end
+function CgsSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CgsSolver(n, m, S)
end
"""
@@ -819,29 +819,29 @@ mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S}
t :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function BicgstabSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- p = S(undef, n)
- v = S(undef, n)
- s = S(undef, n)
- qd = S(undef, n)
- yz = S(undef, 0)
- t = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, p, v, s, qd, yz, t, false, stats)
- return solver
- end
+function BicgstabSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ p = S(undef, n)
+ v = S(undef, n)
+ s = S(undef, n)
+ qd = S(undef, n)
+ yz = S(undef, 0)
+ t = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = BicgstabSolver{T,FC,S}(Δx, x, r, p, v, s, qd, yz, t, false, stats)
+ return solver
+end
- function BicgstabSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- BicgstabSolver(n, m, S)
- end
+function BicgstabSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ BicgstabSolver(n, m, S)
end
"""
@@ -866,29 +866,29 @@ mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
d̅ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function BilqSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, n)
- uₖ = S(undef, n)
- q = S(undef, n)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- p = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, n)
- d̅ = S(undef, n)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats)
- return solver
- end
+function BilqSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ q = S(undef, n)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ d̅ = S(undef, n)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = BilqSolver{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats)
+ return solver
+end
- function BilqSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- BilqSolver(n, m, S)
- end
+function BilqSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ BilqSolver(n, m, S)
end
"""
@@ -914,30 +914,30 @@ mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
wₖ₋₁ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function QmrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, n)
- uₖ = S(undef, n)
- q = S(undef, n)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- p = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, n)
- wₖ₋₂ = S(undef, n)
- wₖ₋₁ = S(undef, n)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats)
- return solver
- end
+function QmrSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ q = S(undef, n)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ wₖ₋₂ = S(undef, n)
+ wₖ₋₁ = S(undef, n)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = QmrSolver{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats)
+ return solver
+end
- function QmrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- QmrSolver(n, m, S)
- end
+function QmrSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ QmrSolver(n, m, S)
end
"""
@@ -966,33 +966,33 @@ mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
wₖ₋₂ :: S
warm_start :: Bool
stats :: AdjointStats{T}
+end
- function BilqrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, n)
- uₖ = S(undef, n)
- q = S(undef, n)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- p = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, n)
- Δy = S(undef, 0)
- y = S(undef, n)
- d̅ = S(undef, n)
- wₖ₋₃ = S(undef, n)
- wₖ₋₂ = S(undef, n)
- stats = AdjointStats(0, false, false, T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats)
- return solver
- end
+function BilqrSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ q = S(undef, n)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ Δy = S(undef, 0)
+ y = S(undef, n)
+ d̅ = S(undef, n)
+ wₖ₋₃ = S(undef, n)
+ wₖ₋₂ = S(undef, n)
+ stats = AdjointStats(0, false, false, T[], T[], "unknown")
+ solver = BilqrSolver{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats)
+ return solver
+end
- function BilqrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- BilqrSolver(n, m, S)
- end
+function BilqrSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ BilqrSolver(n, m, S)
end
"""
@@ -1013,26 +1013,26 @@ mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
q :: S
Mr :: S
stats :: SimpleStats{T}
+end
- function CglsSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- s = S(undef, m)
- r = S(undef, n)
- q = S(undef, n)
- Mr = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, s, r, q, Mr, stats)
- return solver
- end
+function CglsSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ p = S(undef, m)
+ s = S(undef, m)
+ r = S(undef, n)
+ q = S(undef, n)
+ Mr = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CglsSolver{T,FC,S}(x, p, s, r, q, Mr, stats)
+ return solver
+end
- function CglsSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CglsSolver(n, m, S)
- end
+function CglsSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CglsSolver(n, m, S)
end
"""
@@ -1055,28 +1055,28 @@ mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
s :: S
Ms :: S
stats :: SimpleStats{T}
+end
- function CrlsSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- Ar = S(undef, m)
- q = S(undef, m)
- r = S(undef, n)
- Ap = S(undef, n)
- s = S(undef, n)
- Ms = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, Ar, q, r, Ap, s, Ms, stats)
- return solver
- end
+function CrlsSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ p = S(undef, m)
+ Ar = S(undef, m)
+ q = S(undef, m)
+ r = S(undef, n)
+ Ap = S(undef, n)
+ s = S(undef, n)
+ Ms = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CrlsSolver{T,FC,S}(x, p, Ar, q, r, Ap, s, Ms, stats)
+ return solver
+end
- function CrlsSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CrlsSolver(n, m, S)
- end
+function CrlsSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CrlsSolver(n, m, S)
end
"""
@@ -1098,27 +1098,27 @@ mutable struct CgneSolver{T,FC,S} <: KrylovSolver{T,FC,S}
s :: S
z :: S
stats :: SimpleStats{T}
+end
- function CgneSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- Aᵀz = S(undef, m)
- r = S(undef, n)
- q = S(undef, n)
- s = S(undef, 0)
- z = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, Aᵀz, r, q, s, z, stats)
- return solver
- end
+function CgneSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ p = S(undef, m)
+ Aᵀz = S(undef, m)
+ r = S(undef, n)
+ q = S(undef, n)
+ s = S(undef, 0)
+ z = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CgneSolver{T,FC,S}(x, p, Aᵀz, r, q, s, z, stats)
+ return solver
+end
- function CgneSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgneSolver(n, m, S)
- end
+function CgneSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CgneSolver(n, m, S)
end
"""
@@ -1140,27 +1140,27 @@ mutable struct CrmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
Mq :: S
s :: S
stats :: SimpleStats{T}
+end
- function CrmrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- Aᵀr = S(undef, m)
- r = S(undef, n)
- q = S(undef, n)
- Mq = S(undef, 0)
- s = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, Aᵀr, r, q, Mq, s, stats)
- return solver
- end
+function CrmrSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ p = S(undef, m)
+ Aᵀr = S(undef, m)
+ r = S(undef, n)
+ q = S(undef, n)
+ Mq = S(undef, 0)
+ s = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CrmrSolver{T,FC,S}(x, p, Aᵀr, r, q, Mq, s, stats)
+ return solver
+end
- function CrmrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CrmrSolver(n, m, S)
- end
+function CrmrSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CrmrSolver(n, m, S)
end
"""
@@ -1184,29 +1184,29 @@ mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
err_vec :: Vector{T}
stats :: LSLQStats{T}
+end
- function LslqSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- w̄ = S(undef, m)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, w̄, Mu, Av, u, v, err_vec, stats)
- return solver
- end
+function LslqSolver(n, m, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ Nv = S(undef, m)
+ Aᵀu = S(undef, m)
+ w̄ = S(undef, m)
+ Mu = S(undef, n)
+ Av = S(undef, n)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown")
+ solver = LslqSolver{T,FC,S}(x, Nv, Aᵀu, w̄, Mu, Av, u, v, err_vec, stats)
+ return solver
+end
- function LslqSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- LslqSolver(n, m, S, window=window)
- end
+function LslqSolver(A, b; window :: Int=5)
+ n, m = size(A)
+ S = ktypeof(b)
+ LslqSolver(n, m, S, window=window)
end
"""
@@ -1230,29 +1230,29 @@ mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
err_vec :: Vector{T}
stats :: SimpleStats{T}
+end
- function LsqrSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- w = S(undef, m)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, w, Mu, Av, u, v, err_vec, stats)
- return solver
- end
+function LsqrSolver(n, m, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ Nv = S(undef, m)
+ Aᵀu = S(undef, m)
+ w = S(undef, m)
+ Mu = S(undef, n)
+ Av = S(undef, n)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = LsqrSolver{T,FC,S}(x, Nv, Aᵀu, w, Mu, Av, u, v, err_vec, stats)
+ return solver
+end
- function LsqrSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- LsqrSolver(n, m, S, window=window)
- end
+function LsqrSolver(A, b; window :: Int=5)
+ n, m = size(A)
+ S = ktypeof(b)
+ LsqrSolver(n, m, S, window=window)
end
"""
@@ -1277,30 +1277,30 @@ mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
err_vec :: Vector{T}
stats :: LsmrStats{T}
+end
- function LsmrSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- h = S(undef, m)
- hbar = S(undef, m)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, h, hbar, Mu, Av, u, v, err_vec, stats)
- return solver
- end
+function LsmrSolver(n, m, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ Nv = S(undef, m)
+ Aᵀu = S(undef, m)
+ h = S(undef, m)
+ hbar = S(undef, m)
+ Mu = S(undef, n)
+ Av = S(undef, n)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown")
+ solver = LsmrSolver{T,FC,S}(x, Nv, Aᵀu, h, hbar, Mu, Av, u, v, err_vec, stats)
+ return solver
+end
- function LsmrSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- LsmrSolver(n, m, S, window=window)
- end
+function LsmrSolver(A, b; window :: Int=5)
+ n, m = size(A)
+ S = ktypeof(b)
+ LsmrSolver(n, m, S, window=window)
end
"""
@@ -1325,30 +1325,30 @@ mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
q :: S
stats :: LNLQStats{T}
+end
- function LnlqSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- y = S(undef, n)
- w̄ = S(undef, n)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- q = S(undef, 0)
- stats = LNLQStats(0, false, T[], false, T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, y, w̄, Mu, Av, u, v, q, stats)
- return solver
- end
+function LnlqSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ Nv = S(undef, m)
+ Aᵀu = S(undef, m)
+ y = S(undef, n)
+ w̄ = S(undef, n)
+ Mu = S(undef, n)
+ Av = S(undef, n)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ q = S(undef, 0)
+ stats = LNLQStats(0, false, T[], false, T[], T[], "unknown")
+ solver = LnlqSolver{T,FC,S}(x, Nv, Aᵀu, y, w̄, Mu, Av, u, v, q, stats)
+ return solver
+end
- function LnlqSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- LnlqSolver(n, m, S)
- end
+function LnlqSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ LnlqSolver(n, m, S)
end
"""
@@ -1373,30 +1373,30 @@ mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
w2 :: S
stats :: SimpleStats{T}
+end
- function CraigSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- y = S(undef, n)
- w = S(undef, n)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- w2 = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, y, w, Mu, Av, u, v, w2, stats)
- return solver
- end
+function CraigSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ Nv = S(undef, m)
+ Aᵀu = S(undef, m)
+ y = S(undef, n)
+ w = S(undef, n)
+ Mu = S(undef, n)
+ Av = S(undef, n)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ w2 = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CraigSolver{T,FC,S}(x, Nv, Aᵀu, y, w, Mu, Av, u, v, w2, stats)
+ return solver
+end
- function CraigSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CraigSolver(n, m, S)
- end
+function CraigSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CraigSolver(n, m, S)
end
"""
@@ -1423,32 +1423,32 @@ mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
q :: S
stats :: SimpleStats{T}
+end
- function CraigmrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- d = S(undef, m)
- y = S(undef, n)
- Mu = S(undef, n)
- w = S(undef, n)
- wbar = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- q = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, d, y, Mu, w, wbar, Av, u, v, q, stats)
- return solver
- end
+function CraigmrSolver(n, m, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, m)
+ Nv = S(undef, m)
+ Aᵀu = S(undef, m)
+ d = S(undef, m)
+ y = S(undef, n)
+ Mu = S(undef, n)
+ w = S(undef, n)
+ wbar = S(undef, n)
+ Av = S(undef, n)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ q = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CraigmrSolver{T,FC,S}(x, Nv, Aᵀu, d, y, Mu, w, wbar, Av, u, v, q, stats)
+ return solver
+end
- function CraigmrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CraigmrSolver(n, m, S)
- end
+function CraigmrSolver(A, b)
+ n, m = size(A)
+ S = ktypeof(b)
+ CraigmrSolver(n, m, S)
end
"""
@@ -1476,31 +1476,31 @@ mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
warm_start :: Bool
inner_iter :: Int
stats :: SimpleStats{T}
+end
- function GmresSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- w = S(undef, n)
- p = S(undef, 0)
- q = S(undef, 0)
- V = [S(undef, n) for i = 1 : memory]
- c = Vector{T}(undef, memory)
- s = Vector{FC}(undef, memory)
- z = Vector{FC}(undef, memory)
- R = Vector{FC}(undef, div(memory * (memory+1), 2))
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, w, p, q, V, c, s, z, R, false, 0, stats)
- return solver
- end
+function GmresSolver(n, m, memory, S)
+ memory = min(n, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ w = S(undef, n)
+ p = S(undef, 0)
+ q = S(undef, 0)
+ V = [S(undef, n) for i = 1 : memory]
+ c = Vector{T}(undef, memory)
+ s = Vector{FC}(undef, memory)
+ z = Vector{FC}(undef, memory)
+ R = Vector{FC}(undef, div(memory * (memory+1), 2))
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = GmresSolver{T,FC,S}(Δx, x, w, p, q, V, c, s, z, R, false, 0, stats)
+ return solver
+end
- function GmresSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- GmresSolver(n, m, memory, S)
- end
+function GmresSolver(A, b, memory = 20)
+ n, m = size(A)
+ S = ktypeof(b)
+ GmresSolver(n, m, memory, S)
end
"""
@@ -1526,30 +1526,30 @@ mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
U :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function FomSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- w = S(undef, n)
- p = S(undef, 0)
- q = S(undef, 0)
- V = [S(undef, n) for i = 1 : memory]
- l = Vector{FC}(undef, memory)
- z = Vector{FC}(undef, memory)
- U = Vector{FC}(undef, div(memory * (memory+1), 2))
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, w, p, q, V, l, z, U, false, stats)
- return solver
- end
+function FomSolver(n, m, memory, S)
+ memory = min(n, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ w = S(undef, n)
+ p = S(undef, 0)
+ q = S(undef, 0)
+ V = [S(undef, n) for i = 1 : memory]
+ l = Vector{FC}(undef, memory)
+ z = Vector{FC}(undef, memory)
+ U = Vector{FC}(undef, div(memory * (memory+1), 2))
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = FomSolver{T,FC,S}(Δx, x, w, p, q, V, l, z, U, false, stats)
+ return solver
+end
- function FomSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- FomSolver(n, m, memory, S)
- end
+function FomSolver(A, b, memory = 20)
+ n, m = size(A)
+ S = ktypeof(b)
+ FomSolver(n, m, memory, S)
end
"""
@@ -1582,37 +1582,37 @@ mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
R :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function GpmrSolver(n, m, memory, S)
- memory = min(n + m, memory)
- FC = eltype(S)
- T = real(FC)
- wA = S(undef, 0)
- wB = S(undef, 0)
- dA = S(undef, n)
- dB = S(undef, m)
- Δx = S(undef, 0)
- Δy = S(undef, 0)
- x = S(undef, n)
- y = S(undef, m)
- q = S(undef, 0)
- p = S(undef, 0)
- V = [S(undef, n) for i = 1 : memory]
- U = [S(undef, m) for i = 1 : memory]
- gs = Vector{FC}(undef, 4 * memory)
- gc = Vector{T}(undef, 4 * memory)
- zt = Vector{FC}(undef, 2 * memory)
- R = Vector{FC}(undef, memory * (2memory + 1))
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats)
- return solver
- end
+function GpmrSolver(n, m, memory, S)
+ memory = min(n + m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ wA = S(undef, 0)
+ wB = S(undef, 0)
+ dA = S(undef, n)
+ dB = S(undef, m)
+ Δx = S(undef, 0)
+ Δy = S(undef, 0)
+ x = S(undef, n)
+ y = S(undef, m)
+ q = S(undef, 0)
+ p = S(undef, 0)
+ V = [S(undef, n) for i = 1 : memory]
+ U = [S(undef, m) for i = 1 : memory]
+ gs = Vector{FC}(undef, 4 * memory)
+ gc = Vector{T}(undef, 4 * memory)
+ zt = Vector{FC}(undef, 2 * memory)
+ R = Vector{FC}(undef, memory * (2memory + 1))
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = GpmrSolver{T,FC,S}(wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats)
+ return solver
+end
- function GpmrSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- GpmrSolver(n, m, memory, S)
- end
+function GpmrSolver(A, b, memory = 20)
+ n, m = size(A)
+ S = ktypeof(b)
+ GpmrSolver(n, m, memory, S)
end
"""
From 7c5e2337fc2231300496f27cc2ee80f52e18c807 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Wed, 7 Sep 2022 00:34:21 -0400
Subject: [PATCH 002/132] Add a reference to BiCG paper
---
src/bilq.jl | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/bilq.jl b/src/bilq.jl
index 39725fbfe..b41f890a8 100644
--- a/src/bilq.jl
+++ b/src/bilq.jl
@@ -39,9 +39,10 @@ where `kwargs` are the same keyword arguments as above.
The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
and `false` otherwise.
-#### Reference
+#### References
* A. Montoison and D. Orban, [*BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*](https://doi.org/10.1137/19M1290991), SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
+* R. Fletcher, [*Conjugate gradient methods for indefinite systems*](https://doi.org/10.1007/BFb0080116), Numerical Analysis, pp. 73--89, 1976.
"""
function bilq end
From 3cc80c6d154be9799ddc6eb05311349138795b37 Mon Sep 17 00:00:00 2001
From: Alexis <35051714+amontoison@users.noreply.github.com>
Date: Wed, 7 Sep 2022 14:04:59 -0400
Subject: [PATCH 003/132] Update src/bilq.jl
---
src/bilq.jl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/bilq.jl b/src/bilq.jl
index b41f890a8..ce84d3ec1 100644
--- a/src/bilq.jl
+++ b/src/bilq.jl
@@ -42,7 +42,7 @@ and `false` otherwise.
#### References
* A. Montoison and D. Orban, [*BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*](https://doi.org/10.1137/19M1290991), SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
-* R. Fletcher, [*Conjugate gradient methods for indefinite systems*](https://doi.org/10.1007/BFb0080116), Numerical Analysis, pp. 73--89, 1976.
+* R. Fletcher, [*Conjugate gradient methods for indefinite systems*](https://doi.org/10.1007/BFb0080116), Numerical Analysis, Springer, pp. 73--89, 1976.
"""
function bilq end
From 69e7c1d80068356fdcbe376d7c9c508c8b503b28 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Thu, 18 Aug 2022 23:12:15 -0400
Subject: [PATCH 004/132] [documentation] Add a section about preconditioners
---
docs/make.jl | 3 +-
docs/src/preconditioners.md | 63 +++++++++++++++++++++++++++++++++++++
src/cgne.jl | 18 +++++------
src/crmr.jl | 24 +++++++-------
src/krylov_solvers.jl | 6 ++--
test/test_cgne.jl | 14 ++++-----
test/test_crmr.jl | 10 +++---
7 files changed, 101 insertions(+), 37 deletions(-)
create mode 100644 docs/src/preconditioners.md
diff --git a/docs/make.jl b/docs/make.jl
index 57ad87cd2..48263fe25 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -15,12 +15,13 @@ makedocs(
"Krylov methods" => ["Symmetric positive definite linear systems" => "solvers/spd.md",
"Symmetric indefinite linear systems" => "solvers/sid.md",
"Unsymmetric linear systems" => "solvers/unsymmetric.md",
- "Least-norm problems" => "solvers/ln.md",
+ "Minimum-norm problems" => "solvers/ln.md",
"Least-squares problems" => "solvers/ls.md",
"Adjoint systems" => "solvers/as.md",
"Saddle-point and symmetric quasi-definite systems" => "solvers/sp_sqd.md",
"Generalized saddle-point and unsymmetric partitioned systems" => "solvers/gsp.md"],
"In-place methods" => "inplace.md",
+ "Preconditioners" => "preconditioners.md",
"GPU support" => "gpu.md",
"Warm start" => "warm_start.md",
"Factorization-free operators" => "factorization-free.md",
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
new file mode 100644
index 000000000..22b0f445f
--- /dev/null
+++ b/docs/src/preconditioners.md
@@ -0,0 +1,63 @@
+## [Preconditioners](@id preconditioners)
+
+The solvers in Krylov.jl support preconditioners that modify a given linear systems $Ax = b$ into a form that allows a faster convergence.
+
+It exists three variants of preconditioning:
+
+| Left preconditioning | Two-sided preconditioning | Right preconditioning |
+|:--------------------:|:--------------------------:|:-------------------------:|
+| $MAx = Mb$ | $MANy = Mb$~~with~~$x = Ny$| $ANy = b$~~with~~$x = Ny$ |
+
+#### Unsymmetric linear systems
+
+A Krylov method dedicated to unsymmetric systems allows the three variants.
+We provide these preconditioners with the arguments `M` and `N`.
+It concerns the methods [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
+
+#### Symmetric linear systems
+
+When $A$ is symmetric, we can only use the centered / split preconditioning $LAL^Tx = Lb$.
+It is a special case of two-sided preconditioning $M=L=N^T$ that maintains the symmetry of the linear systems.
+Krylov methods dedicated to symmetric systems take directly as input a symmetric positive preconditioner $P=LL^T$.
+We provide this preconditioner with the argument `M` in [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
+
+#### Least-squares problems
+
+For linear least-squares problem $\min \|b - Ax\|^2_2$, a preconditioner `M` modifies the problem such that $\min \|b - Ax\|^2_M$ is solved.
+It is equivalent to solve the normal equation $A^TMAx = A^TMb$ instead of $A^TAx = A^Tb$.
+We provide a symmetric positive definite preconditioner with the argument `M` in [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
+
+#### Minimum-norm problems
+
+For minimum-norm problem $\min \|x\|^2_2$~~s.t.~~$Ax = b$, a preconditioner `N` modifies the problem such that $\min \|x\|^2_{N^{-1}}$~~s.t.~~$Ax = b$ is solved.
+It is equivalent to solve the normal equation $ANA^Tx = b$ instead of $AA^Tx = b$.
+We provide a symmetric positive definite preconditioner with the argument `N` in [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr).
+
+#### Saddle-point and symmetric quasi-definite systems
+
+When a symmetric system $Kz = d$ has the 2x2 block structure
+```math
+ \begin{bmatrix} \tau E & \phantom{-}A \\ A^T & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+```
+where $E$ and $F$ are symmetric positive definite, [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of this structure if preconditioners `M` and `N` such that $M = E^{-1}$ and $N = F^{-1}$ are available.
+
+#### Generalized saddle-point and unsymmetric partitioned systems
+
+When an unsymmetric system $Kz = d$ has the 2x2 block structure
+```math
+ \begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+```
+[`GPMR`](@ref gpmr) can take advantage of this structure if preconditioners `C`, `D`, `E` and `F` such that $CE = M^{-1}$ and $DF = N^{-1}$ are available.
+
+!!! tip
+ A preconditioner `P` only needs to support the operation `mul!(y, P, x)` to be used in Krylov.jl.
+
+!!! note
+ Our implementations of [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr) and [`TriLQR`](@ref trilqr) don't support preconditioning.
+
+## Packages that provide preconditioners
+
+- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking or Crout version of ILU decompositions.
+- [ILUZero.jl](https://github.com/mcovalt/ILUZero.jl) is a Julia implementation of incomplete LU factorization with zero level of fill-in.
+- [LimitedLDLFactorizations.jl](https://github.com/JuliaSmoothOptimizers/LimitedLDLFactorizations.jl) for limited-memory LDLᵀ factorization of symmetric matrices.
+- [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) provides two algebraic multigrid (AMG) preconditioners.
diff --git a/src/cgne.jl b/src/cgne.jl
index 2859414e1..2f720b57c 100644
--- a/src/cgne.jl
+++ b/src/cgne.jl
@@ -31,7 +31,7 @@ export cgne, cgne!
"""
(x, stats) = cgne(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
+ N=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
itmax::Int=0, verbose::Int=0, history::Bool=false,
ldiv::Bool=false, callback=solver->false)
@@ -60,7 +60,7 @@ CGNE produces monotonic errors ‖x-x*‖₂ but not residuals ‖r‖₂.
It is formally equivalent to CRAIG, though can be slightly less accurate,
but simpler to implement. Only the x-part of the solution is returned.
-A preconditioner M may be provided in the form of a linear operator.
+A preconditioner N may be provided in the form of a linear operator.
The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
and `false` otherwise.
@@ -88,7 +88,7 @@ See [`CgneSolver`](@ref) for more details about the `solver`.
function cgne! end
function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
+ N=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
@@ -96,8 +96,8 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
length(b) == m || error("Inconsistent problem size")
(verbose > 0) && @printf("CGNE: system of %d equations in %d variables\n", m, n)
- # Tests M = Iₙ
- MisI = (M === I)
+ # Tests N = Iₙ
+ NisI = (N === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
@@ -107,16 +107,16 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
Aᵀ = A'
# Set up workspace.
- allocate_if(!MisI, solver, :z, S, m)
+ allocate_if(!NisI, solver, :z, S, m)
allocate_if(λ > 0, solver, :s, S, m)
x, p, Aᵀz, r, q, s, stats = solver.x, solver.p, solver.Aᵀz, solver.r, solver.q, solver.s, solver.stats
rNorms = stats.residuals
reset!(stats)
- z = MisI ? r : solver.z
+ z = NisI ? r : solver.z
x .= zero(FC)
r .= b
- MisI || mulorldiv!(z, M, r, ldiv)
+ NisI || mulorldiv!(z, N, r, ldiv)
rNorm = @knrm2(m, r) # Marginally faster than norm(r)
history && push!(rNorms, rNorm)
if rNorm == 0
@@ -158,7 +158,7 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
α = γ / δ
@kaxpy!(n, α, p, x) # Faster than x = x + α * p
@kaxpy!(m, -α, q, r) # Faster than r = r - α * q
- MisI || mulorldiv!(z, M, r, ldiv)
+ NisI || mulorldiv!(z, N, r, ldiv)
γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z)
β = γ_next / γ
mul!(Aᵀz, Aᵀ, z)
diff --git a/src/crmr.jl b/src/crmr.jl
index deb5cf79f..6ed2b3c60 100644
--- a/src/crmr.jl
+++ b/src/crmr.jl
@@ -12,7 +12,7 @@
#
# AAᵀy = b.
#
-# This method is equivalent to Craig-MR, described in
+# This method is equivalent to CRAIGMR, described in
#
# D. Orban and M. Arioli. Iterative Solution of Symmetric Quasi-Definite Linear Systems,
# Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017.
@@ -29,7 +29,7 @@ export crmr, crmr!
"""
(x, stats) = crmr(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T),
+ N=I, λ::T=zero(T), atol::T=√eps(T),
rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false,
ldiv::Bool=false, callback=solver->false)
@@ -58,7 +58,7 @@ CRMR produces monotonic residuals ‖r‖₂.
It is formally equivalent to CRAIG-MR, though can be slightly less accurate,
but simpler to implement. Only the x-part of the solution is returned.
-A preconditioner M may be provided.
+A preconditioner N may be provided.
The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
and `false` otherwise.
@@ -86,7 +86,7 @@ See [`CrmrSolver`](@ref) for more details about the `solver`.
function crmr! end
function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T),
+ N=I, λ :: T=zero(T), atol :: T=√eps(T),
rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
@@ -94,8 +94,8 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
length(b) == m || error("Inconsistent problem size")
(verbose > 0) && @printf("CRMR: system of %d equations in %d variables\n", m, n)
- # Tests M = Iₙ
- MisI = (M === I)
+ # Tests N = Iₙ
+ NisI = (N === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
@@ -105,16 +105,16 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
Aᵀ = A'
# Set up workspace.
- allocate_if(!MisI, solver, :Mq, S, m)
+ allocate_if(!NisI, solver, :Nq, S, m)
allocate_if(λ > 0, solver, :s , S, m)
x, p, Aᵀr, r = solver.x, solver.p, solver.Aᵀr, solver.r
q, s, stats = solver.q, solver.s, solver.stats
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
- Mq = MisI ? q : solver.Mq
+ Nq = NisI ? q : solver.Nq
x .= zero(FC) # initial estimation x = 0
- mulorldiv!(r, M, b, ldiv) # initial residual r = M * (b - Ax) = M * b
+ mulorldiv!(r, N, b, ldiv) # initial residual r = N * (b - Ax) = N * b
bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
history && push!(rNorms, rNorm)
@@ -149,10 +149,10 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
while ! (solved || inconsistent || tired || user_requested_exit)
mul!(q, A, p)
λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s
- MisI || mulorldiv!(Mq, M, q, ldiv)
- α = γ / @kdotr(m, q, Mq) # Compute qᵗ * M * q
+ NisI || mulorldiv!(Nq, N, q, ldiv)
+ α = γ / @kdotr(m, q, Nq) # Compute qᵗ * N * q
@kaxpy!(n, α, p, x) # Faster than x = x + α * p
- @kaxpy!(m, -α, Mq, r) # Faster than r = r - α * Mq
+ @kaxpy!(m, -α, Nq, r) # Faster than r = r - α * Nq
rNorm = @knrm2(m, r) # norm(r)
mul!(Aᵀr, Aᵀ, r)
γ_next = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ_next = dot(Aᵀr, Aᵀr)
diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl
index a6da85bd5..d557d91ae 100644
--- a/src/krylov_solvers.jl
+++ b/src/krylov_solvers.jl
@@ -1137,7 +1137,7 @@ mutable struct CrmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
Aᵀr :: S
r :: S
q :: S
- Mq :: S
+ Nq :: S
s :: S
stats :: SimpleStats{T}
end
@@ -1150,10 +1150,10 @@ function CrmrSolver(n, m, S)
Aᵀr = S(undef, m)
r = S(undef, n)
q = S(undef, n)
- Mq = S(undef, 0)
+ Nq = S(undef, 0)
s = S(undef, 0)
stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = CrmrSolver{T,FC,S}(x, p, Aᵀr, r, q, Mq, s, stats)
+ solver = CrmrSolver{T,FC,S}(x, p, Aᵀr, r, q, Nq, s, stats)
return solver
end
diff --git a/test/test_cgne.jl b/test/test_cgne.jl
index 64cbc0ea7..a48b4569e 100644
--- a/test/test_cgne.jl
+++ b/test/test_cgne.jl
@@ -1,6 +1,6 @@
-function test_cgne(A, b; λ=0.0, M=I)
+function test_cgne(A, b; λ=0.0, N=I)
(nrow, ncol) = size(A)
- (x, stats) = cgne(A, b, λ=λ, M=M)
+ (x, stats) = cgne(A, b, λ=λ, N=N)
r = b - A * x
if λ > 0
s = r / sqrt(λ)
@@ -69,8 +69,8 @@ end
@test stats.status == "x = 0 is a zero-residual solution"
# Test with Jacobi (or diagonal) preconditioner
- A, b, M = square_preconditioned(FC=FC)
- (x, stats, resid) = test_cgne(A, b, M=M)
+ A, b, N = square_preconditioned(FC=FC)
+ (x, stats, resid) = test_cgne(A, b, N=N)
@test(resid ≤ cgne_tol)
@test(stats.solved)
(xI, xmin, xmin_norm) = check_min_norm(A, b, x)
@@ -81,8 +81,8 @@ end
A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0;
2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0]
b = [1.0; 0.0]
- M = Diagonal(1 ./ (A * A'))
- (x, stats, resid) = test_cgne(A, b, M=M)
+ N = Diagonal(1 ./ (A * A'))
+ (x, stats, resid) = test_cgne(A, b, N=N)
@test(resid ≤ cgne_tol)
@test(stats.solved)
(xI, xmin, xmin_norm) = check_min_norm(A, b, x)
@@ -92,7 +92,7 @@ end
for transpose ∈ (false, true)
A, b, c, D = small_sp(transpose, FC=FC)
D⁻¹ = inv(D)
- (x, stats) = cgne(A, b, M=D⁻¹, λ=1.0)
+ (x, stats) = cgne(A, b, N=D⁻¹, λ=1.0)
end
# test callback function
diff --git a/test/test_crmr.jl b/test/test_crmr.jl
index 6354f329f..d0f902df6 100644
--- a/test/test_crmr.jl
+++ b/test/test_crmr.jl
@@ -1,6 +1,6 @@
-function test_crmr(A, b; λ=0.0, M=I, history=false)
+function test_crmr(A, b; λ=0.0, N=I, history=false)
(nrow, ncol) = size(A)
- (x, stats) = crmr(A, b, λ=λ, M=M, history=history)
+ (x, stats) = crmr(A, b, λ=λ, N=N, history=history)
r = b - A * x
if λ > 0
s = r / sqrt(λ)
@@ -76,8 +76,8 @@ end
A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0;
2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0]
b = [1.0; 0.0]
- M = Diagonal(1 ./ (A * A'))
- (x, stats, resid) = test_crmr(A, b, M=M)
+ N = Diagonal(1 ./ (A * A'))
+ (x, stats, resid) = test_crmr(A, b, N=N)
@test(resid ≤ crmr_tol)
@test(stats.solved)
(xI, xmin, xmin_norm) = check_min_norm(A, b, x)
@@ -87,7 +87,7 @@ end
for transpose ∈ (false, true)
A, b, c, D = small_sp(transpose, FC=FC)
D⁻¹ = inv(D)
- (x, stats) = crmr(A, b, M=D⁻¹, λ=1.0)
+ (x, stats) = crmr(A, b, N=D⁻¹, λ=1.0)
end
# test callback function
From cce31e670b0a62888e1a7cbb506ff72c337c122f Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Fri, 19 Aug 2022 03:24:51 -0400
Subject: [PATCH 005/132] Add more details about preconditioned LS and LN
problems
---
docs/src/preconditioners.md | 47 ++++++++++++++++++++++++++++---------
1 file changed, 36 insertions(+), 11 deletions(-)
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index 22b0f445f..0f51e27fb 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -4,9 +4,9 @@ The solvers in Krylov.jl support preconditioners that modify a given linear syst
It exists three variants of preconditioning:
-| Left preconditioning | Two-sided preconditioning | Right preconditioning |
-|:--------------------:|:--------------------------:|:-------------------------:|
-| $MAx = Mb$ | $MANy = Mb$~~with~~$x = Ny$| $ANy = b$~~with~~$x = Ny$ |
+| Left preconditioning | Two-sided preconditioning | Right preconditioning |
+|:--------------------:|:-------------------------------:|:------------------------------:|
+| $MAx = Mb$ | $MANy = Mb~~\text{with}~~x = Ny$| $ANy = b~~\text{with}~~x = Ny$ |
#### Unsymmetric linear systems
@@ -23,31 +23,56 @@ We provide this preconditioner with the argument `M` in [`SYMMLQ`](@ref symmlq),
#### Least-squares problems
-For linear least-squares problem $\min \|b - Ax\|^2_2$, a preconditioner `M` modifies the problem such that $\min \|b - Ax\|^2_M$ is solved.
-It is equivalent to solve the normal equation $A^TMAx = A^TMb$ instead of $A^TAx = A^Tb$.
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:-----------------------:|:-----------------------:|
+| least-squares problem | $\min \\|b - Ax\\|^2_2$ | $\min \\|b - Ax\\|^2_M$ |
+| Normal equation | $A^TAx = A^Tb$ | $A^TMAx = A^TMb$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} M^{-1} & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+
We provide a symmetric positive definite preconditioner with the argument `M` in [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
+A second positive definite preconditioner `N` is supported by [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
+It is dedicated to regularized least-squares problems.
+
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:-----------------------------------------------:|:------------------------------------------------------:|
+| least-squares problem | $\min \\|b - Ax\\|^2_2 + \lambda^2 \\|x\\|^2_2$ | $\min \\|b - Ax\\|^2_M + \lambda^2 \\|x\\|^2_{N^{-1}}$ |
+| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TMA + \lambda^2 N^{-1})x = A^TMb$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^T & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} M^{-1} & A \\ A^T & -\lambda^2 N^{-1} \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+
#### Minimum-norm problems
-For minimum-norm problem $\min \|x\|^2_2$~~s.t.~~$Ax = b$, a preconditioner `N` modifies the problem such that $\min \|x\|^2_{N^{-1}}$~~s.t.~~$Ax = b$ is solved.
-It is equivalent to solve the normal equation $ANA^Tx = b$ instead of $AA^Tx = b$.
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:---------------------------------------:|:----------------------------------------------:|
+| minimum-norm problem | $\min \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \\|x\\|^2_{N^{-1}}~~\text{s.t.}~~Ax = b$ |
+| Normal equation | $AA^Ty = b~~\text{with}~~x = A^Ty$ | $ANA^Ty = b~~\text{with}~~x = NA^Ty$ |
+| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -N^{-1} & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+
We provide a symmetric positive definite preconditioner with the argument `N` in [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr).
+A second positive definite preconditioner `M` is supported by [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr).
+It is dedicated to penalized minimum-norm problems.
+
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:-------------------------------------------------------------------:|:---------------------------------------------------------------------------------------:|
+| minimum-norm problem | $\min \\|x\\|^2_2 + \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \\|x\\|^2_{N^{-1}} + \\|y\\|^2_{M^{-1}}~~\text{s.t.}~~Ax + \lambda^2 M^{-1}y = b$ |
+| Normal equation | $(AA^T + \lambda^2 I)y = b~~\text{with}~~x = A^Ty$ | $(ANA^T + \lambda^2 M^{-1})y = b~~\text{with}~~x = NA^Ty$ |
+| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -N^{-1} & A^T \\ \phantom{-}A & \lambda^2 M^{-1} \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
#### Saddle-point and symmetric quasi-definite systems
When a symmetric system $Kz = d$ has the 2x2 block structure
```math
- \begin{bmatrix} \tau E & \phantom{-}A \\ A^T & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+ \begin{bmatrix} \tau M^{-1} & \phantom{-}A \\ A^T & \nu N^{-1} \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
```
-where $E$ and $F$ are symmetric positive definite, [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of this structure if preconditioners `M` and `N` such that $M = E^{-1}$ and $N = F^{-1}$ are available.
+where $M^{-1}$ and $N^{-1}$ are symmetric positive definite, [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of this structure if preconditioners `M` and `N` that model $M$ and $N$ are available.
#### Generalized saddle-point and unsymmetric partitioned systems
When an unsymmetric system $Kz = d$ has the 2x2 block structure
```math
- \begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+ \begin{bmatrix} \lambda M^{-1} & A \\ B & \mu N^{-1} \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
```
-[`GPMR`](@ref gpmr) can take advantage of this structure if preconditioners `C`, `D`, `E` and `F` such that $CE = M^{-1}$ and $DF = N^{-1}$ are available.
+[`GPMR`](@ref gpmr) can take advantage of this structure if preconditioners `C`, `D`, `E` and `F` such that $CE = M$ and $DF = N$ are available.
!!! tip
A preconditioner `P` only needs to support the operation `mul!(y, P, x)` to be used in Krylov.jl.
From 58a28db778e35c26a988a13d9dec2bc6b95f845f Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 22 Aug 2022 01:05:20 -0400
Subject: [PATCH 006/132] [documentation] Add more details about
preconditioners
---
docs/make.jl | 2 +-
docs/src/preconditioners.md | 126 ++++++++++++++++++++++++------------
2 files changed, 85 insertions(+), 43 deletions(-)
diff --git a/docs/make.jl b/docs/make.jl
index 48263fe25..f59bfac0c 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -15,7 +15,7 @@ makedocs(
"Krylov methods" => ["Symmetric positive definite linear systems" => "solvers/spd.md",
"Symmetric indefinite linear systems" => "solvers/sid.md",
"Unsymmetric linear systems" => "solvers/unsymmetric.md",
- "Minimum-norm problems" => "solvers/ln.md",
+ "Least-norm problems" => "solvers/ln.md",
"Least-squares problems" => "solvers/ls.md",
"Adjoint systems" => "solvers/as.md",
"Saddle-point and symmetric quasi-definite systems" => "solvers/sp_sqd.md",
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index 0f51e27fb..15cb3f362 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -1,81 +1,122 @@
## [Preconditioners](@id preconditioners)
-The solvers in Krylov.jl support preconditioners that modify a given linear systems $Ax = b$ into a form that allows a faster convergence.
+The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear systems $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic.
+Preconditioning can be used to reduce the condition number of the problem or clusterize its eigenvalues for instance.
-It exists three variants of preconditioning:
+The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application dependent information and structures into account.
+Specialized preconditioners generally outperform generic preconditioners such as incomplete factorizations.
-| Left preconditioning | Two-sided preconditioning | Right preconditioning |
-|:--------------------:|:-------------------------------:|:------------------------------:|
-| $MAx = Mb$ | $MANy = Mb~~\text{with}~~x = Ny$| $ANy = b~~\text{with}~~x = Ny$ |
+The construction of a preconditioner necessitates a trade-off because we need to apply it at least once per iteration within a Krylov method.
+Hence, a preconditioner must be constructed such that it is cheap to apply, while also capturing the characteristics of the original system in some sense.
-#### Unsymmetric linear systems
+There exist three variants of preconditioning:
-A Krylov method dedicated to unsymmetric systems allows the three variants.
-We provide these preconditioners with the arguments `M` and `N`.
-It concerns the methods [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
+| Left preconditioning | Two-sided preconditioning | Right preconditioning |
+|:----------------------------------:|:----------------------------------------------------------------------:|:--------------------------------------------:|
+| $P_{\ell}^{-1}Ax = P_{\ell}^{-1}b$ | $P_{\ell}^{-1}AP_r^{-1}y = P_{\ell}^{-1}b~~\text{with}~~x = P_r^{-1}y$ | $AP_r^{-1}y = b~~\text{with}~~x = P_r^{-1}y$ |
-#### Symmetric linear systems
+where $P_{\ell}$ and $P_r$ are square and nonsingular.
-When $A$ is symmetric, we can only use the centered / split preconditioning $LAL^Tx = Lb$.
-It is a special case of two-sided preconditioning $M=L=N^T$ that maintains the symmetry of the linear systems.
-Krylov methods dedicated to symmetric systems take directly as input a symmetric positive preconditioner $P=LL^T$.
-We provide this preconditioner with the argument `M` in [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
+We consider that $P_{\ell}^1$ and $P_r^1$ are the default preconditioners in Krylov.jl and that we can apply them with the operation $y \leftarrow P^{-1} * x$.
+It is also common to call $P_{\ell}$ and $P_r$ the preconditioners if the equivalent operation $y \leftarrow P \\ x$ is available.
+We support both approach thanks to the argument `ldiv` of the Krylov solvers.
-#### Least-squares problems
+!!! tip
+ A preconditioner only needs to support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl.
+
+#### Square non-Hermitian linear systems
+
+Methods concerned: [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
+
+A Krylov method dedicated to non-Hermitian linear systems allows the three variants.
+
+| Preconditioners | $P_{\ell}^{-1}$ | $P_{\ell}$ | $P_r^{-1}$ | $P_r$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+#### Hermitian linear systems
+
+Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
+
+When $A$ is symmetric, we can only use the centered preconditioning $L^{-1}AL^{-T}x = L^{-1}b$.
+This split preconditioning is a special case of two-sided preconditioning $P_{\ell} = L = P_r^T$ that maintains the symmetry / hermicity of the linear systems.
+
+| Preconditioners | $P^{-1} = L^{-T}L^{-1}$ | $P = LL^{T}$ |
+|:---------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` |
-| Formulation | Without preconditioning | With preconditioning |
-|:---------------------:|:-----------------------:|:-----------------------:|
-| least-squares problem | $\min \\|b - Ax\\|^2_2$ | $\min \\|b - Ax\\|^2_M$ |
-| Normal equation | $A^TAx = A^Tb$ | $A^TMAx = A^TMb$ |
-| Augmented system | $\begin{bmatrix} I & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} M & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+The preconditioner must be symmetric positive definite.
-We provide a symmetric positive definite preconditioner with the argument `M` in [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
+#### Linear least-squares problems
-A second positive definite preconditioner `N` is supported by [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
-It is dedicated to regularized least-squares problems.
+Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
+
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:-----------------------:|:------------------------------:|
+| least-squares problem | $\min \\|b - Ax\\|^2_2$ | $\min \\|b - Ax\\|^2_{E^{-1}}$ |
+| Normal equation | $A^TAx = A^Tb$ | $A^TE^{-1}Ax = A^TE^{-1}b$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) can handle a second preconditioner `N` for regularized least-squares problems.
| Formulation | Without preconditioning | With preconditioning |
|:---------------------:|:-----------------------------------------------:|:------------------------------------------------------:|
-| least-squares problem | $\min \\|b - Ax\\|^2_2 + \lambda^2 \\|x\\|^2_2$ | $\min \\|b - Ax\\|^2_M + \lambda^2 \\|x\\|^2_{N^{-1}}$ |
-| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TMA + \lambda^2 N^{-1})x = A^TMb$ |
-| Augmented system | $\begin{bmatrix} I & A \\ A^T & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} M & A \\ A^T & -\lambda^2 N \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+| least-squares problem | $\min \\|b - Ax\\|^2_2 + \lambda^2 \\|x\\|^2_2$ | $\min \\|b - Ax\\|^2_{E^{-1}} + \lambda^2 \\|x\\|^2_F$ |
+| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TE^{-1}A + \lambda^2 F)x = A^TE^{-1}b$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^T & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+
+The preconditioners must be symmetric positive definite.
+
+#### Linear least-norm problems
-#### Minimum-norm problems
+Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr).
| Formulation | Without preconditioning | With preconditioning |
|:--------------------:|:---------------------------------------:|:----------------------------------------------:|
-| minimum-norm problem | $\min \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \\|x\\|^2_{N^{-1}}~~\text{s.t.}~~Ax = b$ |
-| Normal equation | $AA^Ty = b~~\text{with}~~x = A^Ty$ | $ANA^Ty = b~~\text{with}~~x = NA^Ty$ |
-| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -N & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+| minimum-norm problem | $\min \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
+| Normal equation | $AA^Ty = b~~\text{with}~~x = A^Ty$ | $AF^{-1}A^Ty = b~~\text{with}~~x = F^{-1}A^Ty$ |
+| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
-We provide a symmetric positive definite preconditioner with the argument `N` in [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr).
-A second positive definite preconditioner `M` is supported by [`LNLQ`](@ref lslq), [`CRAIG`](@ref lsqr) and [`CRAIGMR`](@ref lsmr).
-It is dedicated to penalized minimum-norm problems.
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+[`LNLQ`](@ref lslq), [`CRAIG`](@ref lsqr) and [`CRAIGMR`](@ref lsmr) can handle a second preconditioner `M` for penalized minimum-norm problems.
| Formulation | Without preconditioning | With preconditioning |
|:--------------------:|:-------------------------------------------------------------------:|:---------------------------------------------------------------------------------------:|
-| minimum-norm problem | $\min \\|x\\|^2_2 + \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \\|x\\|^2_{N^{-1}} + \\|y\\|^2_{M^{-1}}~~\text{s.t.}~~Ax + \lambda^2 M^{-1}y = b$ |
-| Normal equation | $(AA^T + \lambda^2 I)y = b~~\text{with}~~x = A^Ty$ | $(ANA^T + \lambda^2 M^{-1})y = b~~\text{with}~~x = NA^Ty$ |
-| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -N^{-1} & A^T \\ \phantom{-}A & \lambda^2 M^{-1} \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+| minimum-norm problem | $\min \\|x\\|^2_2 + \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \\|x\\|^2_F + \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
+| Normal equation | $(AA^T + \lambda^2 I)y = b~~\text{with}~~x = A^Ty$ | $(AF^{-1}A^T + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Ty$ |
+| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
#### Saddle-point and symmetric quasi-definite systems
When a symmetric system $Kz = d$ has the 2x2 block structure
```math
- \begin{bmatrix} \tau M^{-1} & \phantom{-}A \\ A^T & \nu N^{-1} \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+ \begin{bmatrix} \tau E & \phantom{-}A \\ A^T & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
```
-where $M^{-1}$ and $N^{-1}$ are symmetric positive definite, [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of this structure if preconditioners `M` and `N` that model $M$ and $N$ are available.
+where $E$ and $F$ are symmetric positive definite, [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of this structure if preconditioners `M` and `N` that model the inverse of $E$ and $F$ are available.
+
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
#### Generalized saddle-point and unsymmetric partitioned systems
When an unsymmetric system $Kz = d$ has the 2x2 block structure
```math
- \begin{bmatrix} \lambda M^{-1} & A \\ B & \mu N^{-1} \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+ \begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
```
-[`GPMR`](@ref gpmr) can take advantage of this structure if preconditioners `C`, `D`, `E` and `F` such that $CE = M$ and $DF = N$ are available.
+[`GPMR`](@ref gpmr) can take advantage of this structure if we model the inverse of $M$ and $N$ with the help of preconditioners `C`, `D`, `E` and `F`.
-!!! tip
- A preconditioner `P` only needs to support the operation `mul!(y, P, x)` to be used in Krylov.jl.
+| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ |
+|:---------------:|:---------------------------:|:--------------------------:|:---------------------------:|:--------------------------:|
+| Arguments | `C` / `E` with `ldiv=false` | `C` / `E` with `ldiv=true` | `D` / `F` with `ldiv=false` | `D` / `F` with `ldiv=true` |
!!! note
Our implementations of [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr) and [`TriLQR`](@ref trilqr) don't support preconditioning.
@@ -86,3 +127,4 @@ When an unsymmetric system $Kz = d$ has the 2x2 block structure
- [ILUZero.jl](https://github.com/mcovalt/ILUZero.jl) is a Julia implementation of incomplete LU factorization with zero level of fill-in.
- [LimitedLDLFactorizations.jl](https://github.com/JuliaSmoothOptimizers/LimitedLDLFactorizations.jl) for limited-memory LDLᵀ factorization of symmetric matrices.
- [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) provides two algebraic multigrid (AMG) preconditioners.
+- [RandomizedPreconditioners.jl](https://github.com/tjdiamandis/RandomizedPreconditioners.jl) uses randomized numerical linear algebra to construct approximate inverses of matrices.
From 08c0e1ed6e6f06892c80ff0227cbe51b5d5f822e Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Wed, 24 Aug 2022 09:24:37 -0400
Subject: [PATCH 007/132] Final version of preconditioners.md
---
docs/src/preconditioners.md | 113 ++++++++++++++++++++++++++----------
1 file changed, 81 insertions(+), 32 deletions(-)
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index 15cb3f362..416c3fd6b 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -1,4 +1,4 @@
-## [Preconditioners](@id preconditioners)
+# [Preconditioners](@id preconditioners)
The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear systems $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic.
Preconditioning can be used to reduce the condition number of the problem or clusterize its eigenvalues for instance.
@@ -6,7 +6,7 @@ Preconditioning can be used to reduce the condition number of the problem or clu
The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application dependent information and structures into account.
Specialized preconditioners generally outperform generic preconditioners such as incomplete factorizations.
-The construction of a preconditioner necessitates a trade-off because we need to apply it at least once per iteration within a Krylov method.
+The construction of a preconditioner also necessitates a trade-off because we need to apply it at least once per iteration within a Krylov method.
Hence, a preconditioner must be constructed such that it is cheap to apply, while also capturing the characteristics of the original system in some sense.
There exist three variants of preconditioning:
@@ -17,37 +17,40 @@ There exist three variants of preconditioning:
where $P_{\ell}$ and $P_r$ are square and nonsingular.
-We consider that $P_{\ell}^1$ and $P_r^1$ are the default preconditioners in Krylov.jl and that we can apply them with the operation $y \leftarrow P^{-1} * x$.
-It is also common to call $P_{\ell}$ and $P_r$ the preconditioners if the equivalent operation $y \leftarrow P \\ x$ is available.
-We support both approach thanks to the argument `ldiv` of the Krylov solvers.
+We consider that $P_{\ell}^{-1}$ and $P_r^{-1}$ are the default preconditioners in Krylov.jl and that we can apply them with the operation $y \leftarrow P^{-1} * x$.
+It is also common to call $P_{\ell}$ and $P_r$ the preconditioners if the equivalent operation $y \leftarrow P~\backslash~x$ is available.
+Krylov.jl supports both approach thanks to the argument `ldiv` of the Krylov solvers.
+
+## How to use preconditioners in Krylov.jl?
!!! tip
A preconditioner only needs to support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl.
-#### Square non-Hermitian linear systems
+### Square non-Hermitian linear systems
Methods concerned: [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
-A Krylov method dedicated to non-Hermitian linear systems allows the three variants.
+A Krylov method dedicated to non-Hermitian linear systems allows the three variants of preconditioning.
| Preconditioners | $P_{\ell}^{-1}$ | $P_{\ell}$ | $P_r^{-1}$ | $P_r$ |
|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
-#### Hermitian linear systems
+### Hermitian linear systems
Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
-When $A$ is symmetric, we can only use the centered preconditioning $L^{-1}AL^{-T}x = L^{-1}b$.
-This split preconditioning is a special case of two-sided preconditioning $P_{\ell} = L = P_r^T$ that maintains the symmetry / hermicity of the linear systems.
+When $A$ is Hermitian, we can only use the centered preconditioning $L^{-1}AL^{-T}y = L^{-1}b$ with $x = L^{-T}y$.
+This split preconditioning is a special case of two-sided preconditioning $P_{\ell} = L = P_r^T$ that maintains the hermicity of the linear systems.
| Preconditioners | $P^{-1} = L^{-T}L^{-1}$ | $P = LL^{T}$ |
|:---------------:|:-----------------------:|:--------------------:|
| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` |
-The preconditioner must be symmetric positive definite.
+!!! warning
+ The preconditioner `M` must be hermitian and positive definite.
-#### Linear least-squares problems
+### Linear least-squares problems
Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
@@ -57,11 +60,7 @@ Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq)
| Normal equation | $A^TAx = A^Tb$ | $A^TE^{-1}Ax = A^TE^{-1}b$ |
| Augmented system | $\begin{bmatrix} I & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
-| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
-|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
-| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
-
-[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) can handle a second preconditioner `N` for regularized least-squares problems.
+[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) also handle regularized least-squares problems.
| Formulation | Without preconditioning | With preconditioning |
|:---------------------:|:-----------------------------------------------:|:------------------------------------------------------:|
@@ -69,9 +68,14 @@ Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq)
| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TE^{-1}A + \lambda^2 F)x = A^TE^{-1}b$ |
| Augmented system | $\begin{bmatrix} I & A \\ A^T & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
-The preconditioners must be symmetric positive definite.
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
-#### Linear least-norm problems
+!!! warning
+ The preconditioners `M` and `N` must be hermitian and positive definite.
+
+### Linear least-norm problems
Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr).
@@ -81,11 +85,7 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
| Normal equation | $AA^Ty = b~~\text{with}~~x = A^Ty$ | $AF^{-1}A^Ty = b~~\text{with}~~x = F^{-1}A^Ty$ |
| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
-| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
-|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
-| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
-
-[`LNLQ`](@ref lslq), [`CRAIG`](@ref lsqr) and [`CRAIGMR`](@ref lsmr) can handle a second preconditioner `M` for penalized minimum-norm problems.
+[`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr) also handle penalized minimum-norm problems.
| Formulation | Without preconditioning | With preconditioning |
|:--------------------:|:-------------------------------------------------------------------:|:---------------------------------------------------------------------------------------:|
@@ -93,34 +93,46 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
| Normal equation | $(AA^T + \lambda^2 I)y = b~~\text{with}~~x = A^Ty$ | $(AF^{-1}A^T + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Ty$ |
| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
-#### Saddle-point and symmetric quasi-definite systems
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+ The preconditioners `M` and `N` must be hermitian and positive definite.
-When a symmetric system $Kz = d$ has the 2x2 block structure
+### Saddle-point and symmetric quasi-definite systems
+
+When a Hermitian system $Kz = d$ has the 2x2 block structure
```math
\begin{bmatrix} \tau E & \phantom{-}A \\ A^T & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
```
-where $E$ and $F$ are symmetric positive definite, [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of this structure if preconditioners `M` and `N` that model the inverse of $E$ and $F$ are available.
+where $E$ and $F$ are Hermitian and positive definite, [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of this form if preconditioners `M` and `N` that model the inverse of $E$ and $F$ are available.
| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+!!! warning
+ The preconditioners `M` and `N` must be hermitian and positive definite.
-#### Generalized saddle-point and unsymmetric partitioned systems
+### Generalized saddle-point and unsymmetric partitioned systems
-When an unsymmetric system $Kz = d$ has the 2x2 block structure
+When a non-Hermitian system $Kz = d$ has the 2x2 block structure
```math
\begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
```
[`GPMR`](@ref gpmr) can take advantage of this structure if we model the inverse of $M$ and $N$ with the help of preconditioners `C`, `D`, `E` and `F`.
-| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ |
-|:---------------:|:---------------------------:|:--------------------------:|:---------------------------:|:--------------------------:|
-| Arguments | `C` / `E` with `ldiv=false` | `C` / `E` with `ldiv=true` | `D` / `F` with `ldiv=false` | `D` / `F` with `ldiv=true` |
+| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ |
+|:---------------:|:-----------------------------:|:----------------------------:|:-----------------------------:|:----------------------------:|
+| Arguments | `C` and `E` with `ldiv=false` | `C` and `E` with `ldiv=true` | `D` and `F` with `ldiv=false` | `D` and `F` with `ldiv=true` |
!!! note
Our implementations of [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr) and [`TriLQR`](@ref trilqr) don't support preconditioning.
+!!! info
+ The default value of a preconditioner in Krylov.jl is the identity operator `I`.
+
## Packages that provide preconditioners
- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking or Crout version of ILU decompositions.
@@ -128,3 +140,40 @@ When an unsymmetric system $Kz = d$ has the 2x2 block structure
- [LimitedLDLFactorizations.jl](https://github.com/JuliaSmoothOptimizers/LimitedLDLFactorizations.jl) for limited-memory LDLᵀ factorization of symmetric matrices.
- [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) provides two algebraic multigrid (AMG) preconditioners.
- [RandomizedPreconditioners.jl](https://github.com/tjdiamandis/RandomizedPreconditioners.jl) uses randomized numerical linear algebra to construct approximate inverses of matrices.
+
+## Examples
+
+```julia
+using Krylov
+n, m = size(A)
+d = [A[i,i] ≠ 0 ? 1 / abs(A[i,i]) : 1 for i=1:n] # Jacobi preconditioner
+P⁻¹ = diagm(d)
+x, stats = symmlq(A, b, M=P⁻¹)
+```
+
+```julia
+using Krylov
+n, m = size(A)
+d = [1 / norm(A[:,i]) for i=1:m] # diagonal preconditioner
+P⁻¹ = diagm(d)
+x, stats = minres(A, b, M=P⁻¹)
+```
+
+```julia
+using IncompleteLU, Krylov
+Pℓ = ilu(A)
+x, stats = gmres(A, b, M=Pℓ, ldiv=true) # left preconditioning
+```
+
+```julia
+using LimitedLDLFactorizations, Krylov
+P = lldl(A)
+P.D .= abs.(P.D)
+x, stats = cg(A, b, M=P, ldiv=true) # centered preconditioning
+```
+
+```julia
+using ILUZero, Krylov
+Pᵣ = ilu0(A)
+x, stats = bicgstab(A, b, N=Pᵣ, ldiv=true) # right preconditioning
+```
From 016d6c66d35eddce4dd1859e151b7c1428596376 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Fri, 2 Sep 2022 18:10:02 -0400
Subject: [PATCH 008/132] Fix tests with CGNE
---
test/test_cgne.jl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/test/test_cgne.jl b/test/test_cgne.jl
index a48b4569e..c1a3e798b 100644
--- a/test/test_cgne.jl
+++ b/test/test_cgne.jl
@@ -1,6 +1,6 @@
-function test_cgne(A, b; λ=0.0, N=I)
+function test_cgne(A, b; λ=0.0, N=I, history=false)
(nrow, ncol) = size(A)
- (x, stats) = cgne(A, b, λ=λ, N=N)
+ (x, stats) = cgne(A, b, λ=λ, N=N, history=history)
r = b - A * x
if λ > 0
s = r / sqrt(λ)
From 5d1a87f819424af856a7f5ee2788eda6844fee61 Mon Sep 17 00:00:00 2001
From: Alexis <35051714+amontoison@users.noreply.github.com>
Date: Tue, 6 Sep 2022 17:54:18 -0400
Subject: [PATCH 009/132] Apply suggestions from @dpo
Co-authored-by: Dominique
---
docs/src/preconditioners.md | 37 +++++++++++++++++--------------------
1 file changed, 17 insertions(+), 20 deletions(-)
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index 416c3fd6b..e57baf842 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -1,12 +1,12 @@
# [Preconditioners](@id preconditioners)
The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear systems $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic.
-Preconditioning can be used to reduce the condition number of the problem or clusterize its eigenvalues for instance.
+Preconditioning can be used to reduce the condition number of the problem or cluster its eigenvalues or singular values for instance.
-The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application dependent information and structures into account.
+The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application-dependent information and structure into account.
Specialized preconditioners generally outperform generic preconditioners such as incomplete factorizations.
-The construction of a preconditioner also necessitates a trade-off because we need to apply it at least once per iteration within a Krylov method.
+The construction of a preconditioner necessitates trade-offs because we need to apply it at least once per iteration within a Krylov method.
Hence, a preconditioner must be constructed such that it is cheap to apply, while also capturing the characteristics of the original system in some sense.
There exist three variants of preconditioning:
@@ -17,14 +17,14 @@ There exist three variants of preconditioning:
where $P_{\ell}$ and $P_r$ are square and nonsingular.
-We consider that $P_{\ell}^{-1}$ and $P_r^{-1}$ are the default preconditioners in Krylov.jl and that we can apply them with the operation $y \leftarrow P^{-1} * x$.
+In Krylov.jl, we call $P_{\ell}^{-1}$ and $P_r^{-1}$ the preconditioners and we assume that we can apply them with the operation $y \leftarrow P^{-1} * x$.
It is also common to call $P_{\ell}$ and $P_r$ the preconditioners if the equivalent operation $y \leftarrow P~\backslash~x$ is available.
-Krylov.jl supports both approach thanks to the argument `ldiv` of the Krylov solvers.
+Krylov.jl supports both approaches thanks to the argument `ldiv` of the Krylov solvers.
## How to use preconditioners in Krylov.jl?
!!! tip
- A preconditioner only needs to support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl.
+ A preconditioner only need support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl.
### Square non-Hermitian linear systems
@@ -40,8 +40,9 @@ A Krylov method dedicated to non-Hermitian linear systems allows the three varia
Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
-When $A$ is Hermitian, we can only use the centered preconditioning $L^{-1}AL^{-T}y = L^{-1}b$ with $x = L^{-T}y$.
-This split preconditioning is a special case of two-sided preconditioning $P_{\ell} = L = P_r^T$ that maintains the hermicity of the linear systems.
+When $A$ is Hermitian, we can only use centered preconditioning $L^{-1}AL^{-T}y = L^{-1}b$ with $x = L^{-T}y$.
+Centered preconditioning is a special case of two-sided preconditioning with $P_{\ell} = L = P_r^T$ that maintains hermicity.
+However, there is no need to specify $L$ and one may specify $M$ directly.
| Preconditioners | $P^{-1} = L^{-T}L^{-1}$ | $P = LL^{T}$ |
|:---------------:|:-----------------------:|:--------------------:|
@@ -56,7 +57,7 @@ Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq)
| Formulation | Without preconditioning | With preconditioning |
|:---------------------:|:-----------------------:|:------------------------------:|
-| least-squares problem | $\min \\|b - Ax\\|^2_2$ | $\min \\|b - Ax\\|^2_{E^{-1}}$ |
+| least-squares problem | $\min \\tfrac{1}{2} \\|b - Ax\\|^2_2$ | $\min \\tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}}$ |
| Normal equation | $A^TAx = A^Tb$ | $A^TE^{-1}Ax = A^TE^{-1}b$ |
| Augmented system | $\begin{bmatrix} I & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
@@ -64,7 +65,7 @@ Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq)
| Formulation | Without preconditioning | With preconditioning |
|:---------------------:|:-----------------------------------------------:|:------------------------------------------------------:|
-| least-squares problem | $\min \\|b - Ax\\|^2_2 + \lambda^2 \\|x\\|^2_2$ | $\min \\|b - Ax\\|^2_{E^{-1}} + \lambda^2 \\|x\\|^2_F$ |
+| least-squares problem | $\min \\tfrac{1}{2} \\|b - Ax\\|^2_2 + \\tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \\tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \\tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ |
| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TE^{-1}A + \lambda^2 F)x = A^TE^{-1}b$ |
| Augmented system | $\begin{bmatrix} I & A \\ A^T & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
@@ -81,7 +82,7 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
| Formulation | Without preconditioning | With preconditioning |
|:--------------------:|:---------------------------------------:|:----------------------------------------------:|
-| minimum-norm problem | $\min \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
+| minimum-norm problem | $\min \\tfrac{1}{2} \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \\tfrac{1}{2} \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
| Normal equation | $AA^Ty = b~~\text{with}~~x = A^Ty$ | $AF^{-1}A^Ty = b~~\text{with}~~x = F^{-1}A^Ty$ |
| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
@@ -89,7 +90,7 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
| Formulation | Without preconditioning | With preconditioning |
|:--------------------:|:-------------------------------------------------------------------:|:---------------------------------------------------------------------------------------:|
-| minimum-norm problem | $\min \\|x\\|^2_2 + \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \\|x\\|^2_F + \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
+| minimum-norm problem | $\min \\tfrac{1}{2} \\|x\\|^2_2 + \\tfrac{1}{2} \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \\tfrac{1}{2} \\|x\\|^2_F + \\tfrac{1}{2} \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
| Normal equation | $(AA^T + \lambda^2 I)y = b~~\text{with}~~x = A^Ty$ | $(AF^{-1}A^T + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Ty$ |
| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
@@ -102,11 +103,9 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
### Saddle-point and symmetric quasi-definite systems
-When a Hermitian system $Kz = d$ has the 2x2 block structure
+[`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of the structure of Hermitian systems $Kz = d$ with the 2x2 block structure
```math
\begin{bmatrix} \tau E & \phantom{-}A \\ A^T & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
-```
-where $E$ and $F$ are Hermitian and positive definite, [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of this form if preconditioners `M` and `N` that model the inverse of $E$ and $F$ are available.
| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
@@ -117,11 +116,9 @@ where $E$ and $F$ are Hermitian and positive definite, [`TriCG`](@ref tricg) and
### Generalized saddle-point and unsymmetric partitioned systems
-When an non-Hermitian system $Kz = d$ has the 2x2 block structure
+[`GPMR`](@ref gpmr) can take advantage of the structure of general square systems $Kz = d$ with the 2x2 block structure
```math
\begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
-```
-[`GPMR`](@ref gpmr) can take advantage of this structure if we model the inverse of $M$ and $N$ with the help of preconditioners `C`, `D`, `E` and `F`.
| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ |
|:---------------:|:-----------------------------:|:----------------------------:|:-----------------------------:|:----------------------------:|
@@ -135,8 +132,8 @@ When an non-Hermitian system $Kz = d$ has the 2x2 block structure
## Packages that provide preconditioners
-- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking or Crout version of ILU decompositions.
-- [ILUZero.jl](https://github.com/mcovalt/ILUZero.jl) is a Julia implementation of incomplete LU factorization with zero level of fill-in.
+- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking and Crout versions of ILU decompositions.
+- [ILUZero.jl](https://github.com/mcovalt/ILUZero.jl) is a Julia implementation of incomplete LU factorization with zero level of fill-in.
- [LimitedLDLFactorizations.jl](https://github.com/JuliaSmoothOptimizers/LimitedLDLFactorizations.jl) for limited-memory LDLᵀ factorization of symmetric matrices.
- [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) provides two algebraic multigrid (AMG) preconditioners.
- [RandomizedPreconditioners.jl](https://github.com/tjdiamandis/RandomizedPreconditioners.jl) uses randomized numerical linear algebra to construct approximate inverses of matrices.
From 5d5484737fb7f0ffe53e5710cc28e1f3e882284e Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 6 Sep 2022 18:37:27 -0400
Subject: [PATCH 010/132] Update test_solvers.jl
---
test/test_solvers.jl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index 468fa5a05..a6003088b 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -631,7 +631,7 @@ function test_solvers(FC)
│ Aᵀr│ Vector{$FC}│ 64│
│ r│ Vector{$FC}│ 32│
│ q│ Vector{$FC}│ 32│
- │ Mq│ Vector{$FC}│ 0│
+ │ Nq│ Vector{$FC}│ 0│
│ s│ Vector{$FC}│ 0│
└──────────┴───────────────┴─────────────────┘
"""
From c9674536f0bde06530abdf36297b2fd687e30763 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 6 Sep 2022 18:37:54 -0400
Subject: [PATCH 011/132] Update preconditioners.md
---
docs/src/preconditioners.md | 52 ++++++++++++++++++-------------------
1 file changed, 25 insertions(+), 27 deletions(-)
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index e57baf842..1bc9b1a7f 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -23,8 +23,9 @@ Krylov.jl supports both approaches thanks to the argument `ldiv` of the Krylov s
## How to use preconditioners in Krylov.jl?
-!!! tip
- A preconditioner only need support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl.
+!!! info
+ - A preconditioner only need support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl.
+ - The default value of a preconditioner in Krylov.jl is the identity operator `I`.
### Square non-Hermitian linear systems
@@ -42,11 +43,11 @@ Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref
When $A$ is Hermitian, we can only use centered preconditioning $L^{-1}AL^{-T}y = L^{-1}b$ with $x = L^{-T}y$.
Centered preconditioning is a special case of two-sided preconditioning with $P_{\ell} = L = P_r^T$ that maintains hermicity.
-However, there is no need to specify $L$ and one may specify $M$ directly.
+However, there is no need to specify $L$ and one may specify $P_c = LL^T$ or its inverse directly.
-| Preconditioners | $P^{-1} = L^{-T}L^{-1}$ | $P = LL^{T}$ |
-|:---------------:|:-----------------------:|:--------------------:|
-| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` |
+| Preconditioners | $P_c^{-1}$ | $P_c$ |
+|:---------------:|:-------------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` |
!!! warning
The preconditioner `M` must be hermitian and positive definite.
@@ -55,18 +56,18 @@ However, there is no need to specify $L$ and one may specify $M$ directly.
Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
-| Formulation | Without preconditioning | With preconditioning |
-|:---------------------:|:-----------------------:|:------------------------------:|
-| least-squares problem | $\min \\tfrac{1}{2} \\|b - Ax\\|^2_2$ | $\min \\tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}}$ |
-| Normal equation | $A^TAx = A^Tb$ | $A^TE^{-1}Ax = A^TE^{-1}b$ |
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:------------------------------------:|:-------------------------------------------:|
+| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}}$ |
+| Normal equation | $A^TAx = A^Tb$ | $A^TE^{-1}Ax = A^TE^{-1}b$ |
| Augmented system | $\begin{bmatrix} I & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) also handle regularized least-squares problems.
-| Formulation | Without preconditioning | With preconditioning |
-|:---------------------:|:-----------------------------------------------:|:------------------------------------------------------:|
-| least-squares problem | $\min \\tfrac{1}{2} \\|b - Ax\\|^2_2 + \\tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \\tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \\tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ |
-| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TE^{-1}A + \lambda^2 F)x = A^TE^{-1}b$ |
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:--------------------------------------------------------------------------:|:---------------------------------------------------------------------------------:|
+| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2 + \\tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \\tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ |
+| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TE^{-1}A + \lambda^2 F)x = A^TE^{-1}b$ |
| Augmented system | $\begin{bmatrix} I & A \\ A^T & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
@@ -80,18 +81,18 @@ Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq)
Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr).
-| Formulation | Without preconditioning | With preconditioning |
-|:--------------------:|:---------------------------------------:|:----------------------------------------------:|
-| minimum-norm problem | $\min \\tfrac{1}{2} \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \\tfrac{1}{2} \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
-| Normal equation | $AA^Ty = b~~\text{with}~~x = A^Ty$ | $AF^{-1}A^Ty = b~~\text{with}~~x = F^{-1}A^Ty$ |
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:----------------------------------------------------:|:----------------------------------------------------:|
+| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
+| Normal equation | $AA^Ty = b~~\text{with}~~x = A^Ty$ | $AF^{-1}A^Ty = b~~\text{with}~~x = F^{-1}A^Ty$ |
| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
[`LNLQ`](@ref lslq), [`CRAIG`](@ref lsqr) and [`CRAIGMR`](@ref lsmr) also handle penalized minimum-norm problems.
-| Formulation | Without preconditioning | With preconditioning |
-|:--------------------:|:-------------------------------------------------------------------:|:---------------------------------------------------------------------------------------:|
-| minimum-norm problem | $\min \\tfrac{1}{2} \\|x\\|^2_2 + \\tfrac{1}{2} \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \\tfrac{1}{2} \\|x\\|^2_F + \\tfrac{1}{2} \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
-| Normal equation | $(AA^T + \lambda^2 I)y = b~~\text{with}~~x = A^Ty$ | $(AF^{-1}A^T + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Ty$ |
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:---------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------:|
+| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2 + \tfrac{1}{2} \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F + \tfrac{1}{2} \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
+| Normal equation | $(AA^T + \lambda^2 I)y = b~~\text{with}~~x = A^Ty$ | $(AF^{-1}A^T + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Ty$ |
| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
@@ -106,7 +107,7 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
[`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of the structure of Hermitian systems $Kz = d$ with the 2x2 block structure
```math
\begin{bmatrix} \tau E & \phantom{-}A \\ A^T & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
-
+```
| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
@@ -119,7 +120,7 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
[`GPMR`](@ref gpmr) can take advantage of the structure of general square systems $Kz = d$ with the 2x2 block structure
```math
\begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
-
+```
| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ |
|:---------------:|:-----------------------------:|:----------------------------:|:-----------------------------:|:----------------------------:|
| Arguments | `C` and `E` with `ldiv=false` | `C` and `E` with `ldiv=true` | `D` and `F` with `ldiv=false` | `D` and `F` with `ldiv=true` |
@@ -127,9 +128,6 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
!!! note
Our implementations of [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr) and [`TriLQR`](@ref trilqr) don't support preconditioning.
-!!! info
- The default value of a preconditioner in Krylov.jl is the identity operator `I`.
-
## Packages that provide preconditioners
- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking and Crout versions of ILU decompositions.
From 55cbe4e374331d6cffc86b67ab838ca0e86065b1 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 6 Sep 2022 18:42:53 -0400
Subject: [PATCH 012/132] fix \tfrac in preconditioners.md
---
docs/src/preconditioners.md | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index 1bc9b1a7f..7f3fb931e 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -64,10 +64,10 @@ Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq)
[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) also handle regularized least-squares problems.
-| Formulation | Without preconditioning | With preconditioning |
-|:---------------------:|:--------------------------------------------------------------------------:|:---------------------------------------------------------------------------------:|
-| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2 + \\tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \\tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ |
-| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TE^{-1}A + \lambda^2 F)x = A^TE^{-1}b$ |
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:-------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:|
+| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2 + \tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ |
+| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TE^{-1}A + \lambda^2 F)x = A^TE^{-1}b$ |
| Augmented system | $\begin{bmatrix} I & A \\ A^T & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
From dd4277086b1f8d9ea8ff8429b8914e65ba963322 Mon Sep 17 00:00:00 2001
From: Alexis <35051714+amontoison@users.noreply.github.com>
Date: Tue, 6 Sep 2022 21:26:33 -0400
Subject: [PATCH 013/132] Update docs/src/preconditioners.md
Co-authored-by: Dominique
---
docs/src/preconditioners.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index 7f3fb931e..9e248b994 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -1,6 +1,6 @@
# [Preconditioners](@id preconditioners)
-The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear systems $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic.
+The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear system $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic.
Preconditioning can be used to reduce the condition number of the problem or cluster its eigenvalues or singular values for instance.
The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application-dependent information and structure into account.
From 645e094ede5d034b63de3524d2717e0b19dcedc1 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 6 Sep 2022 21:32:52 -0400
Subject: [PATCH 014/132] A^T -> A^H
---
docs/src/preconditioners.md | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index 9e248b994..133020dc0 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -41,9 +41,9 @@ A Krylov method dedicated to non-Hermitian linear systems allows the three varia
Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
-When $A$ is Hermitian, we can only use centered preconditioning $L^{-1}AL^{-T}y = L^{-1}b$ with $x = L^{-T}y$.
-Centered preconditioning is a special case of two-sided preconditioning with $P_{\ell} = L = P_r^T$ that maintains hermicity.
-However, there is no need to specify $L$ and one may specify $P_c = LL^T$ or its inverse directly.
+When $A$ is Hermitian, we can only use centered preconditioning $L^{-1}AL^{-H}y = L^{-1}b$ with $x = L^{-H}y$.
+Centered preconditioning is a special case of two-sided preconditioning with $P_{\ell} = L = P_r^H$ that maintains Hermiticity.
+However, there is no need to specify $L$ and one may specify $P_c = LL^H$ or its inverse directly.
| Preconditioners | $P_c^{-1}$ | $P_c$ |
|:---------------:|:-------------------------:|:--------------------:|
@@ -59,16 +59,16 @@ Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq)
| Formulation | Without preconditioning | With preconditioning |
|:---------------------:|:------------------------------------:|:-------------------------------------------:|
| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}}$ |
-| Normal equation | $A^TAx = A^Tb$ | $A^TE^{-1}Ax = A^TE^{-1}b$ |
-| Augmented system | $\begin{bmatrix} I & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+| Normal equation | $A^HAx = A^Hb$ | $A^HE^{-1}Ax = A^HE^{-1}b$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) also handle regularized least-squares problems.
| Formulation | Without preconditioning | With preconditioning |
|:---------------------:|:-------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:|
| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2 + \tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ |
-| Normal equation | $(A^TA + \lambda^2 I)x = A^Tb$ | $(A^TE^{-1}A + \lambda^2 F)x = A^TE^{-1}b$ |
-| Augmented system | $\begin{bmatrix} I & A \\ A^T & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^T & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+| Normal equation | $(A^HA + \lambda^2 I)x = A^Hb$ | $(A^HE^{-1}A + \lambda^2 F)x = A^HE^{-1}b$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^H & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
@@ -84,16 +84,16 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
| Formulation | Without preconditioning | With preconditioning |
|:--------------------:|:----------------------------------------------------:|:----------------------------------------------------:|
| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
-| Normal equation | $AA^Ty = b~~\text{with}~~x = A^Ty$ | $AF^{-1}A^Ty = b~~\text{with}~~x = F^{-1}A^Ty$ |
-| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+| Normal equation | $AA^Hy = b~~\text{with}~~x = A^Hy$ | $AF^{-1}A^Hy = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
[`LNLQ`](@ref lslq), [`CRAIG`](@ref lsqr) and [`CRAIGMR`](@ref lsmr) also handle penalized minimum-norm problems.
| Formulation | Without preconditioning | With preconditioning |
|:--------------------:|:---------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------:|
| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2 + \tfrac{1}{2} \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F + \tfrac{1}{2} \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
-| Normal equation | $(AA^T + \lambda^2 I)y = b~~\text{with}~~x = A^Ty$ | $(AF^{-1}A^T + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Ty$ |
-| Augmented system | $\begin{bmatrix} -I & A^T \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^T \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+| Normal equation | $(AA^H + \lambda^2 I)y = b~~\text{with}~~x = A^Hy$ | $(AF^{-1}A^H + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
@@ -106,7 +106,7 @@ Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq)
[`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of the structure of Hermitian systems $Kz = d$ with the 2x2 block structure
```math
- \begin{bmatrix} \tau E & \phantom{-}A \\ A^T & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
+ \begin{bmatrix} \tau E & \phantom{-}A \\ A^H & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix},
```
| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
From 330304a3478519acbbf72fc268f9666d215cc26f Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 6 Sep 2022 18:59:51 -0400
Subject: [PATCH 015/132] =?UTF-8?q?[documentation]=20Use=20A=E1=B4=B4=20in?=
=?UTF-8?q?stead=20of=20A=E1=B5=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
README.md | 6 +++---
docs/src/examples/tricg.md | 12 ++++++------
docs/src/examples/trimr.md | 6 +++---
docs/src/gpu.md | 2 +-
docs/src/index.md | 6 +++---
docs/src/warm_start.md | 4 ++--
6 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/README.md b/README.md
index a4664e187..ced20f308 100644
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@ Overdetermined systems are less common but also occur.
4. Adjoint systems
- Ax = b and Aᵀy = c
+ Ax = b and Aᴴy = c
where **_A_** can have any shape.
@@ -81,7 +81,7 @@ where **_A_** can have any shape.
[M A] [x] = [b]
- [Aᵀ -N] [y] [c]
+ [Aᴴ -N] [y] [c]
where **_A_** can have any shape.
@@ -94,7 +94,7 @@ where **_A_** can have any shape.
[B N] [y] [c]
-where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**.
+where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**.
**_A_**, **_B_**, **_b_** and **_c_** must be all nonzero.
Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because:
diff --git a/docs/src/examples/tricg.md b/docs/src/examples/tricg.md
index e981c2f7e..61750de5f 100644
--- a/docs/src/examples/tricg.md
+++ b/docs/src/examples/tricg.md
@@ -14,7 +14,7 @@ N = diagm(0 => [5.0 * i for i = 1:n])
c = -b
# [I A] [x] = [b]
-# [Aᵀ -I] [y] [c]
+# [Aᴴ -I] [y] [c]
(x, y, stats) = tricg(A, b, c)
K = [eye(m) A; A' -eye(n)]
B = [b; c]
@@ -23,7 +23,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [-I A] [x] = [b]
-# [ Aᵀ I] [y] [c]
+# [ Aᴴ I] [y] [c]
(x, y, stats) = tricg(A, b, c, flip=true)
K = [-eye(m) A; A' eye(n)]
B = [b; c]
@@ -32,7 +32,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [I A] [x] = [b]
-# [Aᵀ I] [y] [c]
+# [Aᴴ I] [y] [c]
(x, y, stats) = tricg(A, b, c, spd=true)
K = [eye(m) A; A' eye(n)]
B = [b; c]
@@ -41,7 +41,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [-I A] [x] = [b]
-# [ Aᵀ -I] [y] [c]
+# [ Aᴴ -I] [y] [c]
(x, y, stats) = tricg(A, b, c, snd=true)
K = [-eye(m) A; A' -eye(n)]
B = [b; c]
@@ -50,7 +50,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [τI A] [x] = [b]
-# [ Aᵀ νI] [y] [c]
+# [ Aᴴ νI] [y] [c]
(τ, ν) = (1e-4, 1e2)
(x, y, stats) = tricg(A, b, c, τ=τ, ν=ν)
K = [τ*eye(m) A; A' ν*eye(n)]
@@ -60,7 +60,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [M⁻¹ A ] [x] = [b]
-# [Aᵀ -N⁻¹] [y] [c]
+# [Aᴴ -N⁻¹] [y] [c]
(x, y, stats) = tricg(A, b, c, M=M, N=N, verbose=1)
K = [inv(M) A; A' -inv(N)]
H = BlockDiagonalOperator(M, N)
diff --git a/docs/src/examples/trimr.md b/docs/src/examples/trimr.md
index 2aa48be1e..adc4e82e5 100644
--- a/docs/src/examples/trimr.md
+++ b/docs/src/examples/trimr.md
@@ -14,7 +14,7 @@ m, n = size(A)
c = -b
# [D A] [x] = [b]
-# [Aᵀ 0] [y] [c]
+# [Aᴴ 0] [y] [c]
llt_D = cholesky(D)
opD⁻¹ = LinearOperator(Float64, 5, 5, true, true, (y, v) -> ldiv!(y, llt_D, v))
opH⁻¹ = BlockDiagonalOperator(opD⁻¹, eye(n))
@@ -34,7 +34,7 @@ N = diagm(0 => [5.0 * i for i = 1:n])
c = -b
# [I A] [x] = [b]
-# [Aᵀ -I] [y] [c]
+# [Aᴴ -I] [y] [c]
(x, y, stats) = trimr(A, b, c)
K = [eye(m) A; A' -eye(n)]
B = [b; c]
@@ -43,7 +43,7 @@ resid = norm(r)
@printf("TriMR: Relative residual: %8.1e\n", resid)
# [M A] [x] = [b]
-# [Aᵀ -N] [y] [c]
+# [Aᴴ -N] [y] [c]
ldlt_M = ldl(M)
ldlt_N = ldl(N)
opM⁻¹ = LinearOperator(Float64, size(M,1), size(M,2), true, true, (y, v) -> ldiv!(y, ldlt_M, v))
diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 4c9887f24..3c6bc1e29 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -50,7 +50,7 @@ using CUDA, CUDA.CUSPARSE
A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu)
b_gpu = CuVector(b_cpu)
-# LLᵀ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
+# LLᴴ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
P = ic02(A_gpu, 'O')
# Solve Py = x
diff --git a/docs/src/index.md b/docs/src/index.md
index ce657436d..00694b4de 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -46,7 +46,7 @@ Overdetermined sytems are less common but also occur.
4 - Adjoint systems
```math
- Ax = b \quad \text{and} \quad A^T y = c
+ Ax = b \quad \text{and} \quad A^H y = c
```
where **_A_** can have any shape.
@@ -54,7 +54,7 @@ where **_A_** can have any shape.
5 - Saddle-point and symmetric quasi-definite (SQD) systems
```math
- \begin{bmatrix} M & \phantom{-}A \\ A^T & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right)
+ \begin{bmatrix} M & \phantom{-}A \\ A^H & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right)
```
where **_A_** can have any shape.
@@ -65,7 +65,7 @@ where **_A_** can have any shape.
\begin{bmatrix} M & A \\ B & N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix}
```
-where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**.
+where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**.
**_A_**, **_B_**, **_b_** and **_c_** must be all nonzero.
Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because:
diff --git a/docs/src/warm_start.md b/docs/src/warm_start.md
index 030cad6c0..e1d680efd 100644
--- a/docs/src/warm_start.md
+++ b/docs/src/warm_start.md
@@ -41,14 +41,14 @@ Explicit restarts cannot be avoided in certain block methods, such as TriMR, due
```julia
# [E A] [x] = [b]
-# [Aᵀ F] [y] [c]
+# [Aᴴ F] [y] [c]
M = inv(E)
N = inv(F)
x₀, y₀, stats = trimr(A, b, c, M=M, N=N)
# E and F are not available inside TriMR
b₀ = b - Ex₀ - Ay
-c₀ = c - Aᵀx₀ - Fy
+c₀ = c - Aᴴx₀ - Fy
Δx, Δy, stats = trimr(A, b₀, c₀, M=M, N=N)
x = x₀ + Δx
From 604c96066cc6bf24e9dfe05b92592204c08973c4 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 6 Sep 2022 19:04:15 -0400
Subject: [PATCH 016/132] =?UTF-8?q?[test]=20Use=20A=E1=B4=B4=20instead=20o?=
=?UTF-8?q?f=20A=E1=B5=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
test/get_div_grad.jl | 4 ++--
test/test_allocations.jl | 16 ++++++++--------
test/test_bicgstab.jl | 4 ++--
test/test_bilq.jl | 4 ++--
test/test_bilqr.jl | 4 ++--
test/test_cgs.jl | 4 ++--
test/test_qmr.jl | 4 ++--
test/test_solvers.jl | 16 ++++++++--------
test/test_trilqr.jl | 2 +-
test/test_utils.jl | 6 +++---
10 files changed, 32 insertions(+), 32 deletions(-)
diff --git a/test/get_div_grad.jl b/test/get_div_grad.jl
index 6d6bf012e..ae27e5061 100644
--- a/test/get_div_grad.jl
+++ b/test/get_div_grad.jl
@@ -1,8 +1,8 @@
# Identity matrix.
eye(n::Int; FC=Float64) = sparse(one(FC) * I, n, n)
-# Compute the energy norm ‖r‖ₚ = √(rᵀPr) where P is a symmetric and positive definite matrix.
-metric(r, P) = sqrt(dot(r, P * r))
+# Compute the energy norm ‖r‖ₚ = √(rᴴPr) where P is a Hermitian positive definite matrix.
+metric(r, P) = sqrt(real(dot(r, P * r)))
# Based on Lars Ruthotto's initial implementation.
function get_div_grad(n1 :: Int, n2 :: Int, n3 :: Int)
diff --git a/test/test_allocations.jl b/test/test_allocations.jl
index 4c6817499..790fcc7a8 100644
--- a/test/test_allocations.jl
+++ b/test/test_allocations.jl
@@ -254,7 +254,7 @@
@testset "CGNE" begin
# CGNE needs:
- # - 3 n-vectors: x, p, Aᵀz
+ # - 3 n-vectors: x, p, Aᴴz
# - 2 m-vectors: r, q
storage_cgne(n, m) = 3 * n + 2 * m
storage_cgne_bytes(n, m) = nbits * storage_cgne(n, m)
@@ -272,7 +272,7 @@
@testset "CRMR" begin
# CRMR needs:
- # - 3 n-vectors: x, p, Aᵀr
+ # - 3 n-vectors: x, p, Aᴴr
# - 2 m-vectors: r, q
storage_crmr(n, m) = 3 * n + 2 * m
storage_crmr_bytes(n, m) = nbits * storage_crmr(n, m)
@@ -290,7 +290,7 @@
@testset "LNLQ" begin
# LNLQ needs:
- # - 3 n-vectors: x, v, Aᵀu
+ # - 3 n-vectors: x, v, Aᴴu
# - 4 m-vectors: y, w̄, u, Av
storage_lnlq(n, m) = 3 * n + 4 * m
storage_lnlq_bytes(n, m) = nbits * storage_lnlq(n, m)
@@ -308,7 +308,7 @@
@testset "CRAIG" begin
# CRAIG needs:
- # - 3 n-vectors: x, v, Aᵀu
+ # - 3 n-vectors: x, v, Aᴴu
# - 4 m-vectors: y, w, u, Av
storage_craig(n, m) = 3 * n + 4 * m
storage_craig_bytes(n, m) = nbits * storage_craig(n, m)
@@ -326,7 +326,7 @@
@testset "CRAIGMR" begin
# CRAIGMR needs:
- # - 4 n-vectors: x, v, Aᵀu, d
+ # - 4 n-vectors: x, v, Aᴴu, d
# - 5 m-vectors: y, u, w, wbar, Av
storage_craigmr(n, m) = 4 * n + 5 * m
storage_craigmr_bytes(n, m) = nbits * storage_craigmr(n, m)
@@ -362,7 +362,7 @@
@testset "LSLQ" begin
# LSLQ needs:
- # - 4 m-vectors: x_lq, v, Aᵀu, w̄ (= x_cg)
+ # - 4 m-vectors: x_lq, v, Aᴴu, w̄ (= x_cg)
# - 2 n-vectors: u, Av
storage_lslq(n, m) = 4 * m + 2 * n
storage_lslq_bytes(n, m) = nbits * storage_lslq(n, m)
@@ -398,7 +398,7 @@
@testset "LSQR" begin
# LSQR needs:
- # - 4 m-vectors: x, v, w, Aᵀu
+ # - 4 m-vectors: x, v, w, Aᴴu
# - 2 n-vectors: u, Av
storage_lsqr(n, m) = 4 * m + 2 * n
storage_lsqr_bytes(n, m) = nbits * storage_lsqr(n, m)
@@ -416,7 +416,7 @@
@testset "LSMR" begin
# LSMR needs:
- # - 5 m-vectors: x, v, h, hbar, Aᵀu
+ # - 5 m-vectors: x, v, h, hbar, Aᴴu
# - 2 n-vectors: u, Av
storage_lsmr(n, m) = 5 * m + 2 * n
storage_lsmr_bytes(n, m) = nbits * storage_lsmr(n, m)
diff --git a/test/test_bicgstab.jl b/test/test_bicgstab.jl
index ce4e6dcd4..6817acf3d 100644
--- a/test/test_bicgstab.jl
+++ b/test/test_bicgstab.jl
@@ -82,10 +82,10 @@
@test(resid ≤ bicgstab_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = bicgstab(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
solver = BicgstabSolver(A, b)
diff --git a/test/test_bilq.jl b/test/test_bilq.jl
index 900d1f6e5..40b9872db 100644
--- a/test/test_bilq.jl
+++ b/test/test_bilq.jl
@@ -66,10 +66,10 @@
@test(resid ≤ bilq_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = bilq(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
diff --git a/test/test_bilqr.jl b/test/test_bilqr.jl
index 6dab06ec7..fd46aade4 100644
--- a/test/test_bilqr.jl
+++ b/test/test_bilqr.jl
@@ -46,10 +46,10 @@
@test(resid_dual ≤ bilqr_tol)
@test(stats.solved_dual)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, t, stats) = bilqr(A, b, c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
A, b, c = adjoint_pde(FC=FC)
diff --git a/test/test_cgs.jl b/test/test_cgs.jl
index 5c505bb70..832cd76c3 100644
--- a/test/test_cgs.jl
+++ b/test/test_cgs.jl
@@ -74,10 +74,10 @@
@test(resid ≤ cgs_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = cgs(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
A, b = sparse_laplacian(FC=FC)
diff --git a/test/test_qmr.jl b/test/test_qmr.jl
index 184b9877d..4a6b8c1c9 100644
--- a/test/test_qmr.jl
+++ b/test/test_qmr.jl
@@ -58,10 +58,10 @@
@test(resid ≤ qmr_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = qmr(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
solver = QmrSolver(A, b)
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index a6003088b..6f60cb737 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -628,7 +628,7 @@ function test_solvers(FC)
├──────────┼───────────────┼─────────────────┤
│ x│ Vector{$FC}│ 64│
│ p│ Vector{$FC}│ 64│
- │ Aᵀr│ Vector{$FC}│ 64│
+ │ Aᴴr│ Vector{$FC}│ 64│
│ r│ Vector{$FC}│ 32│
│ q│ Vector{$FC}│ 32│
│ Nq│ Vector{$FC}│ 0│
@@ -694,7 +694,7 @@ function test_solvers(FC)
├─────────────┼───────────────┼─────────────────┤
│ x│ Vector{$FC}│ 64│
│ Nv│ Vector{$FC}│ 64│
- │ Aᵀu│ Vector{$FC}│ 64│
+ │ Aᴴu│ Vector{$FC}│ 64│
│ d│ Vector{$FC}│ 64│
│ y│ Vector{$FC}│ 32│
│ Mu│ Vector{$FC}│ 32│
@@ -719,7 +719,7 @@ function test_solvers(FC)
├──────────┼───────────────┼─────────────────┤
│ x│ Vector{$FC}│ 64│
│ p│ Vector{$FC}│ 64│
- │ Aᵀz│ Vector{$FC}│ 64│
+ │ Aᴴz│ Vector{$FC}│ 64│
│ r│ Vector{$FC}│ 32│
│ q│ Vector{$FC}│ 32│
│ s│ Vector{$FC}│ 0│
@@ -739,7 +739,7 @@ function test_solvers(FC)
├──────────┼───────────────┼─────────────────┤
│ x│ Vector{$FC}│ 64│
│ Nv│ Vector{$FC}│ 64│
- │ Aᵀu│ Vector{$FC}│ 64│
+ │ Aᴴu│ Vector{$FC}│ 64│
│ y│ Vector{$FC}│ 32│
│ w̄│ Vector{$FC}│ 32│
│ Mu│ Vector{$FC}│ 32│
@@ -762,7 +762,7 @@ function test_solvers(FC)
├───────────┼───────────────┼─────────────────┤
│ x│ Vector{$FC}│ 64│
│ Nv│ Vector{$FC}│ 64│
- │ Aᵀu│ Vector{$FC}│ 64│
+ │ Aᴴu│ Vector{$FC}│ 64│
│ y│ Vector{$FC}│ 32│
│ w│ Vector{$FC}│ 32│
│ Mu│ Vector{$FC}│ 32│
@@ -785,7 +785,7 @@ function test_solvers(FC)
├──────────┼───────────────┼─────────────────┤
│ x│ Vector{$FC}│ 32│
│ Nv│ Vector{$FC}│ 32│
- │ Aᵀu│ Vector{$FC}│ 32│
+ │ Aᴴu│ Vector{$FC}│ 32│
│ w̄│ Vector{$FC}│ 32│
│ Mu│ Vector{$FC}│ 64│
│ Av│ Vector{$FC}│ 64│
@@ -826,7 +826,7 @@ function test_solvers(FC)
├──────────┼───────────────┼─────────────────┤
│ x│ Vector{$FC}│ 32│
│ Nv│ Vector{$FC}│ 32│
- │ Aᵀu│ Vector{$FC}│ 32│
+ │ Aᴴu│ Vector{$FC}│ 32│
│ w│ Vector{$FC}│ 32│
│ Mu│ Vector{$FC}│ 64│
│ Av│ Vector{$FC}│ 64│
@@ -869,7 +869,7 @@ function test_solvers(FC)
├──────────┼───────────────┼─────────────────┤
│ x│ Vector{$FC}│ 32│
│ Nv│ Vector{$FC}│ 32│
- │ Aᵀu│ Vector{$FC}│ 32│
+ │ Aᴴu│ Vector{$FC}│ 32│
│ h│ Vector{$FC}│ 32│
│ hbar│ Vector{$FC}│ 32│
│ Mu│ Vector{$FC}│ 64│
diff --git a/test/test_trilqr.jl b/test/test_trilqr.jl
index 7d7927372..baf8a597e 100644
--- a/test/test_trilqr.jl
+++ b/test/test_trilqr.jl
@@ -74,7 +74,7 @@
@test(resid_dual ≤ trilqr_tol)
@test(stats.solved_dual)
- # Test consistent Ax = b and inconsistent Aᵀt = c.
+ # Test consistent Ax = b and inconsistent Aᴴt = c.
A, b, c = rectangular_adjoint(FC=FC)
(x, t, stats) = trilqr(A, b, c)
diff --git a/test/test_utils.jl b/test/test_utils.jl
index ed72056b6..fbfe2e4e0 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -175,10 +175,10 @@ function square_adjoint(n :: Int=100; FC=Float64)
return A, b, c
end
-# Adjoint systems with Ax = b underdetermined consistent and Aᵀt = c overdetermined insconsistent.
+# Adjoint systems with Ax = b underdetermined consistent and Aᴴt = c overdetermined inconsistent.
function rectangular_adjoint(n :: Int=10, m :: Int=25; FC=Float64)
- Aᵀ, c = over_inconsistent(m, n; FC=FC)
- A = adjoint(Aᵀ)
+ Aᴴ, c = over_inconsistent(m, n; FC=FC)
+ A = adjoint(Aᴴ)
b = A * ones(FC, m)
return A, b, c
end
From 73e95a799b87dc6eb097b324fa3e8a618f6cece3 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 6 Sep 2022 21:22:06 -0400
Subject: [PATCH 017/132] =?UTF-8?q?[code]=20Use=20A=E1=B4=B4=20instead=20o?=
=?UTF-8?q?f=20A=E1=B5=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/bicgstab.jl | 6 ++---
src/bilq.jl | 36 +++++++++++++--------------
src/bilqr.jl | 54 ++++++++++++++++++++---------------------
src/cg_lanczos.jl | 8 +++---
src/cg_lanczos_shift.jl | 10 ++++----
src/cgls.jl | 22 ++++++++---------
src/cgne.jl | 14 +++++------
src/cgs.jl | 4 +--
src/cr.jl | 38 ++++++++++++++---------------
src/craig.jl | 24 +++++++++---------
src/craigmr.jl | 34 +++++++++++++-------------
src/crls.jl | 24 +++++++++---------
src/crmr.jl | 26 ++++++++++----------
src/fom.jl | 2 +-
src/gmres.jl | 4 +--
src/gpmr.jl | 12 ++++-----
src/krylov_solvers.jl | 48 ++++++++++++++++++------------------
src/krylov_utils.jl | 8 +++---
src/lnlq.jl | 42 ++++++++++++++++----------------
src/lslq.jl | 36 +++++++++++++--------------
src/lsmr.jl | 38 ++++++++++++++---------------
src/lsqr.jl | 34 +++++++++++++-------------
src/minres.jl | 8 +++---
src/minres_qlp.jl | 4 +--
src/qmr.jl | 30 +++++++++++------------
src/tricg.jl | 26 ++++++++++----------
src/trilqr.jl | 28 ++++++++++-----------
src/trimr.jl | 20 +++++++--------
src/usymlq.jl | 12 ++++-----
src/usymqr.jl | 26 ++++++++++----------
30 files changed, 339 insertions(+), 339 deletions(-)
diff --git a/src/bicgstab.jl b/src/bicgstab.jl
index c3b914599..3e5635775 100644
--- a/src/bicgstab.jl
+++ b/src/bicgstab.jl
@@ -26,10 +26,10 @@ export bicgstab, bicgstab!
Solve the square linear system Ax = b using the BICGSTAB method.
BICGSTAB requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
The Biconjugate Gradient Stabilized method is a variant of BiCG, like CGS,
-but using different updates for the Aᵀ-sequence in order to obtain smoother
+but using different updates for the Aᴴ-sequence in order to obtain smoother
convergence than CGS.
If BICGSTAB stagnates, we recommend DQGMRES and BiLQ as alternative methods for unsymmetric square systems.
@@ -157,7 +157,7 @@ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC};
if next_ρ == 0
stats.niter = 0
stats.solved, stats.inconsistent = false, false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start = false
return solver
end
diff --git a/src/bilq.jl b/src/bilq.jl
index ce84d3ec1..f40538245 100644
--- a/src/bilq.jl
+++ b/src/bilq.jl
@@ -24,7 +24,7 @@ export bilq, bilq!
Solve the square linear system Ax = b using the BiLQ method.
BiLQ is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
When `A` is symmetric and `b = c`, BiLQ is equivalent to SYMMLQ.
An option gives the possibility of transferring to the BiCG point,
@@ -90,7 +90,7 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
ktypeof(c) == S || error("ktypeof(c) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ
@@ -127,25 +127,25 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm)
# Initialize the Lanczos biorthogonalization process.
- cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩
- if cᵗb == 0
+ cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩
+ if cᴴb == 0
stats.niter = 0
stats.solved = false
stats.inconsistent = false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start = false
return solver
end
- βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀)
- γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀)
+ βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀)
+ γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀)
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
@@ -165,10 +165,10 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
# Continue the Lanczos biorthogonalization process.
# AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
- mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
+ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ
@kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
@@ -178,9 +178,9 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
@kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
@kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
- pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩
- βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|)
- γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁
+ pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩
+ βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|)
+ γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁
# Update the LQ factorization of Tₖ = L̅ₖQₖ.
# [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
@@ -235,7 +235,7 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ.
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ.
# [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ
# [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
if iter ≥ 2
@@ -258,13 +258,13 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
- if pᵗq ≠ 0
+ if pᴴq ≠ 0
@. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
@. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
end
# Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖
- vₖᵀvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ)
+ vₖᴴvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ)
norm_vₖ₊₁ = @knrm2(n, vₖ)
# Compute BiLQ residual norm
@@ -274,7 +274,7 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
else
μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
- θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁
+ θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁
rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ))
end
history && push!(rNorms, rNorm_lq)
@@ -300,7 +300,7 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
solved_lq = rNorm_lq ≤ ε
solved_cg = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε)
tired = iter ≥ itmax
- breakdown = !solved_lq && !solved_cg && (pᵗq == 0)
+ breakdown = !solved_lq && !solved_cg && (pᴴq == 0)
kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq)
end
(verbose > 0) && @printf("\n")
diff --git a/src/bilqr.jl b/src/bilqr.jl
index 09fef1f6c..7284597dc 100644
--- a/src/bilqr.jl
+++ b/src/bilqr.jl
@@ -1,5 +1,5 @@
# An implementation of BILQR for the solution of square
-# consistent linear adjoint systems Ax = b and Aᵀy = c.
+# consistent linear adjoint systems Ax = b and Aᴴy = c.
#
# This method is described in
#
@@ -24,11 +24,11 @@ export bilqr, bilqr!
Combine BiLQ and QMR to solve adjoint systems.
[0 A] [y] = [b]
- [Aᵀ 0] [x] [c]
+ [Aᴴ 0] [x] [c]
-The relation `bᵀc ≠ 0` must be satisfied.
+The relation `bᴴc ≠ 0` must be satisfied.
BiLQ is used for solving primal system `Ax = b`.
-QMR is used for solving dual system `Aᵀy = c`.
+QMR is used for solving dual system `Aᴴy = c`.
An option gives the possibility of transferring from the BiLQ point to the
BiCG point, when it exists. The transfer is based on the residual norm.
@@ -94,7 +94,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
ktypeof(c) == S || error("ktypeof(c) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ
@@ -109,7 +109,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
if warm_start
mul!(r₀, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r₀)
- mul!(s₀, Aᵀ, Δy)
+ mul!(s₀, Aᴴ, Δy)
@kaxpby!(n, one(FC), c, -one(FC), s₀)
end
@@ -117,7 +117,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
x .= zero(FC) # x₀
bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖
- # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᵀy₀‖.
+ # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᴴy₀‖.
t .= zero(FC) # t₀
cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖
@@ -132,34 +132,34 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm)
# Initialize the Lanczos biorthogonalization process.
- cᵗb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᵀy₀,b - Ax₀⟩
- if cᵗb == 0
+ cᴴb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᴴy₀,b - Ax₀⟩
+ if cᴴb == 0
stats.niter = 0
stats.solved_primal = false
stats.solved_dual = false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start = false
return solver
end
# Set up workspace.
- βₖ = √(abs(cᵗb)) # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀)
- γₖ = cᵗb / βₖ # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀)
+ βₖ = √(abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
+ γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᵀy₀) / γ̄₁
+ uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᴴy₀) / γ̄₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁
norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
- wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ
- wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ
+ wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ
+ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ
τₖ = zero(T) # τₖ is used for the dual residual norm estimate
# Stopping criterion.
@@ -180,10 +180,10 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Continue the Lanczos biorthogonalization process.
# AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
- mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
+ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ
@kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
@@ -193,9 +193,9 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
@kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
@kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
- pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩
- βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|)
- γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁
+ pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩
+ βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|)
+ γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁
# Update the LQ factorization of Tₖ = L̅ₖQₖ.
# [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
@@ -251,7 +251,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ.
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ.
# [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ
# [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
if iter ≥ 2
@@ -271,7 +271,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
end
# Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖
- vₖᵀvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁
+ vₖᴴvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁
norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁
# Compute BiLQ residual norm
@@ -281,7 +281,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
else
μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
- θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁
+ θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁
rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ))
end
history && push!(rNorms, rNorm_lq)
@@ -318,7 +318,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
ψbarₖ = sₖ * ψbarₖ₋₁
end
- # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ.
+ # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ.
# w₁ = u₁ / δ̄₁
if iter == 2
wₖ₋₁ = wₖ₋₂
@@ -372,7 +372,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
- if pᵗq ≠ zero(FC)
+ if pᴴq ≠ zero(FC)
@. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
@. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
end
@@ -392,7 +392,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
user_requested_exit = callback(solver) :: Bool
tired = iter ≥ itmax
- breakdown = !solved_lq && !solved_cg && (pᵗq == 0)
+ breakdown = !solved_lq && !solved_cg && (pᴴq == 0)
kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm)
kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "")
diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl
index a8e24f02f..2f2dae16d 100644
--- a/src/cg_lanczos.jl
+++ b/src/cg_lanczos.jl
@@ -111,7 +111,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
Mv .= b
end
MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀
- β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁
+ β = sqrt(@kdotr(n, v, Mv)) # β₁ = √(v₁ᴴ M v₁)
σ = β
rNorm = σ
history && push!(rNorms, rNorm)
@@ -157,10 +157,10 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
# Form next Lanczos vector.
# βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁
mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ
- δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ
+ δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ
# Check curvature. Exit fast if requested.
- # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ A pₖ.
+ # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ A pₖ.
γ = one(T) / (δ - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁)
indefinite |= (γ ≤ 0)
(check_curvature & indefinite) && continue
@@ -172,7 +172,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
end
@. Mv = Mv_next # Mvₖ ← Mvₖ₊₁
MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
- β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁
+ β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = √(vₖ₊₁ᴴ M vₖ₊₁)
@kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂.
diff --git a/src/cg_lanczos_shift.jl b/src/cg_lanczos_shift.jl
index 01f11e41f..ff873e5b4 100644
--- a/src/cg_lanczos_shift.jl
+++ b/src/cg_lanczos_shift.jl
@@ -92,7 +92,7 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr
end
Mv .= b # Mv₁ ← b
MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁
- β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁
+ β = sqrt(@kdotr(n, v, Mv)) # β₁ = √(v₁ᴴ M v₁)
rNorms .= β
if history
for i = 1 : nshifts
@@ -157,7 +157,7 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr
# Form next Lanczos vector.
# βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁
mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ
- δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ
+ δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ
@kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ
if iter > 0
@kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁
@@ -165,12 +165,12 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr
end
@. Mv = Mv_next # Mvₖ ← Mvₖ₊₁
MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
- β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁
+ β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = √(vₖ₊₁ᴴ M vₖ₊₁)
@kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
- # Check curvature: vₖᵀ(A + sᵢI)vₖ = vₖᵀAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖².
- # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ (A + sᵢ I) pₖ.
+ # Check curvature: vₖᴴ(A + sᵢI)vₖ = vₖᴴAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖².
+ # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ (A + sᵢ I) pₖ.
MisI || (ρ = @kdotr(n, v, v))
for i = 1 : nshifts
δhat[i] = δ + ρ * shifts[i]
diff --git a/src/cgls.jl b/src/cgls.jl
index f5529fbfb..43fa5a6b6 100644
--- a/src/cgls.jl
+++ b/src/cgls.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# CGLS is formally equivalent to applying the conjugate gradient method
# to the normal equations but should be more stable. It is also formally
@@ -45,11 +45,11 @@ Solve the regularized linear least-squares problem
using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
parameter. This method is equivalent to applying CG to the normal equations
- (AᵀA + λI) x = Aᵀb
+ (AᴴA + λI) x = Aᴴb
but is more stable.
-CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂.
+CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to LSQR, though can be slightly less accurate,
but simpler to implement.
@@ -95,7 +95,7 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :Mr, S, m)
@@ -117,9 +117,9 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
return solver
end
MisI || mulorldiv!(Mr, M, r, ldiv)
- mul!(s, Aᵀ, Mr)
+ mul!(s, Aᴴ, Mr)
p .= s
- γ = @kdotr(n, s, s) # γ = sᵀs
+ γ = @kdotr(n, s, s) # γ = sᴴs
iter = 0
itmax == 0 && (itmax = m + n)
@@ -128,7 +128,7 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
history && push!(rNorms, rNorm)
history && push!(ArNorms, ArNorm)
ε = atol + rtol * ArNorm
- (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖")
+ (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᴴr‖", "‖r‖")
kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
status = "unknown"
@@ -140,8 +140,8 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
while ! (solved || tired || user_requested_exit)
mul!(q, A, p)
MisI || mulorldiv!(Mq, M, q, ldiv)
- δ = @kdotr(m, q, Mq) # δ = qᵀMq
- λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᵀp
+ δ = @kdotr(m, q, Mq) # δ = qᴴMq
+ λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + λ pᴴp
α = γ / δ
# if a trust-region constraint is give, compute step to the boundary
@@ -154,9 +154,9 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kaxpy!(n, α, p, x) # Faster than x = x + α * p
@kaxpy!(m, -α, q, r) # Faster than r = r - α * q
MisI || mulorldiv!(Mr, M, r, ldiv)
- mul!(s, Aᵀ, Mr)
+ mul!(s, Aᴴ, Mr)
λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x
- γ_next = @kdotr(n, s, s) # γ_next = sᵀs
+ γ_next = @kdotr(n, s, s) # γ_next = sᴴs
β = γ_next / γ
@kaxpby!(n, one(FC), s, β, p) # p = s + βp
γ = γ_next
diff --git a/src/cgne.jl b/src/cgne.jl
index 2f720b57c..68039d2de 100644
--- a/src/cgne.jl
+++ b/src/cgne.jl
@@ -10,7 +10,7 @@
# and is equivalent to applying the conjugate gradient method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method is also known as Craig's method, CGME, and other
# names, and is described in
@@ -46,7 +46,7 @@ using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
parameter. This method is equivalent to applying CG to the normal equations
of the second kind
- (AAᵀ + λI) y = b
+ (AAᴴ + λI) y = b
but is more stable. When λ = 0, this method solves the minimum-norm problem
@@ -104,12 +104,12 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!NisI, solver, :z, S, m)
allocate_if(λ > 0, solver, :s, S, m)
- x, p, Aᵀz, r, q, s, stats = solver.x, solver.p, solver.Aᵀz, solver.r, solver.q, solver.s, solver.stats
+ x, p, Aᴴz, r, q, s, stats = solver.x, solver.p, solver.Aᴴz, solver.r, solver.q, solver.s, solver.stats
rNorms = stats.residuals
reset!(stats)
z = NisI ? r : solver.z
@@ -126,7 +126,7 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
return solver
end
λ > 0 && (s .= r)
- mul!(p, Aᵀ, z)
+ mul!(p, Aᴴ, z)
# Use ‖p‖ to detect inconsistent system.
# An inconsistent system will necessarily have AA' singular.
@@ -161,8 +161,8 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
NisI || mulorldiv!(z, N, r, ldiv)
γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z)
β = γ_next / γ
- mul!(Aᵀz, Aᵀ, z)
- @kaxpby!(n, one(FC), Aᵀz, β, p) # Faster than p = Aᵀz + β * p
+ mul!(Aᴴz, Aᴴ, z)
+ @kaxpby!(n, one(FC), Aᴴz, β, p) # Faster than p = Aᴴz + β * p
pNorm = @knrm2(n, p)
if λ > 0
@kaxpby!(m, one(FC), r, β, s) # s = r + β * s
diff --git a/src/cgs.jl b/src/cgs.jl
index c1eb1056e..592eb1b2d 100644
--- a/src/cgs.jl
+++ b/src/cgs.jl
@@ -21,7 +21,7 @@ export cgs, cgs!
Solve the consistent linear system Ax = b using conjugate gradient squared algorithm.
CGS requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
From "Iterative Methods for Sparse Linear Systems (Y. Saad)" :
@@ -142,7 +142,7 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
if ρ == 0
stats.niter = 0
stats.solved, stats.inconsistent = false, false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start =false
return solver
end
diff --git a/src/cr.jl b/src/cr.jl
index c678c7d29..4405eda76 100644
--- a/src/cr.jl
+++ b/src/cr.jl
@@ -149,7 +149,7 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
(verbose > 0) && @printf("%5s %8s %8s %8s\n", "k", "‖x‖", "‖r‖", "quad")
kdisplay(iter, verbose) && @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m)
- descent = pr > 0 # pᵀr > 0 means p is a descent direction
+ descent = pr > 0 # pᴴr > 0 means p is a descent direction
solved = rNorm ≤ ε
tired = iter ≥ itmax
on_boundary = false
@@ -161,7 +161,7 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
if linesearch
if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²)
npcurv = true
- (verbose > 0) && @printf("nonpositive curvature detected: pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ)
+ (verbose > 0) && @printf("nonpositive curvature detected: pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
stats.solved = solved
stats.inconsistent = false
stats.status = "nonpositive curvature"
@@ -182,16 +182,16 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
tr = maximum(to_boundary(x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²))
(verbose > 0) && @printf("t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr)
- if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᵀAp ≃ 0
+ if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᴴAp ≃ 0
npcurv = true # nonpositive curvature
- (verbose > 0) && @printf("pᵀAp = %8.1e ≃ 0\n", pAp)
- if abspr ≤ γ * pNorm * rNorm # pᵀr ≃ 0
- (verbose > 0) && @printf("pᵀr = %8.1e ≃ 0, redefining p := r\n", pr)
+ (verbose > 0) && @printf("pᴴAp = %8.1e ≃ 0\n", pAp)
+ if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0
+ (verbose > 0) && @printf("pᴴr = %8.1e ≃ 0, redefining p := r\n", pr)
p = r # - ∇q(x)
q = Ar
- # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᵀAr
- # 1) if rᵀAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᵀAr
- # 2) if rᵀAr ≤ 0, the quadratic decreases to -∞ in the direction r
+ # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᴴAr
+ # 1) if rᴴAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᴴAr
+ # 2) if rᴴAr ≤ 0, the quadratic decreases to -∞ in the direction r
if ρ > 0 # case 1
(verbose > 0) && @printf("quadratic is convex in direction r, curv = %8.1e\n", ρ)
α = min(tr, rNorm² / ρ)
@@ -200,12 +200,12 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
α = tr
end
else
- # q_p = q(x + α_p * p) - q(x) = -α_p * rᵀp + ½ (α_p)² * pᵀAp
- # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᵀAr
+ # q_p = q(x + α_p * p) - q(x) = -α_p * rᴴp + ½ (α_p)² * pᴴAp
+ # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᴴAr
# Δ = q_p - q_r. If Δ > 0, r is followed, else p is followed
α = descent ? t1 : t2
ρ > 0 && (tr = min(tr, rNorm² / ρ))
- Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᵀAp = 0
+ Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᴴAp = 0
if Δ > 0 # direction r engenders a better decrease
(verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
(verbose > 0) && @printf("redefining p := r\n")
@@ -218,7 +218,7 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
elseif pAp > 0 && ρ > 0 # no negative curvature
- (verbose > 0) && @printf("positive curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ)
+ (verbose > 0) && @printf("positive curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
α = ρ / @kdotr(n, q, Mq)
if α ≥ t1
α = t1
@@ -227,8 +227,8 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
elseif pAp > 0 && ρ < 0
npcurv = true
- (verbose > 0) && @printf("pᵀAp = %8.1e > 0 and rᵀAr = %8.1e < 0\n", pAp, ρ)
- # q_p is minimal for α_p = rᵀp / pᵀAp
+ (verbose > 0) && @printf("pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ)
+ # q_p is minimal for α_p = rᴴp / pᴴAp
α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp)
Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
if Δ > 0
@@ -243,7 +243,7 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
elseif pAp < 0 && ρ > 0
npcurv = true
- (verbose > 0) && @printf("pᵀAp = %8.1e < 0 and rᵀAr = %8.1e > 0\n", pAp, ρ)
+ (verbose > 0) && @printf("pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ)
α = descent ? t1 : t2
tr = min(tr, rNorm² / ρ)
Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
@@ -259,7 +259,7 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
elseif pAp < 0 && ρ < 0
npcurv = true
- (verbose > 0) && @printf("negative curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ)
+ (verbose > 0) && @printf("negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
α = descent ? t1 : t2
Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
if Δ > 0
@@ -330,9 +330,9 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
solver.warm_start = false
return solver
end
- pr = rNorm² + β * pr - β * α * pAp # pᵀr
+ pr = rNorm² + β * pr - β * α * pAp # pᴴr
abspr = abs(pr)
- pAp = ρ + β^2 * pAp # pᵀq
+ pAp = ρ + β^2 * pAp # pᴴq
abspAp = abs(pAp)
descent = pr > 0
diff --git a/src/craig.jl b/src/craig.jl
index 20597ea02..5759e31df 100644
--- a/src/craig.jl
+++ b/src/craig.jl
@@ -11,7 +11,7 @@
# and is equivalent to applying the conjugate gradient method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method, sometimes known under the name CRAIG, is the
# Golub-Kahan implementation of CGNE, and is described in
@@ -52,14 +52,14 @@ regularization parameter. This method is equivalent to CGNE but is more
stable.
For a system in the form Ax = b, Craig's method is equivalent to applying
-CG to AAᵀy = b and recovering x = Aᵀy. Note that y are the Lagrange
+CG to AAᴴy = b and recovering x = Aᴴy. Note that y are the Lagrange
multipliers of the least-norm problem
minimize ‖x‖ s.t. Ax = b.
If `λ > 0`, CRAIG solves the symmetric and quasi-definite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A λ²E ] [ y ] = [ b ],
where E and F are symmetric and positive definite.
@@ -70,12 +70,12 @@ The system above represents the optimality conditions of
min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-CRAIG is then equivalent to applying CG to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+CRAIG is then equivalent to applying CG to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`.
If `λ = 0`, CRAIG solves the symmetric and indefinite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A 0 ] [ y ] = [ b ].
The system above represents the optimality conditions of
@@ -134,13 +134,13 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u , S, m)
allocate_if(!NisI, solver, :v , S, n)
allocate_if(λ > 0, solver, :w2, S, n)
- x, Nv, Aᵀu, y, w = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w
+ x, Nv, Aᴴu, y, w = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w
Mu, Av, w2, stats = solver.Mu, solver.Av, solver.w2, solver.stats
rNorms = stats.residuals
reset!(stats)
@@ -180,7 +180,7 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
Anorm² = zero(T) # Estimate of ‖A‖²_F.
Anorm = zero(T)
- Dnorm² = zero(T) # Estimate of ‖(AᵀA)⁻¹‖².
+ Dnorm² = zero(T) # Estimate of ‖(AᴴA)⁻¹‖².
Acond = zero(T) # Estimate of cond(A).
xNorm² = zero(T) # Estimate of ‖x‖².
xNorm = zero(T)
@@ -212,9 +212,9 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
while ! (solved || inconsistent || ill_cond || tired || user_requested_exit)
# Generate the next Golub-Kahan vectors
- # 1. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 1. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
if α == 0
diff --git a/src/craigmr.jl b/src/craigmr.jl
index e08bb9c36..854e3df98 100644
--- a/src/craigmr.jl
+++ b/src/craigmr.jl
@@ -10,7 +10,7 @@
# and is equivalent to applying the conjugate residual method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method is equivalent to CRMR, and is described in
#
@@ -44,7 +44,7 @@ using the CRAIGMR method, where λ ≥ 0 is a regularization parameter.
This method is equivalent to applying the Conjugate Residuals method
to the normal equations of the second kind
- (AAᵀ + λ²I) y = b
+ (AAᴴ + λ²I) y = b
but is more stable. When λ = 0, this method solves the minimum-norm problem
@@ -52,7 +52,7 @@ but is more stable. When λ = 0, this method solves the minimum-norm problem
If `λ > 0`, CRAIGMR solves the symmetric and quasi-definite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A λ²E ] [ y ] = [ b ],
where E and F are symmetric and positive definite.
@@ -63,12 +63,12 @@ The system above represents the optimality conditions of
min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`.
If `λ = 0`, CRAIGMR solves the symmetric and indefinite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A 0 ] [ y ] = [ b ].
The system above represents the optimality conditions of
@@ -129,20 +129,20 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
allocate_if(λ > 0, solver, :q, S, n)
- x, Nv, Aᵀu, d, y, Mu = solver.x, solver.Nv, solver.Aᵀu, solver.d, solver.y, solver.Mu
+ x, Nv, Aᴴu, d, y, Mu = solver.x, solver.Nv, solver.Aᴴu, solver.d, solver.y, solver.Mu
w, wbar, Av, q, stats = solver.w, solver.wbar, solver.Av, solver.q, solver.stats
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
u = MisI ? Mu : solver.u
v = NisI ? Nv : solver.v
- # Compute y such that AAᵀy = b. Then recover x = Aᵀy.
+ # Compute y such that AAᴴy = b. Then recover x = Aᴴy.
x .= zero(FC)
y .= zero(FC)
Mu .= b
@@ -161,9 +161,9 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# β₁Mu₁ = b.
@kscal!(m, one(FC)/β, u)
MisI || @kscal!(m, one(FC)/β, Mu)
- # α₁Nv₁ = Aᵀu₁.
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ # α₁Nv₁ = Aᴴu₁.
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
Anorm² = α * α
@@ -171,10 +171,10 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = m + n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²")
+ (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²")
kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β, α, β, α, 0, 1, Anorm²)
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
if α == 0
stats.niter = 0
stats.solved, stats.inconsistent = true, false
@@ -288,9 +288,9 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# xₖ = Dₖzₖ
@kaxpy!(n, ζ, d, x)
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
Anorm² = Anorm² + α * α # = ‖Lₖ‖
diff --git a/src/crls.jl b/src/crls.jl
index 6410fb836..b041f8e9f 100644
--- a/src/crls.jl
+++ b/src/crls.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the linear system
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# This implementation follows the formulation given in
#
@@ -37,11 +37,11 @@ Solve the linear least-squares problem
using the Conjugate Residuals (CR) method. This method is equivalent to
applying MINRES to the normal equations
- (AᵀA + λI) x = Aᵀb.
+ (AᴴA + λI) x = Aᴴb.
This implementation recurs the residual r := b - Ax.
-CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to LSMR, though can be substantially less accurate,
but simpler to implement.
@@ -86,7 +86,7 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :Ms, S, m)
@@ -112,13 +112,13 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
MisI || mulorldiv!(Mr, M, r, ldiv)
- mul!(Ar, Aᵀ, Mr) # - λ * x0 if x0 ≠ 0.
+ mul!(Ar, Aᴴ, Mr) # - λ * x0 if x0 ≠ 0.
mul!(s, A, Ar)
MisI || mulorldiv!(Ms, M, s, ldiv)
p .= Ar
Ap .= s
- mul!(q, Aᵀ, Ms) # Ap
+ mul!(q, Aᴴ, Ms) # Ap
λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p
γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms)
iter = 0
@@ -128,7 +128,7 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
λ > 0 && (γ += λ * ArNorm * ArNorm)
history && push!(ArNorms, ArNorm)
ε = atol + rtol * ArNorm
- (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖")
+ (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᴴr‖", "‖r‖")
kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
status = "unknown"
@@ -147,11 +147,11 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
if radius > 0
pNorm = @knrm2(n, p)
if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p
- psd = true # det(AᵀA) = 0
- p = Ar # p = Aᵀr
+ psd = true # det(AᴴA) = 0
+ p = Ar # p = Aᴴr
pNorm² = ArNorm * ArNorm
- mul!(q, Aᵀ, s)
- α = min(ArNorm^2 / γ, maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᵀr for α = ‖Ar‖²/γ
+ mul!(q, Aᴴ, s)
+ α = min(ArNorm^2 / γ, maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᴴr for α = ‖Ar‖²/γ
else
pNorm² = pNorm * pNorm
σ = maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))
@@ -177,7 +177,7 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p
@kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap
MisI || mulorldiv!(MAp, M, Ap, ldiv)
- mul!(q, Aᵀ, MAp)
+ mul!(q, Aᴴ, MAp)
λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p
γ = γ_next
diff --git a/src/crmr.jl b/src/crmr.jl
index 6ed2b3c60..3fff12b08 100644
--- a/src/crmr.jl
+++ b/src/crmr.jl
@@ -10,7 +10,7 @@
# and is equivalent to applying the conjugate residual method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method is equivalent to CRAIGMR, described in
#
@@ -44,7 +44,7 @@ using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization
parameter. This method is equivalent to applying CR to the normal equations
of the second kind
- (AAᵀ + λI) y = b
+ (AAᴴ + λI) y = b
but is more stable. When λ = 0, this method solves the minimum-norm problem
@@ -102,19 +102,19 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!NisI, solver, :Nq, S, m)
allocate_if(λ > 0, solver, :s , S, m)
- x, p, Aᵀr, r = solver.x, solver.p, solver.Aᵀr, solver.r
+ x, p, Aᴴr, r = solver.x, solver.p, solver.Aᴴr, solver.r
q, s, stats = solver.q, solver.s, solver.stats
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
Nq = NisI ? q : solver.Nq
x .= zero(FC) # initial estimation x = 0
- mulorldiv!(r, N, b, ldiv) # initial residual r = M * (b - Ax) = M * b
+ mulorldiv!(r, N, b, ldiv) # initial residual r = N * (b - Ax) = N * b
bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
history && push!(rNorms, rNorm)
@@ -126,9 +126,9 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
return solver
end
λ > 0 && (s .= r)
- mul!(Aᵀr, Aᵀ, r) # - λ * x0 if x0 ≠ 0.
- p .= Aᵀr
- γ = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ = dot(Aᵀr, Aᵀr)
+ mul!(Aᴴr, Aᴴ, r) # - λ * x0 if x0 ≠ 0.
+ p .= Aᴴr
+ γ = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ = dot(Aᴴr, Aᴴr)
λ > 0 && (γ += λ * rNorm * rNorm)
iter = 0
itmax == 0 && (itmax = m + n)
@@ -137,7 +137,7 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
history && push!(ArNorms, ArNorm)
ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems.
- (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖")
+ (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᴴr‖", "‖r‖")
kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
status = "unknown"
@@ -150,16 +150,16 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
mul!(q, A, p)
λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s
NisI || mulorldiv!(Nq, N, q, ldiv)
- α = γ / @kdotr(m, q, Nq) # Compute qᵗ * M * q
+ α = γ / @kdotr(m, q, Nq) # Compute qᴴ * N * q
@kaxpy!(n, α, p, x) # Faster than x = x + α * p
@kaxpy!(m, -α, Nq, r) # Faster than r = r - α * Nq
rNorm = @knrm2(m, r) # norm(r)
- mul!(Aᵀr, Aᵀ, r)
- γ_next = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ_next = dot(Aᵀr, Aᵀr)
+ mul!(Aᴴr, Aᴴ, r)
+ γ_next = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ_next = dot(Aᴴr, Aᴴr)
λ > 0 && (γ_next += λ * rNorm * rNorm)
β = γ_next / γ
- @kaxpby!(n, one(FC), Aᵀr, β, p) # Faster than p = Aᵀr + β * p
+ @kaxpby!(n, one(FC), Aᴴr, β, p) # Faster than p = Aᴴr + β * p
if λ > 0
@kaxpby!(m, one(FC), r, β, s) # s = r + β * s
end
diff --git a/src/fom.jl b/src/fom.jl
index fcae5cf62..b212129ef 100644
--- a/src/fom.jl
+++ b/src/fom.jl
@@ -211,7 +211,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
mul!(w, A, p) # w ← AN⁻¹vₖ
MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ
for i = 1 : inner_iter
- U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ
+ U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
@kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
end
diff --git a/src/gmres.jl b/src/gmres.jl
index 388a4ab96..32999aa23 100644
--- a/src/gmres.jl
+++ b/src/gmres.jl
@@ -214,7 +214,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
mul!(w, A, p) # w ← AN⁻¹vₖ
MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ
for i = 1 : inner_iter
- R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ
+ R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
@kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
end
@@ -245,7 +245,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ]
(c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis)
- # Update zₖ = (Qₖ)ᵀβe₁
+ # Update zₖ = (Qₖ)ᴴβe₁
ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter]
z[inner_iter] = c[inner_iter] * z[inner_iter]
diff --git a/src/gpmr.jl b/src/gpmr.jl
index b10942995..82499b50e 100644
--- a/src/gpmr.jl
+++ b/src/gpmr.jl
@@ -28,7 +28,7 @@ GPMR solves the unsymmetric partitioned linear system
[ B μI ] [ y ] [ c ],
where λ and μ are real or complex numbers.
-`A` can have any shape and `B` has the shape of `Aᵀ`.
+`A` can have any shape and `B` has the shape of `Aᴴ`.
`A`, `B`, `b` and `c` must be all nonzero.
This implementation allows left and right block diagonal preconditioners
@@ -172,7 +172,7 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
gs .= zero(FC) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ.
gc .= zero(T) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ.
R .= zero(FC) # Upper triangular matrix Rₖ.
- zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᵀ(βe₁ + γe₂).
+ zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᴴ(βe₁ + γe₂).
# Warm-start
# If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ
@@ -259,8 +259,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ
for i = 1 : iter
- hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = vᵢAuₖ
- fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = uᵢBvₖ
+ hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = (vᵢ)ᴴq
+ fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = (uᵢ)ᴴp
@kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ
@kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ
R[nr₂ₖ + 2i-1] = hᵢₖ
@@ -270,8 +270,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
# Reorthogonalization of the Krylov basis.
if reorthogonalization
for i = 1 : iter
- Htmp = @kdot(m, V[i], q) # hₜₘₚ = qᵀvᵢ
- Ftmp = @kdot(n, U[i], p) # fₜₘₚ = pᵀuᵢ
+ Htmp = @kdot(m, V[i], q) # hₜₘₚ = (vᵢ)ᴴq
+ Ftmp = @kdot(n, U[i], p) # fₜₘₚ = (uᵢ)ᴴp
@kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ
@kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ
R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ
diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl
index d557d91ae..abd0c7352 100644
--- a/src/krylov_solvers.jl
+++ b/src/krylov_solvers.jl
@@ -1092,7 +1092,7 @@ may be used in order to create these vectors.
mutable struct CgneSolver{T,FC,S} <: KrylovSolver{T,FC,S}
x :: S
p :: S
- Aᵀz :: S
+ Aᴴz :: S
r :: S
q :: S
s :: S
@@ -1105,13 +1105,13 @@ function CgneSolver(n, m, S)
T = real(FC)
x = S(undef, m)
p = S(undef, m)
- Aᵀz = S(undef, m)
+ Aᴴz = S(undef, m)
r = S(undef, n)
q = S(undef, n)
s = S(undef, 0)
z = S(undef, 0)
stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = CgneSolver{T,FC,S}(x, p, Aᵀz, r, q, s, z, stats)
+ solver = CgneSolver{T,FC,S}(x, p, Aᴴz, r, q, s, z, stats)
return solver
end
@@ -1134,7 +1134,7 @@ may be used in order to create these vectors.
mutable struct CrmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
x :: S
p :: S
- Aᵀr :: S
+ Aᴴr :: S
r :: S
q :: S
Nq :: S
@@ -1147,13 +1147,13 @@ function CrmrSolver(n, m, S)
T = real(FC)
x = S(undef, m)
p = S(undef, m)
- Aᵀr = S(undef, m)
+ Aᴴr = S(undef, m)
r = S(undef, n)
q = S(undef, n)
Nq = S(undef, 0)
s = S(undef, 0)
stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = CrmrSolver{T,FC,S}(x, p, Aᵀr, r, q, Nq, s, stats)
+ solver = CrmrSolver{T,FC,S}(x, p, Aᴴr, r, q, Nq, s, stats)
return solver
end
@@ -1176,7 +1176,7 @@ may be used in order to create these vectors.
mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
w̄ :: S
Mu :: S
Av :: S
@@ -1191,7 +1191,7 @@ function LslqSolver(n, m, S; window :: Int=5)
T = real(FC)
x = S(undef, m)
Nv = S(undef, m)
- Aᵀu = S(undef, m)
+ Aᴴu = S(undef, m)
w̄ = S(undef, m)
Mu = S(undef, n)
Av = S(undef, n)
@@ -1199,7 +1199,7 @@ function LslqSolver(n, m, S; window :: Int=5)
v = S(undef, 0)
err_vec = zeros(T, window)
stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown")
- solver = LslqSolver{T,FC,S}(x, Nv, Aᵀu, w̄, Mu, Av, u, v, err_vec, stats)
+ solver = LslqSolver{T,FC,S}(x, Nv, Aᴴu, w̄, Mu, Av, u, v, err_vec, stats)
return solver
end
@@ -1222,7 +1222,7 @@ may be used in order to create these vectors.
mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
w :: S
Mu :: S
Av :: S
@@ -1237,7 +1237,7 @@ function LsqrSolver(n, m, S; window :: Int=5)
T = real(FC)
x = S(undef, m)
Nv = S(undef, m)
- Aᵀu = S(undef, m)
+ Aᴴu = S(undef, m)
w = S(undef, m)
Mu = S(undef, n)
Av = S(undef, n)
@@ -1245,7 +1245,7 @@ function LsqrSolver(n, m, S; window :: Int=5)
v = S(undef, 0)
err_vec = zeros(T, window)
stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = LsqrSolver{T,FC,S}(x, Nv, Aᵀu, w, Mu, Av, u, v, err_vec, stats)
+ solver = LsqrSolver{T,FC,S}(x, Nv, Aᴴu, w, Mu, Av, u, v, err_vec, stats)
return solver
end
@@ -1268,7 +1268,7 @@ may be used in order to create these vectors.
mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
h :: S
hbar :: S
Mu :: S
@@ -1284,7 +1284,7 @@ function LsmrSolver(n, m, S; window :: Int=5)
T = real(FC)
x = S(undef, m)
Nv = S(undef, m)
- Aᵀu = S(undef, m)
+ Aᴴu = S(undef, m)
h = S(undef, m)
hbar = S(undef, m)
Mu = S(undef, n)
@@ -1293,7 +1293,7 @@ function LsmrSolver(n, m, S; window :: Int=5)
v = S(undef, 0)
err_vec = zeros(T, window)
stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown")
- solver = LsmrSolver{T,FC,S}(x, Nv, Aᵀu, h, hbar, Mu, Av, u, v, err_vec, stats)
+ solver = LsmrSolver{T,FC,S}(x, Nv, Aᴴu, h, hbar, Mu, Av, u, v, err_vec, stats)
return solver
end
@@ -1316,7 +1316,7 @@ may be used in order to create these vectors.
mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
y :: S
w̄ :: S
Mu :: S
@@ -1332,7 +1332,7 @@ function LnlqSolver(n, m, S)
T = real(FC)
x = S(undef, m)
Nv = S(undef, m)
- Aᵀu = S(undef, m)
+ Aᴴu = S(undef, m)
y = S(undef, n)
w̄ = S(undef, n)
Mu = S(undef, n)
@@ -1341,7 +1341,7 @@ function LnlqSolver(n, m, S)
v = S(undef, 0)
q = S(undef, 0)
stats = LNLQStats(0, false, T[], false, T[], T[], "unknown")
- solver = LnlqSolver{T,FC,S}(x, Nv, Aᵀu, y, w̄, Mu, Av, u, v, q, stats)
+ solver = LnlqSolver{T,FC,S}(x, Nv, Aᴴu, y, w̄, Mu, Av, u, v, q, stats)
return solver
end
@@ -1364,7 +1364,7 @@ may be used in order to create these vectors.
mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S}
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
y :: S
w :: S
Mu :: S
@@ -1380,7 +1380,7 @@ function CraigSolver(n, m, S)
T = real(FC)
x = S(undef, m)
Nv = S(undef, m)
- Aᵀu = S(undef, m)
+ Aᴴu = S(undef, m)
y = S(undef, n)
w = S(undef, n)
Mu = S(undef, n)
@@ -1389,7 +1389,7 @@ function CraigSolver(n, m, S)
v = S(undef, 0)
w2 = S(undef, 0)
stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = CraigSolver{T,FC,S}(x, Nv, Aᵀu, y, w, Mu, Av, u, v, w2, stats)
+ solver = CraigSolver{T,FC,S}(x, Nv, Aᴴu, y, w, Mu, Av, u, v, w2, stats)
return solver
end
@@ -1412,7 +1412,7 @@ may be used in order to create these vectors.
mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
d :: S
y :: S
Mu :: S
@@ -1430,7 +1430,7 @@ function CraigmrSolver(n, m, S)
T = real(FC)
x = S(undef, m)
Nv = S(undef, m)
- Aᵀu = S(undef, m)
+ Aᴴu = S(undef, m)
d = S(undef, m)
y = S(undef, n)
Mu = S(undef, n)
@@ -1441,7 +1441,7 @@ function CraigmrSolver(n, m, S)
v = S(undef, 0)
q = S(undef, 0)
stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = CraigmrSolver{T,FC,S}(x, Nv, Aᵀu, d, y, Mu, w, wbar, Av, u, v, q, stats)
+ solver = CraigmrSolver{T,FC,S}(x, Nv, Aᴴu, d, y, Mu, w, wbar, Av, u, v, q, stats)
return solver
end
diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl
index 6f0c1c382..c61bf2e5e 100644
--- a/src/krylov_utils.jl
+++ b/src/krylov_utils.jl
@@ -164,14 +164,14 @@ function to_boundary(x :: Vector{T}, d :: Vector{T},
radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where T <: Number
radius > 0 || error("radius must be positive")
- # ‖d‖² σ² + 2 xᵀd σ + (‖x‖² - radius²).
- xd = dot(x, d)
- flip && (xd = -xd)
+ # ‖d‖² σ² + (xᴴd + dᴴx) σ + (‖x‖² - radius²).
+ rxd = real(dot(x, d))
+ flip && (rxd = -rxd)
dNorm2 == zero(T) && (dNorm2 = dot(d, d))
dNorm2 == zero(T) && error("zero direction")
xNorm2 == zero(T) && (xNorm2 = dot(x, x))
(xNorm2 ≤ radius * radius) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius * radius))
- roots = roots_quadratic(dNorm2, 2 * xd, xNorm2 - radius * radius)
+ roots = roots_quadratic(dNorm2, 2 * rxd, xNorm2 - radius * radius)
return roots # `σ1` and `σ2`
end
diff --git a/src/lnlq.jl b/src/lnlq.jl
index a1f890de2..db0a7c951 100644
--- a/src/lnlq.jl
+++ b/src/lnlq.jl
@@ -9,9 +9,9 @@
# and is equivalent to applying the SYMMLQ method
# to the linear system
#
-# AAᵀy = b with x = Aᵀy and can be reformulated as
+# AAᴴy = b with x = Aᴴy and can be reformulated as
#
-# [ -I Aᵀ ][ x ] = [ 0 ]
+# [ -I Aᴴ ][ x ] = [ 0 ]
# [ A ][ y ] [ b ].
#
# This method is based on the Golub-Kahan bidiagonalization process and is described in
@@ -41,14 +41,14 @@ Find the least-norm solution of the consistent linear system
using the LNLQ method, where λ ≥ 0 is a regularization parameter.
For a system in the form Ax = b, LNLQ method is equivalent to applying
-SYMMLQ to AAᵀy = b and recovering x = Aᵀy but is more stable.
+SYMMLQ to AAᴴy = b and recovering x = Aᴴy but is more stable.
Note that y are the Lagrange multipliers of the least-norm problem
minimize ‖x‖ s.t. Ax = b.
If `λ > 0`, LNLQ solves the symmetric and quasi-definite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A λ²E ] [ y ] = [ b ],
where E and F are symmetric and positive definite.
@@ -59,12 +59,12 @@ The system above represents the optimality conditions of
min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`.
+For a Hermitian (symmetric in the real case) and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`.
If `λ = 0`, LNLQ solves the symmetric and indefinite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A 0 ] [ y ] = [ b ].
The system above represents the optimality conditions of
@@ -126,13 +126,13 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
allocate_if(λ > 0, solver, :q, S, n)
- x, Nv, Aᵀu, y, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w̄
+ x, Nv, Aᴴu, y, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w̄
Mu, Av, q, stats = solver.Mu, solver.Av, solver.q, solver.stats
rNorms, xNorms, yNorms = stats.residuals, stats.error_bnd_x, stats.error_bnd_y
reset!(stats)
@@ -179,9 +179,9 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
MisI || @kscal!(m, one(FC) / βₖ, Mu)
end
- # α₁Nv₁ = Aᵀu₁.
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ # α₁Nv₁ = Aᴴu₁.
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁
αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N
if αₖ ≠ 0
@@ -190,8 +190,8 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
w̄ .= u # Direction w̄₁
- cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᵀ
- sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᵀ
+ cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᴴ
+ sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᴴ
ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ
ηₖ = zero(FC) # Coefficient of M̅ₖ
@@ -214,7 +214,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
αhatₖ = αₖ
end
- # Begin the LQ factorization of (Lₖ)ᵀ = M̅ₖQₖ.
+ # Begin the LQ factorization of (Lₖ)ᴴ = M̅ₖQₖ.
# [ α₁ β₂ 0 • • • 0 ] [ ϵ₁ 0 • • • • 0 ]
# [ 0 α₂ • • • ] [ η₂ ϵ₂ • • ]
# [ • • • • • • ] [ 0 • • • • ]
@@ -225,7 +225,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
ϵbarₖ = αhatₖ # ϵbar₁ = αhat₁
- # Hₖ = Bₖ(Lₖ)ᵀ = [ Lₖ(Lₖ)ᵀ ] ⟹ (Hₖ₋₁)ᵀ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ
+ # Hₖ = Bₖ(Lₖ)ᴴ = [ Lₖ(Lₖ)ᴴ ] ⟹ (Hₖ₋₁)ᴴ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ
# [ αₖβₖ₊₁(eₖ)ᵀ ]
#
# Solve Lₖtₖ = β₁e₁ and M̅ₖz̅ₖ = tₖ
@@ -273,7 +273,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Continue the generalized Golub-Kahan bidiagonalization.
# AVₖ = MUₖ₊₁Bₖ
- # AᵀUₖ₊₁ = NVₖ(Bₖ)ᵀ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᵀ = NVₖ₊₁(Lₖ₊₁)ᵀ
+ # AᴴUₖ₊₁ = NVₖ(Bₖ)ᴴ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᴴ = NVₖ₊₁(Lₖ₊₁)ᴴ
#
# [ α₁ 0 • • • • 0 ]
# [ β₂ α₂ • • ]
@@ -296,9 +296,9 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu)
end
- # αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -βₖ₊₁, Nv)
+ # αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -βₖ₊₁, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁
αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N
if αₖ₊₁ ≠ 0
@@ -353,7 +353,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
ρbar = ssig * μbar + csig * σₑₛₜ
end
- # Continue the LQ factorization of (Lₖ₊₁)ᵀ.
+ # Continue the LQ factorization of (Lₖ₊₁)ᴴ.
# [ηₖ ϵbarₖ βₖ₊₁] [1 0 0 ] = [ηₖ ϵₖ 0 ]
# [0 0 αₖ₊₁] [0 cₖ₊₁ sₖ₊₁] [0 ηₖ₊₁ ϵbarₖ₊₁]
# [0 sₖ₊₁ -cₖ₊₁]
diff --git a/src/lslq.jl b/src/lslq.jl
index 908de19c5..d43d4a089 100644
--- a/src/lslq.jl
+++ b/src/lslq.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# LSLQ is formally equivalent to applying SYMMLQ to the normal equations
# but should be more stable.
@@ -41,7 +41,7 @@ Solve the regularized linear least-squares problem
using the LSLQ method, where λ ≥ 0 is a regularization parameter.
LSLQ is formally equivalent to applying SYMMLQ to the normal equations
- (AᵀA + λ²I) x = Aᵀb
+ (AᴴA + λ²I) x = Aᴴb
but is more stable.
@@ -62,7 +62,7 @@ but is more stable.
If `λ > 0`, we solve the symmetric and quasi-definite system
[ E A ] [ r ] [ b ]
- [ Aᵀ -λ²F ] [ x ] = [ 0 ],
+ [ Aᴴ -λ²F ] [ x ] = [ 0 ],
where E and F are symmetric and positive definite.
Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators.
@@ -72,19 +72,19 @@ The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LSLQ is then equivalent to applying SYMMLQ to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`.
+For a Hermitian (symmetric in the real case) and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LSLQ is then equivalent to applying SYMMLQ to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`.
If `λ = 0`, we solve the symmetric and indefinite system
[ E A ] [ r ] [ b ]
- [ Aᵀ 0 ] [ x ] = [ 0 ].
+ [ Aᴴ 0 ] [ x ] = [ 0 ].
The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹.
-In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured.
+In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured.
`r` can be recovered by computing `E⁻¹(b - Ax)`.
* `λ` is a regularization parameter (see the problem statement above)
@@ -116,8 +116,8 @@ In this case, `N` can still be specified and indicates the weighted norm in whic
The iterations stop as soon as one of the following conditions holds true:
* the optimality residual is sufficiently small (`stats.status = "found approximate minimum least-squares solution"`) in the sense that either
- * ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ atol, or
- * 1 + ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ 1
+ * ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ atol, or
+ * 1 + ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ 1
* an approximate zero-residual solution has been found (`stats.status = "found approximate zero-residual solution"`) in the sense that either
* ‖r‖ / ‖b‖ ≤ btol + atol ‖A‖ * ‖xᴸ‖ / ‖b‖, or
* 1 + ‖r‖ / ‖b‖ ≤ 1
@@ -177,12 +177,12 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
- x, Nv, Aᵀu, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.w̄
+ x, Nv, Aᴴu, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.w̄
Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
rNorms, ArNorms, err_lbnds = stats.residuals, stats.Aresiduals, stats.err_lbnds
err_ubnds_lq, err_ubnds_cg = stats.err_ubnds_lq, stats.err_ubnds_cg
@@ -213,12 +213,12 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β₁, u)
MisI || @kscal!(m, one(FC)/β₁, Mu)
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv)) # = α₁
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
if α == 0
stats.niter = 0
stats.solved, stats.inconsistent = true, false
@@ -274,7 +274,7 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = m + n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖")
+ (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖")
kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm)
status = "unknown"
@@ -298,9 +298,9 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β, u)
MisI || @kscal!(m, one(FC)/β, Mu)
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
if α ≠ 0
diff --git a/src/lsmr.jl b/src/lsmr.jl
index f4d8349d1..78db5db59 100644
--- a/src/lsmr.jl
+++ b/src/lsmr.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# LSMR is formally equivalent to applying MINRES to the normal equations
# but should be more stable. It is also formally equivalent to CRLS though
@@ -46,21 +46,21 @@ Solve the regularized linear least-squares problem
using the LSMR method, where λ ≥ 0 is a regularization parameter.
LSMR is formally equivalent to applying MINRES to the normal equations
- (AᵀA + λ²I) x = Aᵀb
+ (AᴴA + λ²I) x = Aᴴb
(and therefore to CRLS) but is more stable.
-LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to CRLS, though can be substantially more accurate.
LSMR can be also used to find a null vector of a singular matrix A
-by solving the problem `min ‖Aᵀx - b‖` with any nonzero vector `b`.
-At a minimizer, the residual vector `r = b - Aᵀx` will satisfy `Ar = 0`.
+by solving the problem `min ‖Aᴴx - b‖` with any nonzero vector `b`.
+At a minimizer, the residual vector `r = b - Aᴴx` will satisfy `Ar = 0`.
If `λ > 0`, we solve the symmetric and quasi-definite system
[ E A ] [ r ] [ b ]
- [ Aᵀ -λ²F ] [ x ] = [ 0 ],
+ [ Aᴴ -λ²F ] [ x ] = [ 0 ],
where E and F are symmetric and positive definite.
Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators.
@@ -70,19 +70,19 @@ The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LSMR is then equivalent to applying MINRES to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`.
+For a Hermitian (symmetric in the real case) and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LSMR is then equivalent to applying MINRES to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`.
If `λ = 0`, we solve the symmetric and indefinite system
[ E A ] [ r ] [ b ]
- [ Aᵀ 0 ] [ x ] = [ 0 ].
+ [ Aᴴ 0 ] [ x ] = [ 0 ].
The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹.
-In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured.
+In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured.
`r` can be recovered by computing `E⁻¹(b - Ax)`.
The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
@@ -134,12 +134,12 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
- x, Nv, Aᵀu, h, hbar = solver.x, solver.Nv, solver.Aᵀu, solver.h, solver.hbar
+ x, Nv, Aᴴu, h, hbar = solver.x, solver.Nv, solver.Aᴴu, solver.h, solver.hbar
Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
@@ -166,8 +166,8 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β₁, u)
MisI || @kscal!(m, one(FC)/β₁, Mu)
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
@@ -210,10 +210,10 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = m + n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²")
+ (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²")
kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm²)
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
if α == 0
stats.niter = 0
stats.solved, stats.inconsistent = true, false
@@ -248,9 +248,9 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β, u)
MisI || @kscal!(m, one(FC)/β, Mu)
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
if α ≠ 0
diff --git a/src/lsqr.jl b/src/lsqr.jl
index dd3779dce..083b2f9f9 100644
--- a/src/lsqr.jl
+++ b/src/lsqr.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# LSQR is formally equivalent to applying the conjugate gradient method
# to the normal equations but should be more stable. It is also formally
@@ -45,17 +45,17 @@ Solve the regularized linear least-squares problem
using the LSQR method, where λ ≥ 0 is a regularization parameter.
LSQR is formally equivalent to applying CG to the normal equations
- (AᵀA + λ²I) x = Aᵀb
+ (AᴴA + λ²I) x = Aᴴb
(and therefore to CGLS) but is more stable.
-LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂.
+LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to CGLS, though can be slightly more accurate.
If `λ > 0`, LSQR solves the symmetric and quasi-definite system
[ E A ] [ r ] [ b ]
- [ Aᵀ -λ²F ] [ x ] = [ 0 ],
+ [ Aᴴ -λ²F ] [ x ] = [ 0 ],
where E and F are symmetric and positive definite.
Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators.
@@ -65,19 +65,19 @@ The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LSQR is then equivalent to applying CG to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`.
+For a Hermitian (symmetric in the real case) and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LSQR is then equivalent to applying CG to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`.
If `λ = 0`, we solve the symmetric and indefinite system
[ E A ] [ r ] [ b ]
- [ Aᵀ 0 ] [ x ] = [ 0 ].
+ [ Aᴴ 0 ] [ x ] = [ 0 ].
The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹.
-In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured.
+In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured.
`r` can be recovered by computing `E⁻¹(b - Ax)`.
The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
@@ -129,12 +129,12 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
ktypeof(b) == S || error("ktypeof(b) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
- x, Nv, Aᵀu, w = solver.x, solver.Nv, solver.Aᵀu, solver.w
+ x, Nv, Aᴴu, w = solver.x, solver.Nv, solver.Aᴴu, solver.w
Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
@@ -162,8 +162,8 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β₁, u)
MisI || @kscal!(m, one(FC)/β₁, Mu)
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv)
Anorm² = @kdotr(n, v, Nv)
Anorm = sqrt(Anorm²)
@@ -184,7 +184,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = m + n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %7s %7s %7s %7s\n", "k", "α", "β", "‖r‖", "‖Aᵀr‖", "compat", "backwrd", "‖A‖", "κ(A)")
+ (verbose > 0) && @printf("%5s %7s %7s %7s %7s %7s %7s %7s %7s\n", "k", "α", "β", "‖r‖", "‖Aᴴr‖", "compat", "backwrd", "‖A‖", "κ(A)")
kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond)
rNorm = β₁
@@ -194,7 +194,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
history && push!(rNorms, r2Norm)
ArNorm = ArNorm0 = α * β
history && push!(ArNorms, ArNorm)
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
if α == 0
stats.niter = 0
stats.solved, stats.inconsistent = true, false
@@ -237,9 +237,9 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖²
λ > 0 && (Anorm² += λ²)
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
if α ≠ 0
diff --git a/src/minres.jl b/src/minres.jl
index cbaefee9f..d3b8732ee 100644
--- a/src/minres.jl
+++ b/src/minres.jl
@@ -50,7 +50,7 @@ MINRES is formally equivalent to applying CR to Ax=b when A is positive
definite, but is typically more stable and also applies to the case where
A is indefinite.
-MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
A preconditioner M may be provided in the form of a linear operator and is
assumed to be symmetric and positive definite.
@@ -189,7 +189,7 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = 2*n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2")
+ (verbose > 0) && @printf("%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2")
kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond)
tol = atol + rtol * β₁
@@ -241,7 +241,7 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
ϵ = sn * β
δbar = -cs * β
root = sqrt(γbar * γbar + δbar * δbar)
- ArNorm = ϕbar * root # = ‖Aᵀrₖ₋₁‖
+ ArNorm = ϕbar * root # = ‖Aᴴrₖ₋₁‖
history && push!(ArNorms, ArNorm)
# Compute the next plane rotation.
@@ -295,7 +295,7 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2)
if iter == 1 && β / β₁ ≤ 10 * ϵM
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
stats.niter = 0
stats.solved, stats.inconsistent = true, true
stats.status = "x is a minimum least-squares solution"
diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl
index bbfbf856b..509a7ef4e 100644
--- a/src/minres_qlp.jl
+++ b/src/minres_qlp.jl
@@ -246,7 +246,7 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F
# [sₖ -cₖ] [βₖ₊₁ ] [0 ]
(cₖ, sₖ, λₖ) = sym_givens(λbarₖ, βₖ₊₁)
- # Compute [ zₖ ] = (Qₖ)ᵀβ₁e₁
+ # Compute [ zₖ ] = (Qₖ)ᴴβ₁e₁
# [ζbarₖ₊₁]
#
# [cₖ sₖ] [ζbarₖ] = [ ζₖ ]
@@ -312,7 +312,7 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F
τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ
end
- # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᵀ
+ # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᴴ
if iter == 1
# w̅₁ = v₁
@. wₖ = vₖ
diff --git a/src/qmr.jl b/src/qmr.jl
index eb4a4eb46..d4b684601 100644
--- a/src/qmr.jl
+++ b/src/qmr.jl
@@ -32,7 +32,7 @@ export qmr, qmr!
Solve the square linear system Ax = b using the QMR method.
QMR is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
When `A` is symmetric and `b = c`, QMR is equivalent to MINRES.
QMR can be warm-started from an initial guess `x0` with the method
@@ -96,7 +96,7 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
ktypeof(c) == S || error("ktypeof(c) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p
@@ -133,18 +133,18 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
# Initialize the Lanczos biorthogonalization process.
- cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩
- if cᵗb == 0
+ cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩
+ if cᴴb == 0
stats.niter = 0
stats.solved = false
stats.inconsistent = false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start = false
return solver
end
- βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀)
- γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀)
+ βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀)
+ γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀)
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
@@ -153,7 +153,7 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹
wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹
- ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁
+ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁
τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate
# Stopping criterion.
@@ -169,10 +169,10 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
# Continue the Lanczos biorthogonalization process.
# AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
- mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
+ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ
@kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
@@ -182,9 +182,9 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
@kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
@kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
- pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩
- βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|)
- γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁
+ pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩
+ βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|)
+ γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁
# Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ].
# [ Oᵀ ]
@@ -271,7 +271,7 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
- if pᵗq ≠ zero(FC)
+ if pᴴq ≠ zero(FC)
@. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
@. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
end
@@ -303,7 +303,7 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
resid_decrease_lim = rNorm ≤ ε
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
- breakdown = !solved && (pᵗq == 0)
+ breakdown = !solved && (pᴴq == 0)
kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
end
(verbose > 0) && @printf("\n")
diff --git a/src/tricg.jl b/src/tricg.jl
index 5acff2d52..7c140a821 100644
--- a/src/tricg.jl
+++ b/src/tricg.jl
@@ -25,7 +25,7 @@ export tricg, tricg!
TriCG solves the symmetric linear system
[ τE A ] [ x ] = [ b ]
- [ Aᵀ νF ] [ y ] [ c ],
+ [ Aᴴ νF ] [ y ] [ c ],
where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0.
`b` and `c` must both be nonzero.
@@ -133,7 +133,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :vₖ, S, m)
@@ -164,12 +164,12 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0
# [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ]
- # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ]
+ # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ]
if warm_start
mul!(b₀, A, Δy)
(τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀)
@kaxpby!(m, one(FC), b, -one(FC), b₀)
- mul!(c₀, Aᵀ, Δx)
+ mul!(c₀, Aᴴ, Δx)
(ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀)
@kaxpby!(n, one(FC), c, -one(FC), c₀)
end
@@ -196,7 +196,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
error("c must be nonzero")
end
- # Initialize directions Gₖ such that Lₖ(Gₖ)ᵀ = (Wₖ)ᵀ
+ # Initialize directions Gₖ such that L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ
gx₂ₖ₋₁ .= zero(FC)
gy₂ₖ₋₁ .= zero(FC)
gx₂ₖ .= zero(FC)
@@ -231,10 +231,10 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Continue the orthogonal tridiagonalization process.
# AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ
if iter ≥ 2
@kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁
@@ -254,14 +254,14 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# [0 u₁ ••• 0 uₖ]
#
# rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ
- # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ]
+ # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ]
#
# block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ
- # [ Aᵀ νF ] [ 0 F ]
+ # [ Aᴴ νF ] [ 0 F ]
#
- # TriCG subproblem : (Wₖ)ᵀ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂
+ # TriCG subproblem : (Wₖ)ᴴ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂
#
- # Update the LDLᵀ factorization of Sₖ.ₖ.
+ # Update the LDLᴴ factorization of Sₖ.ₖ.
#
# [ τ α₁ γ₂ 0 • • • • 0 ]
# [ ᾱ₁ ν β₂ • • ]
@@ -306,7 +306,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
π₂ₖ = -(δₖ * d₂ₖ₋₁ * π₂ₖ₋₁ + λₖ * d₂ₖ₋₂ * π₂ₖ₋₂ + ηₖ * d₂ₖ₋₃ * π₂ₖ₋₃) / d₂ₖ
end
- # Solve Gₖ = Wₖ(Lₖ)⁻ᵀ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ.
+ # Solve Gₖ = Wₖ(Lₖ)⁻ᴴ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ.
if iter == 1
# [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ]
# [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ]
@@ -342,7 +342,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Compute vₖ₊₁ and uₖ₊₁
MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ
- NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ
+ NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ
βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E
γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F
diff --git a/src/trilqr.jl b/src/trilqr.jl
index edcb4c9b9..6b0948984 100644
--- a/src/trilqr.jl
+++ b/src/trilqr.jl
@@ -1,5 +1,5 @@
# An implementation of TRILQR for the solution of square or
-# rectangular consistent linear adjoint systems Ax = b and Aᵀy = c.
+# rectangular consistent linear adjoint systems Ax = b and Aᴴy = c.
#
# This method is described in
#
@@ -24,10 +24,10 @@ export trilqr, trilqr!
Combine USYMLQ and USYMQR to solve adjoint systems.
[0 A] [y] = [b]
- [Aᵀ 0] [x] [c]
+ [Aᴴ 0] [x] [c]
USYMLQ is used for solving primal system `Ax = b`.
-USYMQR is used for solving dual system `Aᵀy = c`.
+USYMQR is used for solving dual system `Aᴴy = c`.
An option gives the possibility of transferring from the USYMLQ point to the
USYMCG point, when it exists. The transfer is based on the residual norm.
@@ -93,7 +93,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ktypeof(c) == S || error("ktypeof(c) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, p, d̅, x, stats = solver.uₖ₋₁, solver.uₖ, solver.p, solver.d̅, solver.x, solver.stats
@@ -107,7 +107,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
if warm_start
mul!(r₀, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r₀)
- mul!(s₀, Aᵀ, Δy)
+ mul!(s₀, Aᴴ, Δy)
@kaxpby!(n, one(FC), c, -one(FC), s₀)
end
@@ -115,7 +115,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
x .= zero(FC) # x₀
bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖
- # Initial solution y₀ and residual s₀ = c - Aᵀy₀.
+ # Initial solution y₀ and residual s₀ = c - Aᴴy₀.
t .= zero(FC) # t₀
cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖
@@ -136,17 +136,17 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= s₀ ./ γₖ # u₁ = (c - Aᵀy₀) / γ₁
+ uₖ .= s₀ ./ γₖ # u₁ = (c - Aᴴy₀) / γ₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁
ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
- wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ
- wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ
+ wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᴴ
+ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᴴ
# Stopping criterion.
inconsistent = false
@@ -166,10 +166,10 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
# Continue the SSY tridiagonalization process.
# AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ
@kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
@@ -236,7 +236,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ.
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ.
# [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ
# [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
if iter ≥ 2
@@ -295,7 +295,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ψbarₖ = sₖ * ψbarₖ₋₁
end
- # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ.
+ # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᴴ = (Vₖ₋₁)ᴴ.
# w₁ = v₁ / δ̄₁
if iter == 2
wₖ₋₁ = wₖ₋₂
diff --git a/src/trimr.jl b/src/trimr.jl
index bc53633c2..7dd826edf 100644
--- a/src/trimr.jl
+++ b/src/trimr.jl
@@ -25,7 +25,7 @@ export trimr, trimr!
TriMR solves the symmetric linear system
[ τE A ] [ x ] = [ b ]
- [ Aᵀ νF ] [ y ] [ c ],
+ [ Aᴴ νF ] [ y ] [ c ],
where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0.
`b` and `c` must both be nonzero.
@@ -137,7 +137,7 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :vₖ, S, m)
@@ -169,12 +169,12 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0
# [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ]
- # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ]
+ # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ]
if warm_start
mul!(b₀, A, Δy)
(τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀)
@kaxpby!(m, one(FC), b, -one(FC), b₀)
- mul!(c₀, Aᵀ, Δx)
+ mul!(c₀, Aᴴ, Δx)
(ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀)
@kaxpby!(n, one(FC), c, -one(FC), c₀)
end
@@ -244,10 +244,10 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Continue the orthogonal tridiagonalization process.
# AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ
if iter ≥ 2
@kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁
@@ -261,7 +261,7 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Compute vₖ₊₁ and uₖ₊₁
MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ
- NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ
+ NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ
βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E
γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F
@@ -282,10 +282,10 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# [0 u₁ ••• 0 uₖ]
#
# rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ
- # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ]
+ # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ]
#
# block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ
- # [ Aᵀ νF ] [ 0 F ]
+ # [ Aᴴ νF ] [ 0 F ]
#
# TriMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - β₁e₁ - γ₁e₂ ‖
#
@@ -419,7 +419,7 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
@kswap(gy₂ₖ₋₂, gy₂ₖ)
end
- # Update p̅ₖ = (Qₖ)ᵀ * (β₁e₁ + γ₁e₂)
+ # Update p̅ₖ = (Qₖ)ᴴ * (β₁e₁ + γ₁e₂)
πbis₂ₖ = c₁ₖ * πbar₂ₖ
πbis₂ₖ₊₂ = conj(s₁ₖ) * πbar₂ₖ
#
diff --git a/src/usymlq.jl b/src/usymlq.jl
index 71670c80f..29cd704c7 100644
--- a/src/usymlq.jl
+++ b/src/usymlq.jl
@@ -31,7 +31,7 @@ export usymlq, usymlq!
Solve the linear system Ax = b using the USYMLQ method.
USYMLQ is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`.
-The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`.
+The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`.
The error norm ‖x - x*‖ monotonously decreases in USYMLQ.
It's considered as a generalization of SYMMLQ.
@@ -103,7 +103,7 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ktypeof(c) == S || error("ktypeof(c) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, p, Δx, x = solver.uₖ₋₁, solver.uₖ, solver.p, solver.Δx, solver.x
@@ -146,7 +146,7 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
uₖ .= c ./ γₖ # u₁ = c / γ₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations
@@ -164,10 +164,10 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
# Continue the SSY tridiagonalization process.
# AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ
@kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
@@ -233,7 +233,7 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ.
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ.
# [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ
# [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
if iter ≥ 2
diff --git a/src/usymqr.jl b/src/usymqr.jl
index 863390c3f..45c95c88d 100644
--- a/src/usymqr.jl
+++ b/src/usymqr.jl
@@ -31,7 +31,7 @@ export usymqr, usymqr!
Solve the linear system Ax = b using the USYMQR method.
USYMQR is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`.
-The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`.
+The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`.
The residual norm ‖b - Ax‖ monotonously decreases in USYMQR.
It's considered as a generalization of MINRES.
@@ -100,13 +100,13 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ktypeof(c) == S || error("ktypeof(c) ≠ $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
vₖ₋₁, vₖ, q, Δx, x, p = solver.vₖ₋₁, solver.vₖ, solver.q, solver.Δx, solver.x, solver.p
wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, stats = solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ, solver.stats
warm_start = solver.warm_start
- rNorms, AᵀrNorms = stats.residuals, stats.Aresiduals
+ rNorms, AᴴrNorms = stats.residuals, stats.Aresiduals
reset!(stats)
r₀ = warm_start ? q : b
@@ -133,7 +133,7 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ε = atol + rtol * rNorm
κ = zero(T)
- (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᵀrₖ₋₁‖")
+ (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᴴrₖ₋₁‖")
kdisplay(iter, verbose) && @printf("%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗")
βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖
@@ -146,7 +146,7 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹
wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹
- ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁
+ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁
# Stopping criterion.
solved = rNorm ≤ ε
@@ -161,10 +161,10 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
# Continue the SSY tridiagonalization process.
# AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ
@kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
@@ -254,9 +254,9 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
rNorm = abs(ζbarₖ₊₁)
history && push!(rNorms, rNorm)
- # Compute ‖Aᵀrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²).
- AᵀrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁))
- history && push!(AᵀrNorms, AᵀrNorm)
+ # Compute ‖Aᴴrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²).
+ AᴴrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁))
+ history && push!(AᴴrNorms, AᴴrNorm)
# Compute uₖ₊₁ and uₖ₊₁.
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@@ -286,12 +286,12 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
βₖ = βₖ₊₁
# Update stopping criterion.
- iter == 1 && (κ = atol + rtol * AᵀrNorm)
+ iter == 1 && (κ = atol + rtol * AᴴrNorm)
user_requested_exit = callback(solver) :: Bool
solved = rNorm ≤ ε
- inconsistent = !solved && AᵀrNorm ≤ κ
+ inconsistent = !solved && AᴴrNorm ≤ κ
tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, rNorm, AᵀrNorm)
+ kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, rNorm, AᴴrNorm)
end
(verbose > 0) && @printf("\n")
tired && (status = "maximum number of iterations exceeded")
From fc8677ba1fa0a5254f3072f3c19482cecdac17f5 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Fri, 9 Sep 2022 13:43:59 -0400
Subject: [PATCH 018/132] Test Krylov macros
---
src/krylov_utils.jl | 151 ++++++++++++-----------
test/test_aux.jl | 288 ++++++++++++++++++++++++++------------------
2 files changed, 246 insertions(+), 193 deletions(-)
diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl
index c61bf2e5e..46c9d6cd6 100644
--- a/src/krylov_utils.jl
+++ b/src/krylov_utils.jl
@@ -175,6 +175,49 @@ function to_boundary(x :: Vector{T}, d :: Vector{T},
return roots # `σ1` and `σ2`
end
+"""
+ s = vec2str(x; ndisp)
+
+Return a string that displays the vector `x` in the form
+
+ [ -3.0e-01 -5.1e-01 1.9e-01 ... -2.3e-01 -4.4e-01 2.4e-01 ]
+
+with (ndisp - 1)/2 elements on each side (all elements are shown when `length(x) ≤ ndisp`).
+"""
+function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing}
+ n = length(x)
+ if n ≤ ndisp
+ ndisp = n
+ nside = n
+ else
+ nside = max(1, div(ndisp - 1, 2))
+ end
+ s = "["
+ i = 1
+ while i ≤ nside
+ if x[i] !== missing
+ s *= @sprintf("%8.1e ", x[i])
+ else
+ s *= " ✗✗✗✗ "
+ end
+ i += 1
+ end
+ if i ≤ div(n, 2)
+ s *= "... "
+ end
+ i = max(i, n - nside + 1)
+ while i ≤ n
+ if x[i] !== missing
+ s *= @sprintf("%8.1e ", x[i])
+ else
+ s *= " ✗✗✗✗ "
+ end
+ i += 1
+ end
+ s *= "]"
+ return s
+end
+
"""
S = ktypeof(v)
@@ -209,76 +252,75 @@ end
Create an AbstractVector of storage type `S` of length `n` only composed of zero.
"""
-@inline kzeros(S, n) = fill!(S(undef, n), zero(eltype(S)))
+kzeros(S, n) = fill!(S(undef, n), zero(eltype(S)))
"""
v = kones(S, n)
Create an AbstractVector of storage type `S` of length `n` only composed of one.
"""
-@inline kones(S, n) = fill!(S(undef, n), one(eltype(S)))
+kones(S, n) = fill!(S(undef, n), one(eltype(S)))
-@inline allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)) && (solver.:($v) = S(undef, n))
+allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)) && (solver.:($v) = S(undef, n))
-@inline kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0)
+kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0)
-@inline mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x)
+mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x)
-@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy)
-@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy)
-@inline krylov_dot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = dot(x, y)
+kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy)
+kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy)
+kdot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = dot(x, y)
-@inline krylov_dotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = krylov_dot(n, x, dx, y, dy)
-@inline krylov_dotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(krylov_dot(n, x, dx, y, dy))
+kdotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = kdot(n, x, dx, y, dy)
+kdotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(kdot(n, x, dx, y, dy))
-@inline krylov_norm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx)
-@inline krylov_norm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: Number = norm(x)
+knrm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx)
+knrm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = norm(x)
-@inline krylov_scal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx)
-@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: Number = (x .*= s)
-@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = krylov_scal!(n, Complex{T}(s), x, dx)
+kscal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx)
+kscal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = (x .*= s)
+kscal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = kscal!(n, Complex{T}(s), x, dx)
-@inline krylov_axpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy)
-@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpy!(s, x, y)
-@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpy!(n, Complex{T}(s), x, dx, y, dy)
+kaxpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy)
+kaxpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpy!(s, x, y)
+kaxpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpy!(n, Complex{T}(s), x, dx, y, dy)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpby!(s, x, t, y)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, t, y, dy)
-@inline krylov_axpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, s, x, dx, Complex{T}(t), y, dy)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy)
+kaxpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy)
+kaxpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpby!(s, x, t, y)
+kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, t, y, dy)
+kaxpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, s, x, dx, Complex{T}(t), y, dy)
+kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy)
-@inline krylov_copy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy)
-@inline krylov_copy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = copyto!(y, x)
+kcopy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy)
+kcopy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = copyto!(y, x)
# the macros are just for readability, so we don't have to write the increments (always equal to 1)
-
macro kdot(n, x, y)
- return esc(:(krylov_dot($n, $x, 1, $y, 1)))
+ return esc(:(Krylov.kdot($n, $x, 1, $y, 1)))
end
macro kdotr(n, x, y)
- return esc(:(krylov_dotr($n, $x, 1, $y, 1)))
+ return esc(:(Krylov.kdotr($n, $x, 1, $y, 1)))
end
macro knrm2(n, x)
- return esc(:(krylov_norm2($n, $x, 1)))
+ return esc(:(Krylov.knrm2($n, $x, 1)))
end
macro kscal!(n, s, x)
- return esc(:(krylov_scal!($n, $s, $x, 1)))
+ return esc(:(Krylov.kscal!($n, $s, $x, 1)))
end
macro kaxpy!(n, s, x, y)
- return esc(:(krylov_axpy!($n, $s, $x, 1, $y, 1)))
+ return esc(:(Krylov.kaxpy!($n, $s, $x, 1, $y, 1)))
end
macro kaxpby!(n, s, x, t, y)
- return esc(:(krylov_axpby!($n, $s, $x, 1, $t, $y, 1)))
+ return esc(:(Krylov.kaxpby!($n, $s, $x, 1, $t, $y, 1)))
end
macro kcopy!(n, x, y)
- return esc(:(krylov_copy!($n, $x, 1, $y, 1)))
+ return esc(:(Krylov.kcopy!($n, $x, 1, $y, 1)))
end
macro kswap(x, y)
@@ -292,46 +334,3 @@ end
macro kref!(n, x, y, c, s)
return esc(:(reflect!($x, $y, $c, $s)))
end
-
-"""
- s = vec2str(x; ndisp)
-
-Display an array in the form
-
- [ -3.0e-01 -5.1e-01 1.9e-01 ... -2.3e-01 -4.4e-01 2.4e-01 ]
-
-with (ndisp - 1)/2 elements on each side.
-"""
-function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing}
- n = length(x)
- if n ≤ ndisp
- ndisp = n
- nside = n
- else
- nside = max(1, div(ndisp - 1, 2))
- end
- s = "["
- i = 1
- while i ≤ nside
- if x[i] !== missing
- s *= @sprintf("%8.1e ", x[i])
- else
- s *= " ✗✗✗✗ "
- end
- i += 1
- end
- if i ≤ div(n, 2)
- s *= "... "
- end
- i = max(i, n - nside + 1)
- while i ≤ n
- if x[i] !== missing
- s *= @sprintf("%8.1e ", x[i])
- else
- s *= " ✗✗✗✗ "
- end
- i += 1
- end
- s *= "]"
- return s
-end
diff --git a/test/test_aux.jl b/test/test_aux.jl
index 11bdb7c2d..215ffc4b8 100644
--- a/test/test_aux.jl
+++ b/test/test_aux.jl
@@ -1,119 +1,173 @@
@testset "aux" begin
- # test Givens reflector corner cases
- (c, s, ρ) = Krylov.sym_givens(0.0, 0.0)
- @test (c == 1.0) && (s == 0.0) && (ρ == 0.0)
-
- a = 3.14
- (c, s, ρ) = Krylov.sym_givens(a, 0.0)
- @test (c == 1.0) && (s == 0.0) && (ρ == a)
- (c, s, ρ) = Krylov.sym_givens(-a, 0.0)
- @test (c == -1.0) && (s == 0.0) && (ρ == a)
-
- b = 3.14
- (c, s, ρ) = Krylov.sym_givens(0.0, b)
- @test (c == 0.0) && (s == 1.0) && (ρ == b)
- (c, s, ρ) = Krylov.sym_givens(0.0, -b)
- @test (c == 0.0) && (s == -1.0) && (ρ == b)
-
- (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0))
- @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0))
-
- a = Complex(1.0, 1.0)
- (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0))
- @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a)
- (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0))
- @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a)
-
- b = Complex(1.0, 1.0)
- (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b)
- @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b)
- (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b)
- @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b)
-
- # test roots of a quadratic
- roots = Krylov.roots_quadratic(0.0, 0.0, 0.0)
- @test length(roots) == 1
- @test roots[1] == 0.0
-
- roots = Krylov.roots_quadratic(0.0, 0.0, 1.0)
- @test length(roots) == 0
-
- roots = Krylov.roots_quadratic(0.0, 3.14, -1.0)
- @test length(roots) == 1
- @test roots[1] == 1.0 / 3.14
-
- roots = Krylov.roots_quadratic(1.0, 0.0, 1.0)
- @test length(roots) == 0
-
- roots = Krylov.roots_quadratic(1.0, 0.0, 0.0)
- @test length(roots) == 2
- @test roots[1] == 0.0
- @test roots[2] == 0.0
-
- roots = Krylov.roots_quadratic(1.0, 3.0, 2.0)
- @test length(roots) == 2
- @test roots[1] ≈ -2.0
- @test roots[2] ≈ -1.0
-
- roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
- @test length(roots) == 0
-
- # ill-conditioned quadratic
- roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
- @test length(roots) == 2
- @test roots[1] == 1.0e+13
- @test roots[2] == 0.0
-
- # iterative refinement is crucial!
- roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
- @test length(roots) == 2
- @test roots[1] == 1.0e+13
- @test roots[2] == -1.0e-05
-
- # not ill-conditioned quadratic
- roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
- @test length(roots) == 2
- @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
- @test isapprox(roots[2], -1.0, rtol=1.0e-6)
-
- roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
- @test length(roots) == 2
- @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
- @test isapprox(roots[2], -1.0, rtol=1.0e-6)
-
- # test trust-region boundary
- x = ones(5)
- d = ones(5); d[1:2:5] .= -1
- @test_throws ErrorException Krylov.to_boundary(x, d, -1.0)
- @test_throws ErrorException Krylov.to_boundary(x, d, 0.5)
- @test_throws ErrorException Krylov.to_boundary(x, zeros(5), 1.0)
- @test maximum(Krylov.to_boundary(x, d, 5.0)) ≈ 2.209975124224178
- @test minimum(Krylov.to_boundary(x, d, 5.0)) ≈ -1.8099751242241782
- @test maximum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ 1.8099751242241782
- @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178
-
- # test kzeros and kones
- @test Krylov.kzeros(Vector{Float64}, 10) == zeros(10)
- @test Krylov.kones(Vector{Float64}, 10) == ones(10)
-
- # test ktypeof
- a = rand(Float32, 10)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float32}
- @test Krylov.ktypeof(b) == Vector{Float32}
-
- a = rand(Float64, 10)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float64}
- @test Krylov.ktypeof(b) == Vector{Float64}
-
- a = sprand(Float32, 10, 0.5)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float32}
- @test Krylov.ktypeof(b) == Vector{Float32}
-
- a = sprand(Float64, 10, 0.5)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float64}
- @test Krylov.ktypeof(b) == Vector{Float64}
+
+ @testset "sym_givens" begin
+ # test Givens reflector corner cases
+ (c, s, ρ) = Krylov.sym_givens(0.0, 0.0)
+ @test (c == 1.0) && (s == 0.0) && (ρ == 0.0)
+
+ a = 3.14
+ (c, s, ρ) = Krylov.sym_givens(a, 0.0)
+ @test (c == 1.0) && (s == 0.0) && (ρ == a)
+ (c, s, ρ) = Krylov.sym_givens(-a, 0.0)
+ @test (c == -1.0) && (s == 0.0) && (ρ == a)
+
+ b = 3.14
+ (c, s, ρ) = Krylov.sym_givens(0.0, b)
+ @test (c == 0.0) && (s == 1.0) && (ρ == b)
+ (c, s, ρ) = Krylov.sym_givens(0.0, -b)
+ @test (c == 0.0) && (s == -1.0) && (ρ == b)
+
+ (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0))
+ @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0))
+
+ a = Complex(1.0, 1.0)
+ (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0))
+ @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a)
+ (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0))
+ @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a)
+
+ b = Complex(1.0, 1.0)
+ (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b)
+ @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b)
+ (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b)
+ @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b)
+ end
+
+ @testset "roots_quadratic" begin
+ # test roots of a quadratic
+ roots = Krylov.roots_quadratic(0.0, 0.0, 0.0)
+ @test length(roots) == 1
+ @test roots[1] == 0.0
+
+ roots = Krylov.roots_quadratic(0.0, 0.0, 1.0)
+ @test length(roots) == 0
+
+ roots = Krylov.roots_quadratic(0.0, 3.14, -1.0)
+ @test length(roots) == 1
+ @test roots[1] == 1.0 / 3.14
+
+ roots = Krylov.roots_quadratic(1.0, 0.0, 1.0)
+ @test length(roots) == 0
+
+ roots = Krylov.roots_quadratic(1.0, 0.0, 0.0)
+ @test length(roots) == 2
+ @test roots[1] == 0.0
+ @test roots[2] == 0.0
+
+ roots = Krylov.roots_quadratic(1.0, 3.0, 2.0)
+ @test length(roots) == 2
+ @test roots[1] ≈ -2.0
+ @test roots[2] ≈ -1.0
+
+ roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
+ @test length(roots) == 0
+
+ # ill-conditioned quadratic
+ roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
+ @test length(roots) == 2
+ @test roots[1] == 1.0e+13
+ @test roots[2] == 0.0
+
+ # iterative refinement is crucial!
+ roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
+ @test length(roots) == 2
+ @test roots[1] == 1.0e+13
+ @test roots[2] == -1.0e-05
+
+ # not ill-conditioned quadratic
+ roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
+ @test length(roots) == 2
+ @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
+ @test isapprox(roots[2], -1.0, rtol=1.0e-6)
+
+ roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
+ @test length(roots) == 2
+ @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
+ @test isapprox(roots[2], -1.0, rtol=1.0e-6)
+ end
+
+ @testset "to_boundary" begin
+ # test trust-region boundary
+ x = ones(5)
+ d = ones(5); d[1:2:5] .= -1
+ @test_throws ErrorException Krylov.to_boundary(x, d, -1.0)
+ @test_throws ErrorException Krylov.to_boundary(x, d, 0.5)
+ @test_throws ErrorException Krylov.to_boundary(x, zeros(5), 1.0)
+ @test maximum(Krylov.to_boundary(x, d, 5.0)) ≈ 2.209975124224178
+ @test minimum(Krylov.to_boundary(x, d, 5.0)) ≈ -1.8099751242241782
+ @test maximum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ 1.8099751242241782
+ @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178
+ end
+
+ @testset "kzeros" begin
+ # test kzeros
+ @test Krylov.kzeros(Vector{Float64}, 10) == zeros(Float64, 10)
+ @test Krylov.kzeros(Vector{ComplexF32}, 10) == zeros(ComplexF32, 10)
+ end
+
+ @testset "kones" begin
+ # test kones
+ @test Krylov.kones(Vector{Float64}, 10) == ones(Float64, 10)
+ @test Krylov.kones(Vector{ComplexF32}, 10) == ones(ComplexF32, 10)
+ end
+
+ @testset "ktypeof" begin
+ # test ktypeof
+ a = rand(Float32, 10)
+ b = view(a, 4:8)
+ @test Krylov.ktypeof(a) == Vector{Float32}
+ @test Krylov.ktypeof(b) == Vector{Float32}
+
+ a = rand(Float64, 10)
+ b = view(a, 4:8)
+ @test Krylov.ktypeof(a) == Vector{Float64}
+ @test Krylov.ktypeof(b) == Vector{Float64}
+
+ a = sprand(Float32, 10, 0.5)
+ b = view(a, 4:8)
+ @test Krylov.ktypeof(a) == Vector{Float32}
+ @test Krylov.ktypeof(b) == Vector{Float32}
+
+ a = sprand(Float64, 10, 0.5)
+ b = view(a, 4:8)
+ @test Krylov.ktypeof(a) == Vector{Float64}
+ @test Krylov.ktypeof(b) == Vector{Float64}
+ end
+
+ @testset "macros" begin
+ # test macros
+ for FC ∈ (Float16, Float32, Float64, Complex{Float16}, Complex{Float32}, Complex{Float64})
+ n = 10
+ x = rand(FC, n)
+ y = rand(FC, n)
+ a = rand(FC)
+ b = rand(FC)
+ c = rand(FC)
+ s = rand(FC)
+
+ T = real(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+
+ Krylov.@kdot(n, x, y)
+
+ Krylov.@kdotr(n, x, y)
+
+ Krylov.@knrm2(n, x)
+
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+
+ Krylov.@kcopy!(n, x, y)
+
+ Krylov.@kswap(x, y)
+
+ Krylov.@kref!(n, x, y, c, s)
+ end
+ end
end
From e7cd9b8b08da8710342cd8c0101d15becdf9e45e Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Fri, 9 Sep 2022 15:21:45 -0400
Subject: [PATCH 019/132] Use buildkite to test the GPU support
---
.buildkite/pipeline.yml | 65 ++++++++++++++++++++++++++++++
test/gpu/amd.jl | 77 ++++++++++++++++++++++++++++++++++++
test/gpu/intel.jl | 84 +++++++++++++++++++++++++++++++++++++++
test/gpu/metal.jl | 88 +++++++++++++++++++++++++++++++++++++++++
test/gpu/nvidia.jl | 77 ++++++++++++++++++++++++++++++++++++
test/test_aux.jl | 2 +-
6 files changed, 392 insertions(+), 1 deletion(-)
create mode 100644 .buildkite/pipeline.yml
create mode 100644 test/gpu/amd.jl
create mode 100644 test/gpu/intel.jl
create mode 100644 test/gpu/metal.jl
create mode 100644 test/gpu/nvidia.jl
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
new file mode 100644
index 000000000..24c01d7f0
--- /dev/null
+++ b/.buildkite/pipeline.yml
@@ -0,0 +1,65 @@
+steps:
+ - label: "Nvidia GPUs -- CUDA.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.8
+ agents:
+ queue: "juliagpu"
+ cuda: "*"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("CUDA")
+ Pkg.instantiate()
+ include("test/gpu/nvidia.jl")'
+ timeout_in_minutes: 30
+
+ - label: "AMD GPUs -- AMDGPU.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.8
+ agents:
+ queue: "juliagpu"
+ rocm: "*"
+ rocmgpu: "gfx908"
+ env:
+ JULIA_AMDGPU_CORE_MUST_LOAD: "1"
+ JULIA_AMDGPU_HIP_MUST_LOAD: "1"
+ JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("AMDGPU")
+ Pkg.instantiate()
+ include("test/gpu/amd.jl")'
+ timeout_in_minutes: 30
+
+ - label: "Intel GPUs -- oneAPI.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.8
+ agents:
+ queue: "juliagpu"
+ intel: "*"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("oneAPI")
+ Pkg.instantiate()
+ include("test/gpu/intel.jl")'
+ timeout_in_minutes: 30
+
+ - label: "Apple M1 GPUs -- Metal.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.8
+ agents:
+ queue: "juliagpu"
+ metal: "*"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("Metal")
+ Pkg.instantiate()
+ include("test/gpu/metal.jl")'
+ timeout_in_minutes: 30
diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl
new file mode 100644
index 000000000..f1193b235
--- /dev/null
+++ b/test/gpu/amd.jl
@@ -0,0 +1,77 @@
+using LinearAlgebra, SparseArrays, Test
+using Krylov, AMDGPU
+
+@testset "AMD -- AMDGPU.jl" begin
+
+ @test AMDGPU.functional()
+ AMDGPU.allowscalar(false)
+
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ S = ROCVector{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ # @testset "kref! -- $FC" begin
+ # Krylov.@kref!(n, x, y, c, s)
+ # end
+
+ ε = eps(T)
+ A = rand(FC, n, n)
+ A = ROCMatrix{FC}(A)
+ b = rand(FC, n)
+ b = ROCVector{FC}(b)
+
+ @testset "GMRES -- $FC" begin
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ √ε
+ end
+
+ @testset "CG -- $FC" begin
+ C = A * A'
+ x, stats = cg(C, b)
+ @test stats.solved
+ end
+ end
+end
diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl
new file mode 100644
index 000000000..4fa4192c6
--- /dev/null
+++ b/test/gpu/intel.jl
@@ -0,0 +1,84 @@
+using LinearAlgebra, SparseArrays, Test
+using Krylov, oneAPI
+
+import Krylov.kdot
+function kdot(n :: Integer, x :: oneVector{T}, dx :: Integer, y :: oneVector{T}, dy :: Integer) where T <: Krylov.FloatOrComplex
+ z = similar(x)
+ z .= conj.(x) .* y
+ reduce(+, z)
+end
+
+@testset "Intel -- oneAPI.jl" begin
+
+ @test oneAPI.functional()
+ oneAPI.allowscalar(false)
+
+ for FC ∈ (Float32, ComplexF32)
+ S = oneVector{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ # @testset "kref! -- $FC" begin
+ # Krylov.@kref!(n, x, y, c, s)
+ # end
+
+ ε = eps(T)
+ A = rand(FC, n, n)
+ A = oneMatrix{FC}(A)
+ b = rand(FC, n)
+ b = oneVector{FC}(b)
+
+ @testset "GMRES -- $FC" begin
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ √ε
+ end
+
+ @testset "CG -- $FC" begin
+ C = A * A'
+ x, stats = cg(C, b)
+ @test stats.solved
+ end
+ end
+end
diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl
new file mode 100644
index 000000000..af386f513
--- /dev/null
+++ b/test/gpu/metal.jl
@@ -0,0 +1,88 @@
+using LinearAlgebra, SparseArrays, Test
+using Krylov, Metal
+
+# https://github.com/JuliaGPU/Metal.jl/pull/48
+const MtlVector{T} = MtlArray{T,1}
+const MtlMatrix{T} = MtlArray{T,2}
+
+import Krylov.kdot
+function kdot(n :: Integer, x :: MtlVector{T}, dx :: Integer, y :: MtlVector{T}, dy :: Integer) where T <: Krylov.FloatOrComplex
+ z = similar(x)
+ z .= conj.(x) .* y
+ reduce(+, z)
+end
+
+@testset "Apple M1 GPUs -- Metal.jl" begin
+
+ # @test Metal.functional()
+ Metal.allowscalar(false)
+
+ for FC in (Float32, ComplexF32)
+ S = MtlVector{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ # @testset "kref! -- $FC" begin
+ # Krylov.@kref!(n, x, y, c, s)
+ # end
+
+ ε = eps(T)
+ A = rand(FC, n, n)
+ A = MtlMatrix{FC}(A)
+ b = rand(FC, n)
+ b = MtlVector{FC}(b)
+
+ @testset "GMRES -- $FC" begin
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ √ε
+ end
+
+ @testset "CG -- $FC" begin
+ C = A * A'
+ x, stats = cg(C, b)
+ @test stats.solved
+ end
+ end
+end
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
new file mode 100644
index 000000000..824d65239
--- /dev/null
+++ b/test/gpu/nvidia.jl
@@ -0,0 +1,77 @@
+using LinearAlgebra, SparseArrays, Test
+using Krylov, CUDA, CUDA.CUSPARSE
+
+@testset "Nvidia -- CUDA.jl" begin
+
+ @test CUDA.functional()
+ CUDA.allowscalar(false)
+
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ S = CuVector{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ @testset "kref! -- $FC" begin
+ Krylov.@kref!(n, x, y, c, s)
+ end
+
+ ε = eps(T)
+ A = rand(FC, n, n)
+ A = CuMatrix{FC}(A)
+ b = rand(FC, n)
+ b = CuVector{FC}(b)
+
+ @testset "GMRES -- $FC" begin
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ √ε
+ end
+
+ @testset "CG -- $FC" begin
+ C = A * A'
+ x, stats = cg(C, b)
+ @test stats.solved
+ end
+ end
+end
diff --git a/test/test_aux.jl b/test/test_aux.jl
index 215ffc4b8..5a4d094c7 100644
--- a/test/test_aux.jl
+++ b/test/test_aux.jl
@@ -136,7 +136,7 @@
@testset "macros" begin
# test macros
- for FC ∈ (Float16, Float32, Float64, Complex{Float16}, Complex{Float32}, Complex{Float64})
+ for FC ∈ (Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64)
n = 10
x = rand(FC, n)
y = rand(FC, n)
From fc8bb5983fadede4235b971c339fdcbd46c56b3e Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Sat, 10 Sep 2022 00:31:35 -0400
Subject: [PATCH 020/132] Update workflows
---
.github/workflows/Breakage.yml | 12 ++++-----
.github/workflows/CommentPR.yml | 35 ++++++++++++++++++------
.github/workflows/CompatHelper.yml | 41 +++++++++++++++++++++++------
.github/workflows/Documentation.yml | 2 +-
.github/workflows/ci.yml | 8 +++---
5 files changed, 71 insertions(+), 27 deletions(-)
diff --git a/.github/workflows/Breakage.yml b/.github/workflows/Breakage.yml
index 266eed3cc..8fd92afdd 100644
--- a/.github/workflows/Breakage.yml
+++ b/.github/workflows/Breakage.yml
@@ -24,14 +24,14 @@ jobs:
pkgversion: [latest, stable]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
# Install Julia
- uses: julia-actions/setup-julia@v1
with:
version: '1'
arch: x64
- - uses: actions/cache@v1
+ - uses: actions/cache@v3
env:
cache-name: cache-artifacts
with:
@@ -85,7 +85,7 @@ jobs:
end;
end'
- - uses: actions/upload-artifact@v2
+ - uses: actions/upload-artifact@v3
with:
name: pr
path: pr/
@@ -94,9 +94,9 @@ jobs:
needs: break
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- - uses: actions/download-artifact@v2
+ - uses: actions/download-artifact@v3
with:
name: pr
path: pr/
@@ -127,7 +127,7 @@ jobs:
fi
done >> MSG
- - uses: actions/upload-artifact@v2
+ - uses: actions/upload-artifact@v3
with:
name: pr
path: pr/
diff --git a/.github/workflows/CommentPR.yml b/.github/workflows/CommentPR.yml
index 14f6dcd47..479d50dc5 100644
--- a/.github/workflows/CommentPR.yml
+++ b/.github/workflows/CommentPR.yml
@@ -39,16 +39,35 @@ jobs:
- run: unzip pr.zip
- name: 'Comment on PR'
- uses: actions/github-script@v3
+ uses: actions/github-script@v6
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
- var fs = require('fs');
- var issue_number = Number(fs.readFileSync('./NR'));
- var msg = fs.readFileSync('./MSG', 'utf8');
- await github.issues.createComment({
+ var fs = require('fs')
+ var msg = fs.readFileSync('./MSG', 'utf8')
+
+ // Get the existing comments.
+ const {data: comments} = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
- issue_number: issue_number,
- body: msg
- });
+ issue_number: context.payload.number
+ })
+
+ // Find any comment already made by the bot.
+ const botComment = comments.find(comment => comment.user.id === 41898282)
+
+ if (botComment) {
+ await github.rest.issues.updateComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ comment_id: botComment.id,
+ body: msg
+ })
+ } else {
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: context.payload.number,
+ body: msg
+ })
+ }
diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index b546a8082..7a9c79fd4 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -1,19 +1,44 @@
name: CompatHelper
-
on:
schedule:
- - cron: '00 00 * * *'
-
+ - cron: 0 0 * * *
+ workflow_dispatch:
+permissions:
+ contents: write
+ pull-requests: write
jobs:
CompatHelper:
runs-on: ubuntu-latest
steps:
- - uses: julia-actions/setup-julia@latest
+ - name: Check if Julia is already available in the PATH
+ id: julia_in_path
+ run: which julia
+ continue-on-error: true
+ - name: Install Julia, but only if it is not already available in the PATH
+ uses: julia-actions/setup-julia@v1
with:
version: '1'
- - name: CompatHelper
- run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- - name: CompatHelper.main()
+ arch: ${{ runner.arch }}
+ if: steps.julia_in_path.outcome != 'success'
+ - name: "Add the General registry via Git"
+ run: |
+ import Pkg
+ ENV["JULIA_PKG_SERVER"] = ""
+ Pkg.Registry.add("General")
+ shell: julia --color=yes {0}
+ - name: "Install CompatHelper"
+ run: |
+ import Pkg
+ name = "CompatHelper"
+ uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
+ version = "3"
+ Pkg.add(; name, uuid, version)
+ shell: julia --color=yes {0}
+ - name: "Run CompatHelper"
+ run: |
+ import CompatHelper
+ CompatHelper.main()
+ shell: julia --color=yes {0}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: julia -e 'using CompatHelper; CompatHelper.main()'
+ COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml
index be0b86584..fef36054d 100644
--- a/.github/workflows/Documentation.yml
+++ b/.github/workflows/Documentation.yml
@@ -10,7 +10,7 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- uses: julia-actions/setup-julia@latest
with:
version: '1'
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 409e0d146..9e1791f48 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -31,12 +31,12 @@ jobs:
arch: x64
allow_failure: true
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- - uses: actions/cache@v1
+ - uses: actions/cache@v3
env:
cache-name: cache-artifacts
with:
@@ -49,6 +49,6 @@ jobs:
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- - uses: codecov/codecov-action@v1
+ - uses: codecov/codecov-action@v3
with:
- file: lcov.info
+ files: lcov.info
From 26c485641793badb14d1cea7b69320bb1f7e9471 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Sat, 10 Sep 2022 01:58:51 -0400
Subject: [PATCH 021/132] Update CommentPR.yml
---
.github/workflows/CommentPR.yml | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/CommentPR.yml b/.github/workflows/CommentPR.yml
index 479d50dc5..043113f74 100644
--- a/.github/workflows/CommentPR.yml
+++ b/.github/workflows/CommentPR.yml
@@ -44,13 +44,14 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
var fs = require('fs')
+ var issue_number = Number(fs.readFileSync('./NR'))
var msg = fs.readFileSync('./MSG', 'utf8')
// Get the existing comments.
const {data: comments} = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
- issue_number: context.payload.number
+ issue_number: issue_number
})
// Find any comment already made by the bot.
@@ -67,7 +68,7 @@ jobs:
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
- issue_number: context.payload.number,
+ issue_number: issue_number,
body: msg
})
}
From b19c74b454b7fedd23844976cc9403cfcb7efe75 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 13 Sep 2022 01:31:30 -0400
Subject: [PATCH 022/132] [documentation] update gpu.md
---
docs/make.jl | 2 +-
docs/src/gpu.md | 113 ++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 101 insertions(+), 14 deletions(-)
diff --git a/docs/make.jl b/docs/make.jl
index f59bfac0c..0ad50d52f 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -6,7 +6,7 @@ makedocs(
linkcheck = true,
strict = true,
format = Documenter.HTML(assets = ["assets/style.css"],
- ansicolor=true,
+ ansicolor = true,
prettyurls = get(ENV, "CI", nothing) == "true",
collapselevel = 1),
sitename = "Krylov.jl",
diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 3c6bc1e29..fc7a05587 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -1,6 +1,15 @@
-## GPU support
+# [GPU support](@id gpu)
-All solvers in Krylov.jl can be used with `CuArrays` and allow computations with Nvidia GPU. Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to GPU format (`CuMatrix` and `CuVector`).
+Krylov methods are well suited for GPU computations because they only require matrix-vector products ($u \leftarrow Av$, $u \leftarrow A^{H}w$) and vector operations ($\|v\|$, $u^H v$, $v \leftarrow \alpha u + \beta v$), which are easily parallelizable.
+
+The implementations in Krylov.jl are generic so as to take advantage of the multiple dispatch and broadcast features of Julia.
+It allows the implementations to be specialized automatically by the compiler for both CPU and GPU usages.
+Thus, Krylov.jl works with GPU backends that build on [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl), such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) or [Metal.jl](https://github.com/JuliaGPU/Metal.jl).
+
+## Nvidia GPUs
+
+All solvers in Krylov.jl can be used with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) and allow computations with Nvidia GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to related GPU format (`CuMatrix` and `CuVector`).
```julia
using CUDA, Krylov
@@ -13,11 +22,11 @@ b_cpu = rand(20)
A_gpu = CuMatrix(A_cpu)
b_gpu = CuVector(b_cpu)
-# Solve a square and dense system on GPU
+# Solve a square and dense system on a Nivida GPU
x, stats = bilq(A_gpu, b_gpu)
```
-Sparse matrices have a specific storage on GPU (`CuSparseMatrixCSC` or `CuSparseMatrixCSR`):
+Sparse matrices have a specific storage on Nvidia GPUs (`CuSparseMatrixCSC` or `CuSparseMatrixCSR`):
```julia
using CUDA, Krylov
@@ -31,7 +40,7 @@ b_cpu = rand(200)
A_gpu = CuSparseMatrixCSC(A_cpu)
b_gpu = CuVector(b_cpu)
-# Solve a rectangular and sparse system on GPU
+# Solve a rectangular and sparse system on a Nvidia GPU
x, stats = lsmr(A_gpu, b_gpu)
```
@@ -47,14 +56,14 @@ using SparseArrays, Krylov, LinearOperators
using CUDA, CUDA.CUSPARSE
# Transfer the linear system from the CPU to the GPU
-A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu)
+A_gpu = CuSparseMatrixCSC(A_cpu) # A_gpu = CuSparseMatrixCSR(A_cpu)
b_gpu = CuVector(b_cpu)
# LLᴴ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
P = ic02(A_gpu, 'O')
# Solve Py = x
-function ldiv!(y, P, x)
+function ldiv_ic0!(y, P, x)
copyto!(y, x) # Variant for CuSparseMatrixCSR
sv2!('T', 'U', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O') # sv2!('T', 'L', 'N', 1.0, P, y, 'O')
@@ -65,10 +74,10 @@ end
n = length(b_gpu)
T = eltype(b_gpu)
symmetric = hermitian = true
-opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x))
+opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
# Solve a symmetric positive definite system with an incomplete Cholesky preconditioner on GPU
-(x, stats) = cg(A_gpu, b_gpu, M=opM)
+x, stats = cg(A_gpu, b_gpu, M=opM)
```
### Example with a general square system
@@ -84,14 +93,14 @@ A_cpu = A_cpu[p,:]
b_cpu = b_cpu[p]
# Transfer the linear system from the CPU to the GPU
-A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu)
+A_gpu = CuSparseMatrixCSC(A_cpu) # A_gpu = CuSparseMatrixCSR(A_cpu)
b_gpu = CuVector(b_cpu)
# LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
P = ilu02(A_gpu, 'O')
# Solve Py = x
-function ldiv!(y, P, x)
+function ldiv_ilu0!(y, P, x)
copyto!(y, x) # Variant for CuSparseMatrixCSR
sv2!('N', 'L', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'U', 1.0, P, y, 'O')
sv2!('N', 'U', 'U', 1.0, P, y, 'O') # sv2!('N', 'U', 'N', 1.0, P, y, 'O')
@@ -102,8 +111,86 @@ end
n = length(b_gpu)
T = eltype(b_gpu)
symmetric = hermitian = false
-opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x))
+opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
# Solve an unsymmetric system with an incomplete LU preconditioner on GPU
-(x, stats) = bicgstab(A_gpu, b_gpu, M=opM)
+x, stats = bicgstab(A_gpu, b_gpu, M=opM)
```
+
+## AMD GPUs
+
+All solvers in Krylov.jl can be used with [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) and allow computations with AMD GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to related GPU format (`ROCMatrix` and `ROCVector`).
+
+```julia
+using Krylov, AMDGPU
+
+# CPU Arrays
+A_cpu = rand(ComplexF64, 20, 20)
+A_cpu = A_cpu + A_cpu'
+b_cpu = rand(ComplexF64, 20)
+
+A = A + A'
+A_gpu = ROCMatrix(A)
+b_gpu = ROCVector(b)
+
+# Solve a dense hermitian system on an AMD GPU
+x, stats = minres(A_gpu, b_gpu)
+```
+
+!!! info
+ The library `rocSPARSE` is not interfaced yet in AMDGPU.jl and only dense linear systems are supported.
+
+## Intel GPUs
+
+All solvers in Krylov.jl, except [`MINRES-QLP`](@ref minres_qlp), can be used with [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) and allow computations with Intel GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to related GPU format (`oneMatrix` and `oneVector`).
+
+```julia
+using Krylov, oneAPI
+
+T = Float32 # oneAPI.jl also works with ComplexF32
+m = 20
+n = 10
+
+# CPU Arrays
+A_cpu = rand(T, m, n)
+b_cpu = rand(T, m)
+
+# GPU Arrays
+A_gpu = oneMatrix(A_cpu)
+b_gpu = oneVector(b_cpu)
+
+# Solve a dense least-squares problem on an Intel GPU
+x, stats = lsqr(A_gpu, b_gpu)
+```
+
+!!! warning
+ The library `oneMKL` is not interfaced yet in oneAPI.jl and all BLAS routines (dot, norm, mul!, etc.) dispatch to generic fallbacks.
+
+## Apple M1 GPUs
+
+All solvers in Krylov.jl, except [`MINRES-QLP`](@ref minres_qlp), can be used with [Metal.jl](https://github.com/JuliaGPU/Metal.jl) and allow computations with Apple M1 GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to related GPU format (`MtlMatrix` and `MtlVector`).
+
+```julia
+using Krylov, Metal
+
+T = Float32 # Metal.jl also works with ComplexF32
+n = 10
+n = 20
+
+# CPU Arrays
+A_cpu = rand(T, n, m)
+b_cpu = rand(T, n)
+
+# GPU Arrays
+A_gpu = MtlMatrix(A_cpu)
+b_gpu = MtlVector(b_cpu)
+
+# Solve a dense least-norm problem on an Apple M1 GPU
+x, stats = craig(A_gpu, b_gpu)
+```
+
+!!! warning
+ Metal.jl is under heavy development and is considered experimental for now.
From 3c67dfb9da554a5126e680bf204d48722e8a94b5 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 13 Sep 2022 01:32:54 -0400
Subject: [PATCH 023/132] [documentation] Test the code of GPU backends with
buildkite
---
.buildkite/pipeline.yml | 1 +
test/gpu/amd.jl | 10 +++++
test/gpu/intel.jl | 16 ++++++--
test/gpu/metal.jl | 16 ++++++--
test/gpu/nvidia.jl | 89 ++++++++++++++++++++++++++++++++++++++++-
5 files changed, 125 insertions(+), 7 deletions(-)
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 24c01d7f0..963eb619b 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -10,6 +10,7 @@ steps:
julia --color=yes --project -e '
using Pkg
Pkg.add("CUDA")
+ Pkg.add("LinearOperators")
Pkg.instantiate()
include("test/gpu/nvidia.jl")'
timeout_in_minutes: 30
diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl
index f1193b235..c161b1900 100644
--- a/test/gpu/amd.jl
+++ b/test/gpu/amd.jl
@@ -6,6 +6,16 @@ using Krylov, AMDGPU
@test AMDGPU.functional()
AMDGPU.allowscalar(false)
+ @testset "documentation" begin
+ A_cpu = rand(ComplexF64, 20, 20)
+ A_cpu = A_cpu + A_cpu'
+ b_cpu = rand(ComplexF64, 20)
+ A = A + A'
+ A_gpu = ROCMatrix(A)
+ b_gpu = ROCVector(b)
+ x, stats = minres(A_gpu, b_gpu)
+ end
+
for FC in (Float32, Float64, ComplexF32, ComplexF64)
S = ROCVector{FC}
T = real(FC)
diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl
index 4fa4192c6..12d9232fd 100644
--- a/test/gpu/intel.jl
+++ b/test/gpu/intel.jl
@@ -1,11 +1,10 @@
using LinearAlgebra, SparseArrays, Test
using Krylov, oneAPI
+# https://github.com/JuliaGPU/GPUArrays.jl/pull/427
import Krylov.kdot
function kdot(n :: Integer, x :: oneVector{T}, dx :: Integer, y :: oneVector{T}, dy :: Integer) where T <: Krylov.FloatOrComplex
- z = similar(x)
- z .= conj.(x) .* y
- reduce(+, z)
+ return mapreduce(dot, +, x, y)
end
@testset "Intel -- oneAPI.jl" begin
@@ -13,6 +12,17 @@ end
@test oneAPI.functional()
oneAPI.allowscalar(false)
+ @testset "documentation" begin
+ T = Float32
+ m = 20
+ n = 10
+ A_cpu = rand(T, m, n)
+ b_cpu = rand(T, m)
+ A_gpu = oneMatrix(A_cpu)
+ b_gpu = oneVector(b_cpu)
+ x, stats = lsqr(A_gpu, b_gpu)
+ end
+
for FC ∈ (Float32, ComplexF32)
S = oneVector{FC}
T = real(FC)
diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl
index af386f513..a4ce46922 100644
--- a/test/gpu/metal.jl
+++ b/test/gpu/metal.jl
@@ -5,11 +5,10 @@ using Krylov, Metal
const MtlVector{T} = MtlArray{T,1}
const MtlMatrix{T} = MtlArray{T,2}
+# https://github.com/JuliaGPU/GPUArrays.jl/pull/427
import Krylov.kdot
function kdot(n :: Integer, x :: MtlVector{T}, dx :: Integer, y :: MtlVector{T}, dy :: Integer) where T <: Krylov.FloatOrComplex
- z = similar(x)
- z .= conj.(x) .* y
- reduce(+, z)
+ return mapreduce(dot, +, x, y)
end
@testset "Apple M1 GPUs -- Metal.jl" begin
@@ -17,6 +16,17 @@ end
# @test Metal.functional()
Metal.allowscalar(false)
+ @testset "documentation" begin
+ T = Float32
+ n = 10
+ n = 20
+ A_cpu = rand(T, n, m)
+ b_cpu = rand(T, n)
+ A_gpu = MtlMatrix(A_cpu)
+ b_gpu = MtlVector(b_cpu)
+ x, stats = craig(A_gpu, b_gpu)
+ end
+
for FC in (Float32, ComplexF32)
S = MtlVector{FC}
T = real(FC)
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
index 824d65239..33e0a4ba8 100644
--- a/test/gpu/nvidia.jl
+++ b/test/gpu/nvidia.jl
@@ -1,11 +1,98 @@
using LinearAlgebra, SparseArrays, Test
-using Krylov, CUDA, CUDA.CUSPARSE
+using LinearOperators, Krylov, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER
+
+include("../test_utils.jl")
@testset "Nvidia -- CUDA.jl" begin
@test CUDA.functional()
CUDA.allowscalar(false)
+ @testset "documentation" begin
+ A_cpu = rand(20, 20)
+ b_cpu = rand(20)
+ A_gpu = CuMatrix(A_cpu)
+ b_gpu = CuVector(b_cpu)
+ x, stats = bilq(A_gpu, b_gpu)
+
+ A_cpu = sprand(200, 100, 0.3)
+ b_cpu = rand(200)
+ A_gpu = CuSparseMatrixCSC(A_cpu)
+ b_gpu = CuVector(b_cpu)
+ x, stats = lsmr(A_gpu, b_gpu)
+
+ @testset "ic0" begin
+ A_cpu, b_cpu = sparse_laplacian()
+
+ b_gpu = CuVector(b_cpu)
+ n = length(b_gpu)
+ T = eltype(b_gpu)
+ symmetric = hermitian = true
+
+ A_gpu = CuSparseMatrixCSC(A_cpu)
+ P = ic02(A_gpu, 'O')
+ function ldiv_ic0!(y, P, x)
+ copyto!(y, x)
+ sv2!('T', 'U', 'N', 1.0, P, y, 'O')
+ sv2!('N', 'U', 'N', 1.0, P, y, 'O')
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
+ x, stats = cg(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+
+ A_gpu = CuSparseMatrixCSR(A_cpu)
+ P = ic02(A_gpu, 'O')
+ function ldiv_ic0!(y, P, x)
+ copyto!(y, x)
+ sv2!('N', 'L', 'N', 1.0, P, y, 'O')
+ sv2!('T', 'L', 'N', 1.0, P, y, 'O')
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
+ x, stats = cg(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+ end
+
+ @testset "ilu0" begin
+ A_cpu, b_cpu = polar_poisson()
+
+ p = zfd(A_cpu, 'O')
+ p .+= 1
+ A_cpu = A_cpu[p,:]
+ b_cpu = b_cpu[p]
+
+ b_gpu = CuVector(b_cpu)
+ n = length(b_gpu)
+ T = eltype(b_gpu)
+ symmetric = hermitian = false
+
+ A_gpu = CuSparseMatrixCSC(A_cpu)
+ P = ilu02(A_gpu, 'O')
+ function ldiv_ilu0!(y, P, x)
+ copyto!(y, x)
+ sv2!('N', 'L', 'N', 1.0, P, y, 'O')
+ sv2!('N', 'U', 'U', 1.0, P, y, 'O')
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
+ x, stats = bicgstab(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+
+ A_gpu = CuSparseMatrixCSR(A_cpu)
+ P = ilu02(A_gpu, 'O')
+ function ldiv_ilu0!(y, P, x)
+ copyto!(y, x)
+ sv2!('N', 'L', 'U', 1.0, P, y, 'O')
+ sv2!('N', 'U', 'N', 1.0, P, y, 'O')
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
+ x, stats = bicgstab(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+ end
+ end
+
for FC in (Float32, Float64, ComplexF32, ComplexF64)
S = CuVector{FC}
T = real(FC)
From a97909a7ad033a2b719690daa428df0795006b71 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 13 Sep 2022 01:40:12 -0400
Subject: [PATCH 024/132] Fix few typos
---
docs/src/gpu.md | 7 +++----
test/gpu/amd.jl | 5 ++---
test/gpu/metal.jl | 2 +-
3 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index fc7a05587..3fb68dd0d 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -130,9 +130,8 @@ A_cpu = rand(ComplexF64, 20, 20)
A_cpu = A_cpu + A_cpu'
b_cpu = rand(ComplexF64, 20)
-A = A + A'
-A_gpu = ROCMatrix(A)
-b_gpu = ROCVector(b)
+A_gpu = ROCMatrix(A_cpu)
+b_gpu = ROCVector(b_cpu)
# Solve a dense hermitian system on an AMD GPU
x, stats = minres(A_gpu, b_gpu)
@@ -178,7 +177,7 @@ using Krylov, Metal
T = Float32 # Metal.jl also works with ComplexF32
n = 10
-n = 20
+m = 20
# CPU Arrays
A_cpu = rand(T, n, m)
diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl
index c161b1900..c5dbdd0af 100644
--- a/test/gpu/amd.jl
+++ b/test/gpu/amd.jl
@@ -10,9 +10,8 @@ using Krylov, AMDGPU
A_cpu = rand(ComplexF64, 20, 20)
A_cpu = A_cpu + A_cpu'
b_cpu = rand(ComplexF64, 20)
- A = A + A'
- A_gpu = ROCMatrix(A)
- b_gpu = ROCVector(b)
+ A_gpu = ROCMatrix(A_cpu)
+ b_gpu = ROCVector(b_cpu)
x, stats = minres(A_gpu, b_gpu)
end
diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl
index a4ce46922..9e7f101d1 100644
--- a/test/gpu/metal.jl
+++ b/test/gpu/metal.jl
@@ -19,7 +19,7 @@ end
@testset "documentation" begin
T = Float32
n = 10
- n = 20
+ m = 20
A_cpu = rand(T, n, m)
b_cpu = rand(T, n)
A_gpu = MtlMatrix(A_cpu)
From b5c2ea97358fdc92d266ecee2280eb1ed269ff42 Mon Sep 17 00:00:00 2001
From: Alexis <35051714+amontoison@users.noreply.github.com>
Date: Tue, 13 Sep 2022 15:43:47 -0400
Subject: [PATCH 025/132] Apply suggestions from code review
Co-authored-by: Dominique
---
docs/src/gpu.md | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 3fb68dd0d..4ce8ee448 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -1,15 +1,15 @@
# [GPU support](@id gpu)
-Krylov methods are well suited for GPU computations because they only require matrix-vector products ($u \leftarrow Av$, $u \leftarrow A^{H}w$) and vector operations ($\|v\|$, $u^H v$, $v \leftarrow \alpha u + \beta v$), which are easily parallelizable.
+Krylov methods are well suited for GPU computations because they only require matrix-vector products ($u \leftarrow Av$, $u \leftarrow A^{H}w$) and vector operations ($\|v\|$, $u^H v$, $v \leftarrow \alpha u + \beta v$), which are highly parallelizable.
The implementations in Krylov.jl are generic so as to take advantage of the multiple dispatch and broadcast features of Julia.
-It allows the implementations to be specialized automatically by the compiler for both CPU and GPU usages.
+Those allow the implementations to be specialized automatically by the compiler for both CPU and GPU.
Thus, Krylov.jl works with GPU backends that build on [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl), such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) or [Metal.jl](https://github.com/JuliaGPU/Metal.jl).
## Nvidia GPUs
-All solvers in Krylov.jl can be used with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) and allow computations with Nvidia GPUs.
-Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to related GPU format (`CuMatrix` and `CuVector`).
+All solvers in Krylov.jl can be used with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) and allow computations on Nvidia GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`CuMatrix` and `CuVector`).
```julia
using CUDA, Krylov
@@ -22,7 +22,7 @@ b_cpu = rand(20)
A_gpu = CuMatrix(A_cpu)
b_gpu = CuVector(b_cpu)
-# Solve a square and dense system on a Nivida GPU
+# Solve a square and dense system on an Nvidia GPU
x, stats = bilq(A_gpu, b_gpu)
```
@@ -119,8 +119,8 @@ x, stats = bicgstab(A_gpu, b_gpu, M=opM)
## AMD GPUs
-All solvers in Krylov.jl can be used with [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) and allow computations with AMD GPUs.
-Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to related GPU format (`ROCMatrix` and `ROCVector`).
+All solvers in Krylov.jl can be used with [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) and allow computations on AMD GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`ROCMatrix` and `ROCVector`).
```julia
using Krylov, AMDGPU
@@ -133,7 +133,7 @@ b_cpu = rand(ComplexF64, 20)
A_gpu = ROCMatrix(A_cpu)
b_gpu = ROCVector(b_cpu)
-# Solve a dense hermitian system on an AMD GPU
+# Solve a dense Hermitian system on an AMD GPU
x, stats = minres(A_gpu, b_gpu)
```
@@ -142,8 +142,8 @@ x, stats = minres(A_gpu, b_gpu)
## Intel GPUs
-All solvers in Krylov.jl, except [`MINRES-QLP`](@ref minres_qlp), can be used with [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) and allow computations with Intel GPUs.
-Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to related GPU format (`oneMatrix` and `oneVector`).
+All solvers in Krylov.jl, except [`MINRES-QLP`](@ref minres_qlp), can be used with [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) and allow computations on Intel GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`oneMatrix` and `oneVector`).
```julia
using Krylov, oneAPI
@@ -169,8 +169,8 @@ x, stats = lsqr(A_gpu, b_gpu)
## Apple M1 GPUs
-All solvers in Krylov.jl, except [`MINRES-QLP`](@ref minres_qlp), can be used with [Metal.jl](https://github.com/JuliaGPU/Metal.jl) and allow computations with Apple M1 GPUs.
-Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to related GPU format (`MtlMatrix` and `MtlVector`).
+All solvers in Krylov.jl, except [`MINRES-QLP`](@ref minres_qlp), can be used with [Metal.jl](https://github.com/JuliaGPU/Metal.jl) and allow computations on Apple M1 GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`MtlMatrix` and `MtlVector`).
```julia
using Krylov, Metal
From bcbc6aa07f5752067ce1552ebd499588907ac9d6 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 13 Sep 2022 19:14:11 -0400
Subject: [PATCH 026/132] [GPU] Remove random tests
---
test/gpu/amd.jl | 21 +++++++++++++--------
test/gpu/intel.jl | 23 ++++++++++++++---------
test/gpu/metal.jl | 21 +++++++++++++--------
test/gpu/nvidia.jl | 21 +++++++++++++--------
4 files changed, 53 insertions(+), 33 deletions(-)
diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl
index c5dbdd0af..03ada1d4d 100644
--- a/test/gpu/amd.jl
+++ b/test/gpu/amd.jl
@@ -1,6 +1,8 @@
using LinearAlgebra, SparseArrays, Test
using Krylov, AMDGPU
+include("../test_utils.jl")
+
@testset "AMD -- AMDGPU.jl" begin
@test AMDGPU.functional()
@@ -67,20 +69,23 @@ using Krylov, AMDGPU
# end
ε = eps(T)
- A = rand(FC, n, n)
- A = ROCMatrix{FC}(A)
- b = rand(FC, n)
- b = ROCVector{FC}(b)
+ atol = √ε
+ rtol = √ε
@testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = ROCMatrix{FC}(A)
+ b = ROCVector{FC}(b)
x, stats = gmres(A, b)
- @test norm(b - A * x) ≤ √ε
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
end
@testset "CG -- $FC" begin
- C = A * A'
- x, stats = cg(C, b)
- @test stats.solved
+ A, b = symmetric_definite(FC=FC)
+ A = ROCMatrix{FC}(A)
+ b = ROCVector{FC}(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
end
end
end
diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl
index 12d9232fd..e6826e9e9 100644
--- a/test/gpu/intel.jl
+++ b/test/gpu/intel.jl
@@ -1,8 +1,10 @@
using LinearAlgebra, SparseArrays, Test
using Krylov, oneAPI
-# https://github.com/JuliaGPU/GPUArrays.jl/pull/427
+include("../test_utils.jl")
+
import Krylov.kdot
+# https://github.com/JuliaGPU/GPUArrays.jl/pull/427
function kdot(n :: Integer, x :: oneVector{T}, dx :: Integer, y :: oneVector{T}, dy :: Integer) where T <: Krylov.FloatOrComplex
return mapreduce(dot, +, x, y)
end
@@ -75,20 +77,23 @@ end
# end
ε = eps(T)
- A = rand(FC, n, n)
- A = oneMatrix{FC}(A)
- b = rand(FC, n)
- b = oneVector{FC}(b)
+ atol = √ε
+ rtol = √ε
@testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = oneMatrix{FC}(A)
+ b = oneVector{FC}(b)
x, stats = gmres(A, b)
- @test norm(b - A * x) ≤ √ε
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
end
@testset "CG -- $FC" begin
- C = A * A'
- x, stats = cg(C, b)
- @test stats.solved
+ A, b = symmetric_definite(FC=FC)
+ A = oneMatrix{FC}(A)
+ b = oneVector{FC}(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
end
end
end
diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl
index 9e7f101d1..774ccc10c 100644
--- a/test/gpu/metal.jl
+++ b/test/gpu/metal.jl
@@ -1,6 +1,8 @@
using LinearAlgebra, SparseArrays, Test
using Krylov, Metal
+include("../test_utils.jl")
+
# https://github.com/JuliaGPU/Metal.jl/pull/48
const MtlVector{T} = MtlArray{T,1}
const MtlMatrix{T} = MtlArray{T,2}
@@ -79,20 +81,23 @@ end
# end
ε = eps(T)
- A = rand(FC, n, n)
- A = MtlMatrix{FC}(A)
- b = rand(FC, n)
- b = MtlVector{FC}(b)
+ atol = √ε
+ rtol = √ε
@testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = MtlMatrix{FC}(A)
+ b = MtlVector{FC}(b)
x, stats = gmres(A, b)
- @test norm(b - A * x) ≤ √ε
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
end
@testset "CG -- $FC" begin
- C = A * A'
- x, stats = cg(C, b)
- @test stats.solved
+ A, b = symmetric_definite(FC=FC)
+ A = MtlMatrix{FC}(A)
+ b = MtlVector{FC}(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
end
end
end
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
index 33e0a4ba8..8dfb61b0f 100644
--- a/test/gpu/nvidia.jl
+++ b/test/gpu/nvidia.jl
@@ -3,6 +3,8 @@ using LinearOperators, Krylov, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER
include("../test_utils.jl")
+include("../test_utils.jl")
+
@testset "Nvidia -- CUDA.jl" begin
@test CUDA.functional()
@@ -145,20 +147,23 @@ include("../test_utils.jl")
end
ε = eps(T)
- A = rand(FC, n, n)
- A = CuMatrix{FC}(A)
- b = rand(FC, n)
- b = CuVector{FC}(b)
+ atol = √ε
+ rtol = √ε
@testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = CuMatrix{FC}(A)
+ b = CuVector{FC}(b)
x, stats = gmres(A, b)
- @test norm(b - A * x) ≤ √ε
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
end
@testset "CG -- $FC" begin
- C = A * A'
- x, stats = cg(C, b)
- @test stats.solved
+ A, b = symmetric_definite(FC=FC)
+ A = CuMatrix{FC}(A)
+ b = CuVector{FC}(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
end
end
end
From db3520fca3583dc0ab877490fdd220381766bd05 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Thu, 15 Sep 2022 14:38:24 -0400
Subject: [PATCH 027/132] Add more examples with the preconditioners
---
docs/src/preconditioners.md | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index 133020dc0..e37ab378a 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -172,3 +172,38 @@ using ILUZero, Krylov
Pᵣ = ilu0(A)
x, stats = bicgstab(A, b, N=Pᵣ, ldiv=true) # right preconditioning
```
+
+```julia
+using LDLFactorizations, Krylov
+
+M = ldl(E)
+N = ldl(F)
+
+# [E A] [x] = [b]
+# [Aᴴ F] [y] [c]
+x, y, stats = tricg(A, b, c, M=M, N=N, ldiv=true)
+```
+
+```julia
+using SuiteSparse, Krylov
+import LinearAlgebra.ldiv!
+
+M = cholesky(E)
+
+# ldiv! is not implemented for the sparse Cholesky factorization (SuiteSparse.CHOLMOD)
+ldiv!(y::Vector{T}, F::SuiteSparse.CHOLMOD.Factor{T}, x::Vector{T}) where T = (y .= F \ x)
+
+# [E A] [x] = [b]
+# [Aᴴ 0] [y] [c]
+x, y, stats = trimr(A, b, c, M=M, sp=true, ldiv=true)
+```
+
+```julia
+using Krylov
+
+C = lu(M)
+
+# [M A] [x] = [b]
+# [B 0] [y] [c]
+x, y, stats = gpmr(A, B, b, c, C=C, gsp=true, ldiv=true)
+```
From 6208825cf5dd146b9033b17a39087c173234c348 Mon Sep 17 00:00:00 2001
From: Alexis <35051714+amontoison@users.noreply.github.com>
Date: Thu, 15 Sep 2022 14:41:30 -0400
Subject: [PATCH 028/132] Update docs/src/preconditioners.md
---
docs/src/preconditioners.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index e37ab378a..6e2039634 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -179,8 +179,8 @@ using LDLFactorizations, Krylov
M = ldl(E)
N = ldl(F)
-# [E A] [x] = [b]
-# [Aᴴ F] [y] [c]
+# [E A] [x] = [b]
+# [Aᴴ -F] [y] [c]
x, y, stats = tricg(A, b, c, M=M, N=N, ldiv=true)
```
From 66e1fd140f1963089cc7e0a5a0e11cef9b41c6ab Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Thu, 15 Sep 2022 14:59:32 -0400
Subject: [PATCH 029/132] Release 0.8.4
---
Project.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Project.toml b/Project.toml
index a91e07b8a..74005745f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
name = "Krylov"
uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
-version = "0.8.3"
+version = "0.8.4"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
From 794483b8f92934e81a9efe352239b9e7b7cf057e Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 12 Sep 2022 16:32:11 -0400
Subject: [PATCH 030/132] [documentation] update callbacks.md
---
docs/src/callbacks.md | 71 +++++++++++++++++++++++++++++++++----------
1 file changed, 55 insertions(+), 16 deletions(-)
diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md
index f44018687..2fd69c768 100644
--- a/docs/src/callbacks.md
+++ b/docs/src/callbacks.md
@@ -1,6 +1,7 @@
-## Callbacks
+# [Callbacks](@id callbacks)
-Each Krylov method is able to call a callback function as `callback(solver)` at each iteration. The callback should return `true` if the main loop should terminate, and `false` otherwise.
+Each Krylov method is able to call a callback function as `callback(solver)` at each iteration.
+The callback should return `true` if the main loop should terminate, and `false` otherwise.
If the method terminated because of the callback, the output status will be `"user-requested exit"`.
For example, if the user defines `my_callback(solver::MinresSolver)`, it can be passed to the solver using
@@ -11,33 +12,71 @@ For example, if the user defines `my_callback(solver::MinresSolver)`, it can be
If you need to write a callback that uses variables that are not in the `MinresSolver`, use a closure:
```julia
-function my_callback2(solver::MinresSolver, A, b, storage_vec, tol::Float64)
- mul!(storage_vec, A, solver.x)
- storage_vec .-= b
- return norm(storage_vec) ≤ tol # tolerance based on the 2-norm of the residual
+function my_callback2(solver::MinresSolver, A, b, r, tol)
+ mul!(r, A, solver.x)
+ r .-= b # r := b - Ax
+ bool = norm(r) ≤ tol # tolerance based on the 2-norm of the residual
+ return bool
end
-storage_vec = similar(b)
-(x, stats) = minres(A, b, callback = solver -> my_callback2(solver, A, b, storage_vec, 0.1))
+r = similar(b)
+(x, stats) = minres(A, b, callback = solver -> my_callback2(solver, A, b, r, 1e-6))
```
Alternatively, use a structure and make it callable:
```julia
-mutable struct MyCallback3{S, M}
+mutable struct my_callback3{S, M}
A::M
b::S
- storage_vec::S
+ r::S
tol::Float64
end
-MyCallback3(A, b; tol = 0.1) = MyCallback3(A, b, similar(b), tol)
-function (my_cb::MyCallback3)(solver)
- mul!(my_cb.storage_vec, my_cb.A, solver.x)
- my_cb.storage_vec .-= my_cb.b
- return norm(my_cb.storage_vec) ≤ my_cb.tol # tolerance based on the 2-norm of the residual
+my_callback3(A, b; tol=1e-6) = my_callback3(A, b, similar(b), tol) # Outer constructor
+
+function (my_cb::my_callback3)(solver)
+ mul!(my_cb.r, my_cb.A, solver.x)
+ my_cb.r .-= my_cb.b
+ bool = norm(my_cb.r) ≤ my_cb.tol
+ return bool
end
-my_cb = MyCallback3(A, b; tol = 0.1)
+my_cb = my_callback3(A, b)
(x, stats) = minres(A, b, callback = my_cb)
```
+
+Although the main goal of a callback is to add new stopping conditions, it can also retrieve informations from the workspace of a Krylov method along the iterations.
+We now illustrate how to store all iterates $x_k$ of the GMRES method.
+
+```julia
+S = Krylov.ktypeof(b)
+global X = S[] # Storage for GMRES iterates
+
+function gmres_callback(solver)
+ z = solver.z
+ k = solver.inner_iter
+ nr = sum(1:k)
+ V = solver.V
+ R = solver.R
+ y = copy(z)
+
+ # Solve Rk * yk = zk
+ for i = k : -1 : 1
+ pos = nr + i - k
+ for j = k : -1 : i+1
+ y[i] = y[i] - R[pos] * y[j]
+ pos = pos - j + 1
+ end
+ y[i] = y[i] / R[pos]
+ end
+
+ # xk = Vk * yk
+ xk = sum(V[i] * y[i] for i = 1:k)
+ push!(X, xk)
+
+ return false # We don't want to add new stopping conditions
+end
+
+(x, stats) = gmres(A, b, callback = gmres_callback)
+```
From b0d20791c5e3e3fa21de0353cba0772cd29adf58 Mon Sep 17 00:00:00 2001
From: Alexis <35051714+amontoison@users.noreply.github.com>
Date: Fri, 16 Sep 2022 11:33:56 -0400
Subject: [PATCH 031/132] Update docs/src/callbacks.md
Co-authored-by: Dominique
---
docs/src/callbacks.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md
index 2fd69c768..552f7a1c4 100644
--- a/docs/src/callbacks.md
+++ b/docs/src/callbacks.md
@@ -46,7 +46,7 @@ my_cb = my_callback3(A, b)
(x, stats) = minres(A, b, callback = my_cb)
```
-Although the main goal of a callback is to add new stopping conditions, it can also retrieve informations from the workspace of a Krylov method along the iterations.
+Although the main goal of a callback is to add new stopping conditions, it can also retrieve information from the workspace of a Krylov method along the iterations.
We now illustrate how to store all iterates $x_k$ of the GMRES method.
```julia
From 212a266edb30b182fd1f185dc712990ac6c937cb Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Wed, 21 Sep 2022 19:16:56 -0400
Subject: [PATCH 032/132] Update callbacks.md
---
docs/src/callbacks.md | 36 +++++++++++++++++-------------------
1 file changed, 17 insertions(+), 19 deletions(-)
diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md
index 552f7a1c4..91e0b521c 100644
--- a/docs/src/callbacks.md
+++ b/docs/src/callbacks.md
@@ -3,47 +3,45 @@
Each Krylov method is able to call a callback function as `callback(solver)` at each iteration.
The callback should return `true` if the main loop should terminate, and `false` otherwise.
If the method terminated because of the callback, the output status will be `"user-requested exit"`.
-For example, if the user defines `my_callback(solver::MinresSolver)`, it can be passed to the solver using
+For example, if the user defines `minres_callback(solver::MinresSolver)`, it can be passed to the solver using
```julia
-(x, stats) = minres(A, b, callback = my_callback)
+(x, stats) = minres(A, b, callback = minres_callback)
```
-If you need to write a callback that uses variables that are not in the `MinresSolver`, use a closure:
+If you need to write a callback that uses variables that are not in a `KrylovSolver`, use a closure:
```julia
-function my_callback2(solver::MinresSolver, A, b, r, tol)
+function custom_stopping_condition(solver::KrylovSolver, A, b, r, tol)
mul!(r, A, solver.x)
r .-= b # r := b - Ax
bool = norm(r) ≤ tol # tolerance based on the 2-norm of the residual
return bool
end
-r = similar(b)
-(x, stats) = minres(A, b, callback = solver -> my_callback2(solver, A, b, r, 1e-6))
+cg_callback(solver) = custom_stopping_condition(solver, A, b, r, tol)
+(x, stats) = cg(A, b, callback = cg_callback)
```
Alternatively, use a structure and make it callable:
```julia
-mutable struct my_callback3{S, M}
- A::M
- b::S
- r::S
- tol::Float64
+mutable struct CallbackWorkspace{T}
+ A::Matrix{T}
+ b::Vector{T}
+ r::Vector{T}
+ tol::T
end
-my_callback3(A, b; tol=1e-6) = my_callback3(A, b, similar(b), tol) # Outer constructor
-
-function (my_cb::my_callback3)(solver)
- mul!(my_cb.r, my_cb.A, solver.x)
- my_cb.r .-= my_cb.b
- bool = norm(my_cb.r) ≤ my_cb.tol
+function (workspace::CallbackWorkspace)(solver::KrylovSolver)
+ mul!(workspace.r, workspace.A, solver.x)
+ workspace.r .-= workspace.b
+ bool = norm(workspace.r) ≤ workspace.tol
return bool
end
-my_cb = my_callback3(A, b)
-(x, stats) = minres(A, b, callback = my_cb)
+bicgstab_callback = CallbackWorkspace(A, b, r, tol)
+(x, stats) = bicgstab(A, b, callback = bicgstab_callback)
```
Although the main goal of a callback is to add new stopping conditions, it can also retrieve information from the workspace of a Krylov method along the iterations.
From 611b3c2632c40e6e77fcc4da4cbe4abc32c79246 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Fri, 23 Sep 2022 10:07:18 -0400
Subject: [PATCH 033/132] Add vector_to_matrix function
---
src/krylov_utils.jl | 18 ++++++++++++++++++
test/gpu/amd.jl | 6 ++++++
test/gpu/intel.jl | 6 ++++++
test/gpu/metal.jl | 6 ++++++
test/gpu/nvidia.jl | 8 ++++++--
test/test_aux.jl | 9 +++++++++
6 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl
index 46c9d6cd6..b16da57c0 100644
--- a/src/krylov_utils.jl
+++ b/src/krylov_utils.jl
@@ -247,6 +247,24 @@ function ktypeof(v::S) where S <: SubArray
return ktypeof(v.parent)
end
+"""
+ M = vector_to_matrix(S)
+
+Return the dense matrix storage type `M` related to the dense vector storage type `S`.
+"""
+function vector_to_matrix(::Type{S}) where S <: DenseVector
+ V = hasproperty(S, :body) ? S.body : S
+ par = V.parameters
+ npar = length(par)
+ (2 ≤ npar ≤ 3) || error("Type $S is not supported.")
+ if npar == 2
+ M = V.name.wrapper{par[1], 2}
+ else
+ M = V.name.wrapper{par[1], 2, par[3]}
+ end
+ return M
+end
+
"""
v = kzeros(S, n)
diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl
index 03ada1d4d..baad2bdcf 100644
--- a/test/gpu/amd.jl
+++ b/test/gpu/amd.jl
@@ -68,6 +68,12 @@ include("../test_utils.jl")
# Krylov.@kref!(n, x, y, c, s)
# end
+ @testset "vector_to_matrix" begin
+ S = ROCVector{FC}
+ M = Krylov.vector_to_matrix(S)
+ @test M == ROCMatrix{FC}
+ end
+
ε = eps(T)
atol = √ε
rtol = √ε
diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl
index e6826e9e9..67ad0a7d5 100644
--- a/test/gpu/intel.jl
+++ b/test/gpu/intel.jl
@@ -76,6 +76,12 @@ end
# Krylov.@kref!(n, x, y, c, s)
# end
+ @testset "vector_to_matrix" begin
+ S = oneVector{FC}
+ M = Krylov.vector_to_matrix(S)
+ @test M == oneMatrix{FC}
+ end
+
ε = eps(T)
atol = √ε
rtol = √ε
diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl
index 774ccc10c..35325c863 100644
--- a/test/gpu/metal.jl
+++ b/test/gpu/metal.jl
@@ -80,6 +80,12 @@ end
# Krylov.@kref!(n, x, y, c, s)
# end
+ @testset "vector_to_matrix" begin
+ S = MtlVector{FC}
+ M = Krylov.vector_to_matrix(S)
+ @test M == MtlMatrix{FC}
+ end
+
ε = eps(T)
atol = √ε
rtol = √ε
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
index 8dfb61b0f..8faed479a 100644
--- a/test/gpu/nvidia.jl
+++ b/test/gpu/nvidia.jl
@@ -3,8 +3,6 @@ using LinearOperators, Krylov, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER
include("../test_utils.jl")
-include("../test_utils.jl")
-
@testset "Nvidia -- CUDA.jl" begin
@test CUDA.functional()
@@ -146,6 +144,12 @@ include("../test_utils.jl")
Krylov.@kref!(n, x, y, c, s)
end
+ @testset "vector_to_matrix" begin
+ S = CuVector{FC}
+ M = Krylov.vector_to_matrix(S)
+ @test M == CuMatrix{FC}
+ end
+
ε = eps(T)
atol = √ε
rtol = √ε
diff --git a/test/test_aux.jl b/test/test_aux.jl
index 5a4d094c7..5ac2b401c 100644
--- a/test/test_aux.jl
+++ b/test/test_aux.jl
@@ -134,6 +134,15 @@
@test Krylov.ktypeof(b) == Vector{Float64}
end
+ @testset "vector_to_matrix" begin
+ # test vector_to_matrix
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ S = Vector{FC}
+ M = Krylov.vector_to_matrix(S)
+ @test M == Matrix{FC}
+ end
+ end
+
@testset "macros" begin
# test macros
for FC ∈ (Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64)
From a01a78d68cca7e3f694fe67438ef3377eeedc185 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Fri, 23 Sep 2022 10:12:21 -0400
Subject: [PATCH 034/132] Update buildkite pipeline
---
.buildkite/pipeline.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 963eb619b..73121253c 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -22,7 +22,7 @@ steps:
agents:
queue: "juliagpu"
rocm: "*"
- rocmgpu: "gfx908"
+ rocmgpu: "*"
env:
JULIA_AMDGPU_CORE_MUST_LOAD: "1"
JULIA_AMDGPU_HIP_MUST_LOAD: "1"
From 4af9334c95683904bdd271c008cbd93016db8a8b Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Fri, 23 Sep 2022 10:18:14 -0400
Subject: [PATCH 035/132] Update api.md
---
docs/src/api.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/docs/src/api.md b/docs/src/api.md
index 7f2f4dff7..bf3d5c783 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -60,4 +60,5 @@ Krylov.vec2str
Krylov.ktypeof
Krylov.kzeros
Krylov.kones
+Krylov.vector_to_matrix
```
From 2fe79648713e12b59d417ced877398e85edfd5a8 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 20 Sep 2022 21:40:35 -0400
Subject: [PATCH 036/132] An implementation of FGMRES
---
docs/src/api.md | 1 +
docs/src/inplace.md | 2 +-
docs/src/preconditioners.md | 2 +-
docs/src/solvers/unsymmetric.md | 7 +
src/Krylov.jl | 1 +
src/fgmres.jl | 332 ++++++++++++++++++++++++++++++++
src/krylov_solvers.jl | 56 +++++-
test/runtests.jl | 1 +
test/test_allocations.jl | 20 ++
test/test_fgmres.jl | 145 ++++++++++++++
test/test_mp.jl | 2 +-
test/test_solvers.jl | 36 ++++
test/test_utils.jl | 21 ++
test/test_warm_start.jl | 5 +
14 files changed, 627 insertions(+), 4 deletions(-)
create mode 100644 src/fgmres.jl
create mode 100644 test/test_fgmres.jl
diff --git a/docs/src/api.md b/docs/src/api.md
index bf3d5c783..bad8b9245 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -48,6 +48,7 @@ LnlqSolver
CraigSolver
CraigmrSolver
GpmrSolver
+FgmresSolver
```
## Utilities
diff --git a/docs/src/inplace.md b/docs/src/inplace.md
index 71a4e25de..9950575fe 100644
--- a/docs/src/inplace.md
+++ b/docs/src/inplace.md
@@ -15,7 +15,7 @@ Given an operator `A` and a right-hand side `b`, you can create a `KrylovSolver`
For example, use `S = Vector{Float64}` if you want to solve linear systems in double precision on the CPU and `S = CuVector{Float32}` if you want to solve linear systems in single precision on an Nvidia GPU.
!!! note
- `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`).
+ `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `FgmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`).
The workspace is always the first argument of the in-place methods:
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
index 6e2039634..60258868b 100644
--- a/docs/src/preconditioners.md
+++ b/docs/src/preconditioners.md
@@ -29,7 +29,7 @@ Krylov.jl supports both approaches thanks to the argument `ldiv` of the Krylov s
### Square non-Hermitian linear systems
-Methods concerned: [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
+Methods concerned: [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`FGMRES`](@ref fgmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
A Krylov method dedicated to non-Hermitian linear systems allows the three variants of preconditioning.
diff --git a/docs/src/solvers/unsymmetric.md b/docs/src/solvers/unsymmetric.md
index 280908ea5..e559145a2 100644
--- a/docs/src/solvers/unsymmetric.md
+++ b/docs/src/solvers/unsymmetric.md
@@ -71,3 +71,10 @@ dqgmres!
gmres
gmres!
```
+
+## FGMRES
+
+```@docs
+fgmres
+fgmres!
+```
diff --git a/src/Krylov.jl b/src/Krylov.jl
index b714ccd79..7c480896f 100644
--- a/src/Krylov.jl
+++ b/src/Krylov.jl
@@ -19,6 +19,7 @@ include("diom.jl")
include("fom.jl")
include("dqgmres.jl")
include("gmres.jl")
+include("fgmres.jl")
include("gpmr.jl")
diff --git a/src/fgmres.jl b/src/fgmres.jl
new file mode 100644
index 000000000..eb6ced660
--- /dev/null
+++ b/src/fgmres.jl
@@ -0,0 +1,332 @@
+# An implementation of FGMRES for the solution of the square linear system Ax = b.
+#
+# This method is described in
+#
+# Y. Saad, A Flexible Inner-Outer Preconditioned GMRES Algorithm.
+# SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993.
+#
+# Alexis Montoison,
+# Montreal, September 2022.
+
+export fgmres, fgmres!
+
+"""
+ (x, stats) = fgmres(A, b::AbstractVector{FC}; memory::Int=20,
+ M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
+ reorthogonalization::Bool=false, itmax::Int=0,
+ restart::Bool=false, verbose::Int=0, history::Bool=false,
+ ldiv::Bool=false, callback=solver->false)
+
+`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
+`FC` is `T` or `Complex{T}`.
+
+Solve the linear system Ax = b using the FGMRES method.
+
+FGMRES computes a sequence of approximate solutions with the minimal residual property.
+FGMRES is a variant of GMRES that allows changes in the right preconditioning at every step.
+
+This implementation allows a left preconditioner M and a flexible right preconditioner N.
+A situation in which the preconditioner is "not constant" is when a relaxation-type method,
+a Chebyshev iteration or another Krylov subspace method is used as a preconditioner.
+Compared to GMRES, there is no additional cost incurred in the arithmetic but the memory requirement almost doubles.
+
+Full reorthogonalization is available with the `reorthogonalization` option.
+
+If `restart = true`, the restarted version FGMRES(k) is used with `k = memory`.
+If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations.
+More storage will be allocated only if the number of iterations exceeds `memory`.
+
+FGMRES can be warm-started from an initial guess `x0` with the method
+
+ (x, stats) = fgmres(A, b, x0; kwargs...)
+
+where `kwargs` are the same keyword arguments as above.
+
+The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
+and `false` otherwise.
+
+#### Reference
+
+* Y. Saad, [*A Flexible Inner-Outer Preconditioned GMRES Algorithm*](https://doi.org/10.1137/0914028), SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993.
+"""
+function fgmres end
+
+function fgmres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
+ solver = FgmresSolver(A, b, memory)
+ fgmres!(solver, A, b, x0; kwargs...)
+ return (solver.x, solver.stats)
+end
+
+function fgmres(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
+ solver = FgmresSolver(A, b, memory)
+ fgmres!(solver, A, b; kwargs...)
+ return (solver.x, solver.stats)
+end
+
+"""
+ solver = fgmres!(solver::FgmresSolver, A, b; kwargs...)
+ solver = fgmres!(solver::FgmresSolver, A, b, x0; kwargs...)
+
+where `kwargs` are keyword arguments of [`fgmres`](@ref).
+
+Note that the `memory` keyword argument is the only exception.
+It's required to create a `FgmresSolver` and can't be changed later.
+
+See [`FgmresSolver`](@ref) for more details about the `solver`.
+"""
+function fgmres! end
+
+function fgmres!(solver :: FgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ warm_start!(solver, x0)
+ fgmres!(solver, A, b; kwargs...)
+ return solver
+end
+
+function fgmres!(solver :: FgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
+ M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
+ reorthogonalization :: Bool=false, itmax :: Int=0,
+ restart :: Bool=false, verbose :: Int=0, history :: Bool=false,
+ ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+
+ m, n = size(A)
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf("FGMRES: system of size %d\n", n)
+
+ # Check M = Iₙ and N = Iₙ
+ MisI = (M === I)
+ NisI = (N === I)
+
+ # Check type consistency
+ eltype(A) == FC || error("eltype(A) ≠ $FC")
+ ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+
+ # Set up workspace.
+ allocate_if(!MisI , solver, :q , S, n)
+ allocate_if(restart, solver, :Δx, S, n)
+ Δx, x, w, V, Z = solver.Δx, solver.x, solver.w, solver.V, solver.Z
+ z, c, s, R, stats = solver.z, solver.c, solver.s, solver.R, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ q = MisI ? w : solver.q
+ r₀ = MisI ? w : solver.q
+ xr = restart ? Δx : x
+
+ # Initial solution x₀.
+ x .= zero(FC)
+
+ # Initial residual r₀.
+ if warm_start
+ mul!(w, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ restart && @kaxpy!(n, one(FC), Δx, x)
+ else
+ w .= b
+ end
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀)
+ β = @knrm2(n, r₀) # β = ‖r₀‖₂
+
+ rNorm = β
+ history && push!(rNorms, β)
+ ε = atol + rtol * rNorm
+
+ if β == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
+
+ mem = length(c) # Memory
+ npass = 0 # Number of pass
+
+ iter = 0 # Cumulative number of iterations
+ inner_iter = 0 # Number of iterations in a pass
+
+ itmax == 0 && (itmax = 2*n)
+ inner_itmax = itmax
+
+ (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ")
+ kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗")
+
+ # Tolerance for breakdown detection.
+ btol = eps(T)^(3/4)
+
+ # Stopping criterion
+ breakdown = false
+ inconsistent = false
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ inner_tired = inner_iter ≥ inner_itmax
+ status = "unknown"
+ user_requested_exit = false
+
+ while !(solved || tired || breakdown || user_requested_exit)
+
+ # Initialize workspace.
+ nr = 0 # Number of coefficients stored in Rₖ.
+ for i = 1 : mem
+ V[i] .= zero(FC) # Orthogonal basis of {Mr₀, MANₖr₀, ..., (MANₖ)ᵏ⁻¹r₀}.
+ Z[i] .= zero(FC) # Z = [N₁v₁, ..., Nₖvₖ]
+ end
+ s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
+ c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
+ R .= zero(FC) # Upper triangular matrix Rₖ.
+ z .= zero(FC) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂.
+
+ if restart
+ xr .= zero(FC) # xr === Δx when restart is set to true
+ if npass ≥ 1
+ mul!(w, A, x)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ MisI || mulorldiv!(r₀, M, w, ldiv)
+ end
+ end
+
+ # Initial ζ₁ and V₁
+ β = @knrm2(n, r₀)
+ z[1] = β
+ @. V[1] = r₀ / rNorm
+
+ npass = npass + 1
+ solver.inner_iter = 0
+ inner_tired = false
+
+ while !(solved || inner_tired || breakdown || user_requested_exit)
+
+ # Update iteration index
+ solver.inner_iter = solver.inner_iter + 1
+ inner_iter = solver.inner_iter
+
+ # Update workspace if more storage is required and restart is set to false
+ if !restart && (inner_iter > mem)
+ for i = 1 : inner_iter
+ push!(R, zero(FC))
+ end
+ push!(s, zero(FC))
+ push!(c, zero(T))
+ push!(Z, S(undef, n))
+ end
+
+ # Continue the process.
+ # MAZₖ = Vₖ₊₁Hₖ₊₁.ₖ
+ mulorldiv!(Z[inner_iter], N, V[inner_iter], ldiv) # zₖ ← Nₖvₖ
+ mul!(w, A, Z[inner_iter]) # w ← Azₖ
+ MisI || mulorldiv!(q, M, w, ldiv) # q ← MAzₖ
+ for i = 1 : inner_iter
+ R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
+ @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
+ end
+
+ # Reorthogonalization of the basis.
+ if reorthogonalization
+ for i = 1 : inner_iter
+ Htmp = @kdot(n, V[i], q)
+ R[nr+i] += Htmp
+ @kaxpy!(n, -Htmp, V[i], q)
+ end
+ end
+
+ # Compute hₖ₊₁.ₖ
+ Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+
+ # Update the QR factorization of Hₖ₊₁.ₖ.
+ # Apply previous Givens reflections Ωᵢ.
+ # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ]
+ # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ]
+ for i = 1 : inner_iter-1
+ Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1]
+ R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1]
+ R[nr+i] = Rtmp
+ end
+
+ # Compute and apply current Givens reflection Ωₖ.
+ # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ]
+ # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ]
+ (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis)
+
+ # Update zₖ = (Qₖ)ᴴβe₁
+ ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter]
+ z[inner_iter] = c[inner_iter] * z[inner_iter]
+
+ # Update residual norm estimate.
+ # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁|
+ rNorm = abs(ζₖ₊₁)
+ history && push!(rNorms, rNorm)
+
+ # Update the number of coefficients in Rₖ
+ nr = nr + inner_iter
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ resid_decrease_lim = rNorm ≤ ε
+ breakdown = Hbis ≤ btol
+ solved = resid_decrease_lim || resid_decrease_mach
+ inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax
+ solver.inner_iter = inner_iter
+ kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis)
+
+ # Compute vₖ₊₁
+ if !(solved || inner_tired || breakdown)
+ if !restart && (inner_iter ≥ mem)
+ push!(V, S(undef, n))
+ push!(z, zero(FC))
+ end
+ @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q
+ z[inner_iter+1] = ζₖ₊₁
+ end
+
+ user_requested_exit = callback(solver) :: Bool
+ end
+
+ # Compute y by solving Ry = z with backward substitution.
+ y = z # yᵢ = ζᵢ
+ for i = inner_iter : -1 : 1
+ pos = nr + i - inner_iter # position of rᵢ.ₖ
+ for j = inner_iter : -1 : i+1
+ y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
+ pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ end
+ # Rₖ can be singular if the system is inconsistent
+ if abs(R[pos]) ≤ btol
+ y[i] = zero(FC)
+ inconsistent = true
+ else
+ y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
+ end
+ end
+
+ # Form xₖ = N₁v₁y₁ + ... + Nₖvₖyₖ = z₁y₁ + ... + zₖyₖ
+ for i = 1 : inner_iter
+ @kaxpy!(n, y[i], Z[i], xr)
+ end
+ restart && @kaxpy!(n, one(FC), xr, x)
+
+ # Update inner_itmax, iter and tired variables.
+ inner_itmax = inner_itmax - inner_iter
+ iter = iter + inner_iter
+ tired = iter ≥ itmax
+ end
+ (verbose > 0) && @printf("\n")
+
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ inconsistent && (status = "found approximate least-squares solution")
+ user_requested_exit && (status = "user-requested exit")
+
+ # Update x
+ warm_start && !restart && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.status = status
+ return solver
+end
diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl
index abd0c7352..b37ccd575 100644
--- a/src/krylov_solvers.jl
+++ b/src/krylov_solvers.jl
@@ -3,7 +3,7 @@ CgLanczosShiftSolver, MinresQlpSolver, DqgmresSolver, DiomSolver, UsymlqSolver,
UsymqrSolver, TricgSolver, TrimrSolver, TrilqrSolver, CgsSolver, BicgstabSolver,
BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CrlsSolver, CgneSolver, CrmrSolver,
LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver,
-GmresSolver, FomSolver, GpmrSolver
+GmresSolver, FomSolver, GpmrSolver, FgmresSolver
export solve!, solution, nsolution, statistics, issolved, issolved_primal, issolved_dual,
niterations, Aprod, Atprod, Bprod, warm_start!
@@ -20,6 +20,7 @@ const KRYLOV_SOLVERS = Dict(
:fom => :FomSolver ,
:dqgmres => :DqgmresSolver ,
:gmres => :GmresSolver ,
+ :fgmres => :FgmresSolver ,
:gpmr => :GpmrSolver ,
:usymlq => :UsymlqSolver ,
:usymqr => :UsymqrSolver ,
@@ -1503,6 +1504,58 @@ function GmresSolver(A, b, memory = 20)
GmresSolver(n, m, memory, S)
end
+"""
+Type for storing the vectors required by the in-place version of FGMRES.
+
+The outer constructors
+
+ solver = FgmresSolver(n, m, memory, S)
+ solver = FgmresSolver(A, b, memory = 20)
+
+may be used in order to create these vectors.
+`memory` is set to `n` if the value given is larger than `n`.
+"""
+mutable struct FgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ Δx :: S
+ x :: S
+ w :: S
+ q :: S
+ V :: Vector{S}
+ Z :: Vector{S}
+ c :: Vector{T}
+ s :: Vector{FC}
+ z :: Vector{FC}
+ R :: Vector{FC}
+ warm_start :: Bool
+ inner_iter :: Int
+ stats :: SimpleStats{T}
+end
+
+function FgmresSolver(n, m, memory, S)
+ memory = min(n, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ w = S(undef, n)
+ q = S(undef, 0)
+ V = [S(undef, n) for i = 1 : memory]
+ Z = [S(undef, n) for i = 1 : memory]
+ c = Vector{T}(undef, memory)
+ s = Vector{FC}(undef, memory)
+ z = Vector{FC}(undef, memory)
+ R = Vector{FC}(undef, div(memory * (memory+1), 2))
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = FgmresSolver{T,FC,S}(Δx, x, w, q, V, Z, c, s, z, R, false, 0, stats)
+ return solver
+end
+
+function FgmresSolver(A, b, memory = 20)
+ n, m = size(A)
+ S = ktypeof(b)
+ FgmresSolver(n, m, memory, S)
+end
+
"""
Type for storing the vectors required by the in-place version of FOM.
@@ -1704,6 +1757,7 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [
(MinresQlpSolver , :minres_qlp! , 1, 1, 0, true )
(QmrSolver , :qmr! , 1, 1, 1, true )
(GmresSolver , :gmres! , 1, 1, 0, true )
+ (FgmresSolver , :fgmres! , 1, 1, 0, true )
(FomSolver , :fom! , 1, 1, 0, true )
(GpmrSolver , :gpmr! , 2, 1, 0, true )
]
diff --git a/test/runtests.jl b/test/runtests.jl
index 99ab25fda..75e8f0941 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,6 +5,7 @@ include("test_utils.jl")
include("test_aux.jl")
include("test_stats.jl")
+include("test_fgmres.jl")
include("test_gpmr.jl")
include("test_fom.jl")
include("test_gmres.jl")
diff --git a/test/test_allocations.jl b/test/test_allocations.jl
index 790fcc7a8..b29f11631 100644
--- a/test/test_allocations.jl
+++ b/test/test_allocations.jl
@@ -218,6 +218,26 @@
@test inplace_gmres_bytes == 0
end
+ @testset "FGMRES" begin
+ # FGMRES needs:
+ # - 2 n-vectors: x, w
+ # - 2 n*(mem)-matrix: V, Z
+ # - 3 mem-vectors: c, s, z
+ # - 1 (mem*(mem+1)/2)-vector: R
+ storage_fgmres(mem, n) = (2 * n) + (2 * n * mem) + (3 * mem) + (mem * (mem+1) / 2)
+ storage_fgmres_bytes(mem, n) = nbits * storage_fgmres(mem, n)
+
+ expected_fgmres_bytes = storage_fgmres_bytes(mem, n)
+ fgmres(A, b, memory=mem) # warmup
+ actual_fgmres_bytes = @allocated fgmres(A, b, memory=mem)
+ @test expected_fgmres_bytes ≤ actual_fgmres_bytes ≤ 1.02 * expected_fgmres_bytes
+
+ solver = FgmresSolver(A, b, mem)
+ fgmres!(solver, A, b) # warmup
+ inplace_fgmres_bytes = @allocated fgmres!(solver, A, b)
+ @test inplace_fgmres_bytes == 0
+ end
+
@testset "CGS" begin
# CGS needs:
# 6 n-vectors: x, r, u, p, q, ts
diff --git a/test/test_fgmres.jl b/test/test_fgmres.jl
new file mode 100644
index 000000000..e640da46e
--- /dev/null
+++ b/test/test_fgmres.jl
@@ -0,0 +1,145 @@
+@testset "fgmres" begin
+ fgmres_tol = 1.0e-6
+
+ for FC in (Float64, ComplexF64)
+ @testset "Data Type: $FC" begin
+
+ # Symmetric and positive definite system.
+ A, b = symmetric_definite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Symmetric indefinite variant.
+ A, b = symmetric_indefinite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Nonsymmetric and positive definite systems.
+ A, b = nonsymmetric_definite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Nonsymmetric indefinite variant.
+ A, b = nonsymmetric_indefinite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Symmetric indefinite variant, almost singular.
+ A, b = almost_singular(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ 100 * fgmres_tol)
+ @test(stats.solved)
+
+ # Singular system.
+ A, b = square_inconsistent(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ Aresid = norm(A' * r) / norm(A' * b)
+ @test(Aresid ≤ fgmres_tol)
+ @test(stats.inconsistent)
+
+ # Test b == 0
+ A, b = zero_rhs(FC=FC)
+ (x, stats) = fgmres(A, b)
+ @test norm(x) == 0
+ @test stats.status == "x = 0 is a zero-residual solution"
+
+ # Poisson equation in polar coordinates.
+ A, b = polar_poisson(FC=FC)
+ (x, stats) = fgmres(A, b, reorthogonalization=true)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Left preconditioning
+ A, b, M = square_preconditioned(FC=FC)
+ (x, stats) = fgmres(A, b, M=M)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Right preconditioning
+ A, b, N = square_preconditioned(FC=FC)
+ (x, stats) = fgmres(A, b, N=N)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Split preconditioning
+ A, b, M, N = two_preconditioners(FC=FC)
+ (x, stats) = fgmres(A, b, M=M, N=N)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Restarted (FGMRES(k) with k = memory) and non-restarted runs: no preconditioner, then left, right and split diagonal preconditioning.
+ for restart ∈ (false, true)
+ memory = 10
+
+ A, b = sparse_laplacian(FC=FC)
+ (x, stats) = fgmres(A, b, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+
+ M = Diagonal(1 ./ diag(A))
+ (x, stats) = fgmres(A, b, M=M, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+
+ N = Diagonal(1 ./ diag(A))
+ (x, stats) = fgmres(A, b, N=N, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+
+ M = Diagonal(1 ./ sqrt.(diag(A))) # split case: rebuild M so both sides scale by diag(A)^(-1/2) instead of reusing the left preconditioner above
+ N = Diagonal(1 ./ sqrt.(diag(A)))
+ (x, stats) = fgmres(A, b, M=M, N=N, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+ end
+
+ # test callback function
+ # A, b = sparse_laplacian(FC=FC)
+ # solver = FgmresSolver(A, b)
+ # tol = 1.0e-1
+ # N = Diagonal(1 ./ diag(A))
+ # stor = StorageGetxRestartedGmres(solver, N = N)
+ # storage_vec = similar(b)
+ # fgmres!(solver, A, b, N = N, atol = 0.0, rtol = 0.0, restart = true, callback = solver -> restarted_fgmres_callback_n2(solver, A, b, stor, N, storage_vec, tol))
+ # @test solver.stats.status == "user-requested exit"
+ # @test norm(A * x - b) ≤ tol
+ #
+ # @test_throws TypeError fgmres(A, b, callback = solver -> "string", history = true)
+ end
+ end
+end
diff --git a/test/test_mp.jl b/test/test_mp.jl
index b7aa43d38..6b6d58450 100644
--- a/test/test_mp.jl
+++ b/test/test_mp.jl
@@ -3,7 +3,7 @@
for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr,
:lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres,
:bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom,
- :cg_lanczos_shift)
+ :fgmres, :cg_lanczos_shift)
for T in (Float16, Float32, Float64, BigFloat)
for FC in (T, Complex{T})
A = spdiagm(-1 => -ones(FC,n-1), 0 => 3*ones(FC,n), 1 => -ones(FC,n-1))
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index 6f60cb737..17b3edf0b 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -21,6 +21,7 @@ function test_solvers(FC)
fom_solver = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S)
dqgmres_solver = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S)
gmres_solver = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S)
+ fgmres_solver = $(KRYLOV_SOLVERS[:fgmres])($n, $n, $mem, $S)
cr_solver = $(KRYLOV_SOLVERS[:cr])($n, $n, $S)
crmr_solver = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S)
cgs_solver = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S)
@@ -144,6 +145,16 @@ function test_solvers(FC)
@test nsolution(solver) == 1
@test issolved(solver)
+ solver = solve!(fgmres_solver, A, b)
+ niter = niterations(solver)
+ @test niter > 0
+ @test Aprod(solver) == niter
+ @test Atprod(solver) == 0
+ @test statistics(solver) === solver.stats
+ @test solution(solver, 1) === solver.x
+ @test nsolution(solver) == 1
+ @test issolved(solver)
+
solver = solve!(cr_solver, A, b)
niter = niterations(solver)
@test niter > 0
@@ -596,6 +607,31 @@ function test_solvers(FC)
"""
@test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
+ io = IOBuffer()
+ show(io, fgmres_solver, show_stats=false)
+ showed = String(take!(io))
+ expected = """
+ ┌────────────┬───────────────────┬─────────────────┐
+ │FgmresSolver│ Precision: $FC │Architecture: CPU│
+ ├────────────┼───────────────────┼─────────────────┤
+ │ Attribute│ Type│ Size│
+ ├────────────┼───────────────────┼─────────────────┤
+ │ Δx│ Vector{$FC}│ 0│
+ │ x│ Vector{$FC}│ 64│
+ │ w│ Vector{$FC}│ 64│
+ │ q│ Vector{$FC}│ 0│
+ │ V│Vector{Vector{$FC}}│ 10 x 64│
+ │ Z│Vector{Vector{$FC}}│ 10 x 64│
+ │ c│ Vector{$T}│ 10│
+ │ s│ Vector{$FC}│ 10│
+ │ z│ Vector{$FC}│ 10│
+ │ R│ Vector{$FC}│ 55│
+ │ warm_start│ Bool│ 0│
+ │ inner_iter│ Int64│ 0│
+ └────────────┴───────────────────┴─────────────────┘
+ """
+ @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
+
io = IOBuffer()
show(io, cr_solver, show_stats=false)
showed = String(take!(io))
diff --git a/test/test_utils.jl b/test/test_utils.jl
index fbfe2e4e0..dba687c82 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -470,3 +470,24 @@ function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage
storage_vec .-= b
return (norm(storage_vec) ≤ tol)
end
+
+# Successive over-relaxation (SOR) method
+function sor!(x, A, b, ω, k)
+ x .= 0
+ n = length(x)
+ for iter = 1:k
+ for i = 1:n
+ sum1 = sum(A[i,j] * x[j] for j = 1:i-1; init = 0)
+ sum2 = sum(A[i,j] * x[j] for j = i+1:n; init = 0)
+ x[i] = (1 - ω) * x[i] + (ω / A[i,i]) * (b[i] - sum1 - sum2)
+ end
+ end
+ return x
+end
+
+function test_sor()
+ A = [4 -1 -6 0; -5 -4 10 8; 0 9 4 -2; 1 0 -7 5]
+ b = [2; 21; -12; -6]
+ ω = 0.5
+ return A, b, ω
+end
diff --git a/test/test_warm_start.jl b/test/test_warm_start.jl
index 66a1cbea7..232a5a9cf 100644
--- a/test/test_warm_start.jl
+++ b/test/test_warm_start.jl
@@ -70,6 +70,11 @@ function test_warm_start(FC)
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ x, stats = fgmres(A, b, x0)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = bicgstab(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)
From 6fa3132881a28ffa0d597d0d8f23ba4b91eee998 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Wed, 21 Sep 2022 18:21:32 -0400
Subject: [PATCH 037/132] Add a test with a variable preconditioner
---
src/fgmres.jl | 4 ++--
test/test_fgmres.jl | 33 +++++++++++++++++++++------------
test/test_utils.jl | 21 ---------------------
3 files changed, 23 insertions(+), 35 deletions(-)
diff --git a/src/fgmres.jl b/src/fgmres.jl
index eb6ced660..ca5a44096 100644
--- a/src/fgmres.jl
+++ b/src/fgmres.jl
@@ -29,6 +29,7 @@ This implementation allows a left preconditioner M and a flexible right precondi
A situation in which the preconditioner is "not constant" is when a relaxation-type method,
a Chebyshev iteration or another Krylov subspace method is used as a preconditioner.
Compared to GMRES, there is no additional cost incurred in the arithmetic but the memory requirement almost doubles.
+Thus, GMRES is recommended if the right preconditioner N is identical as each iteration.
Full reorthogonalization is available with the `reorthogonalization` option.
@@ -93,9 +94,8 @@ function fgmres!(solver :: FgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
length(b) == m || error("Inconsistent problem size")
(verbose > 0) && @printf("FGMRES: system of size %d\n", n)
- # Check M = Iₙ and N = Iₙ
+ # Check M = Iₙ
MisI = (M === I)
- NisI = (N === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
diff --git a/test/test_fgmres.jl b/test/test_fgmres.jl
index e640da46e..9bb73d3e4 100644
--- a/test/test_fgmres.jl
+++ b/test/test_fgmres.jl
@@ -1,3 +1,16 @@
+import LinearAlgebra.mul!
+
+mutable struct FlexiblePreconditioner{T,S}
+ D::Diagonal{T, S}
+ ω::T
+end
+
+function mul!(y::Vector, P::FlexiblePreconditioner, x::Vector)
+ P.ω = -P.ω
+ mul!(y, P.D, x)
+ y .*= P.ω
+end
+
@testset "fgmres" begin
fgmres_tol = 1.0e-6
@@ -128,18 +141,14 @@
@test(stats.solved)
end
- # test callback function
- # A, b = sparse_laplacian(FC=FC)
- # solver = FgmresSolver(A, b)
- # tol = 1.0e-1
- # N = Diagonal(1 ./ diag(A))
- # stor = StorageGetxRestartedGmres(solver, N = N)
- # storage_vec = similar(b)
- # fgmres!(solver, A, b, N = N, atol = 0.0, rtol = 0.0, restart = true, callback = solver -> restarted_fgmres_callback_n2(solver, A, b, stor, N, storage_vec, tol))
- # @test solver.stats.status == "user-requested exit"
- # @test norm(A * x - b) ≤ tol
- #
- # @test_throws TypeError fgmres(A, b, callback = solver -> "string", history = true)
+ A, b = polar_poisson(FC=FC)
+ J = inv(Diagonal(A)) # Jacobi preconditioner
+ N = FlexiblePreconditioner(J, one(eltype(J))) # ω::T must share the element type of D::Diagonal{T,S}; mul! flips its sign on every application
+ (x, stats) = fgmres(A, b, N=N)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
end
end
end
diff --git a/test/test_utils.jl b/test/test_utils.jl
index dba687c82..fbfe2e4e0 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -470,24 +470,3 @@ function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage
storage_vec .-= b
return (norm(storage_vec) ≤ tol)
end
-
-# Successive over-relaxation (SOR) method
-function sor!(x, A, b, ω, k)
- x .= 0
- n = length(x)
- for iter = 1:k
- for i = 1:n
- sum1 = sum(A[i,j] * x[j] for j = 1:i-1; init = 0)
- sum2 = sum(A[i,j] * x[j] for j = i+1:n; init = 0)
- x[i] = (1 - ω) * x[i] + (ω / A[i,i]) * (b[i] - sum1 - sum2)
- end
- end
- return x
-end
-
-function test_sor()
- A = [4 -1 -6 0; -5 -4 10 8; 0 9 4 -2; 1 0 -7 5]
- b = [2; 21; -12; -6]
- ω = 0.5
- return A, b, ω
-end
From bd315942a0515eeaf524ab4b5977892dd5797eec Mon Sep 17 00:00:00 2001
From: Alexis <35051714+amontoison@users.noreply.github.com>
Date: Sun, 25 Sep 2022 20:09:56 -0400
Subject: [PATCH 038/132] Apply suggestions from code review
Co-authored-by: Dominique
---
src/fgmres.jl | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/fgmres.jl b/src/fgmres.jl
index ca5a44096..cdc317e1d 100644
--- a/src/fgmres.jl
+++ b/src/fgmres.jl
@@ -22,22 +22,22 @@ export fgmres, fgmres!
Solve the linear system Ax = b using FGMRES method.
-FGMRES computes a sequence of approximate solutions with the minimal residual property.
-FGMRES is a variant of GMRES that allows changes in the right preconditioning at every step.
+FGMRES computes a sequence of approximate solutions with minimum residual.
+FGMRES is a variant of GMRES that allows changes in the right preconditioner at each iteration.
This implementation allows a left preconditioner M and a flexible right preconditioner N.
A situation in which the preconditioner is "not constant" is when a relaxation-type method,
a Chebyshev iteration or another Krylov subspace method is used as a preconditioner.
Compared to GMRES, there is no additional cost incurred in the arithmetic but the memory requirement almost doubles.
-Thus, GMRES is recommended if the right preconditioner N is identical as each iteration.
+Thus, GMRES is recommended if the right preconditioner N is constant.
Full reorthogonalization is available with the `reorthogonalization` option.
If `restart = true`, the restarted version FGMRES(k) is used with `k = memory`.
If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations.
-More storage will be allocated only if the number of iterations exceed `memory`.
+More storage will be allocated only if the number of iterations exceeds `memory`.
-FGMRES can be warm-started from an initial guess `x0` with the method
+FGMRES can be warm-started from an initial guess `x0` with
(x, stats) = fgmres(A, b, x0; kwargs...)
@@ -169,7 +169,7 @@ function fgmres!(solver :: FgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
nr = 0 # Number of coefficients stored in Rₖ.
for i = 1 : mem
V[i] .= zero(FC) # Orthogonal basis of {Mr₀, MANₖr₀, ..., (MANₖ)ᵏ⁻¹r₀}.
- Z[i] .= zero(FC) # Z = [N₁v₁, ..., Nₖvₖ]
+ Z[i] .= zero(FC) # Zₖ = [N₁v₁, ..., Nₖvₖ]
end
s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
From 45d5a0f05b3a4c50a80148be0c4d0df321ddfe74 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Sun, 25 Sep 2022 20:14:19 -0400
Subject: [PATCH 039/132] Add fgmres in docs/src/factorization-free.md
---
docs/src/factorization-free.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/src/factorization-free.md b/docs/src/factorization-free.md
index aa0f51f07..235d7daef 100644
--- a/docs/src/factorization-free.md
+++ b/docs/src/factorization-free.md
@@ -10,8 +10,8 @@ Some methods only require `A * v` products, whereas other ones also require `A'
|:--------------------------------------:|:----------------------------------------:|
| CG, CR | CGLS, CRLS, CGNE, CRMR |
| SYMMLQ, CG-LANCZOS, MINRES, MINRES-QLP | LSLQ, LSQR, LSMR, LNLQ, CRAIG, CRAIGMR |
-| DIOM, FOM, DQGMRES, GMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR |
-| CGS, BICGSTAB | TriCG, TriMR, USYMLQR |
+| DIOM, FOM, DQGMRES, GMRES, FGMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR |
+| CGS, BICGSTAB | TriCG, TriMR |
Preconditioners `M`, `N`, `C`, `D`, `E` or `F` can be also linear operators and must implement `mul!` or `ldiv!`.
From 837f7c915941230d5917034d5128a9fcf8366191 Mon Sep 17 00:00:00 2001
From: Alexis <35051714+amontoison@users.noreply.github.com>
Date: Sun, 25 Sep 2022 20:34:08 -0400
Subject: [PATCH 040/132] Update src/fgmres.jl
Co-authored-by: Dominique
---
src/fgmres.jl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/fgmres.jl b/src/fgmres.jl
index cdc317e1d..b4cc213e1 100644
--- a/src/fgmres.jl
+++ b/src/fgmres.jl
@@ -20,7 +20,7 @@ export fgmres, fgmres!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using FGMRES method.
+Solve the linear system Ax = b using FGMRES.
FGMRES computes a sequence of approximate solutions with minimum residual.
FGMRES is a variant of GMRES that allows changes in the right preconditioner at each iteration.
From e2a6737e3a952ab0f09feed3e791284c217d2dd8 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Sun, 25 Sep 2022 21:16:18 -0400
Subject: [PATCH 041/132] Update multiple docstrings and comments
---
docs/src/factorization-free.md | 3 ++
src/bicgstab.jl | 4 +-
src/bilq.jl | 4 +-
src/bilqr.jl | 2 +-
src/cg.jl | 4 +-
src/cg_lanczos.jl | 2 +-
src/cgs.jl | 2 +-
src/cr.jl | 2 +-
src/diom.jl | 75 ++++++++++++++--------------
src/dqgmres.jl | 89 ++++++++++++++++------------------
src/fgmres.jl | 2 +-
src/fom.jl | 24 ++++-----
src/gmres.jl | 26 +++++-----
src/gpmr.jl | 2 +-
src/minres.jl | 2 +-
src/minres_qlp.jl | 2 +-
src/qmr.jl | 4 +-
src/symmlq.jl | 4 +-
src/tricg.jl | 2 +-
src/trilqr.jl | 2 +-
src/trimr.jl | 2 +-
src/usymlq.jl | 2 +-
src/usymqr.jl | 4 +-
23 files changed, 127 insertions(+), 138 deletions(-)
diff --git a/docs/src/factorization-free.md b/docs/src/factorization-free.md
index 235d7daef..81f995810 100644
--- a/docs/src/factorization-free.md
+++ b/docs/src/factorization-free.md
@@ -13,6 +13,9 @@ Some methods only require `A * v` products, whereas other ones also require `A'
| DIOM, FOM, DQGMRES, GMRES, FGMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR |
| CGS, BICGSTAB | TriCG, TriMR |
+!!! info
+ GPMR is the only method that requires `A * v` and `B * w` products.
+
Preconditioners `M`, `N`, `C`, `D`, `E` or `F` can be also linear operators and must implement `mul!` or `ldiv!`.
We strongly recommend [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl) to model matrix-free operators, but other packages such as [LinearMaps.jl](https://github.com/JuliaLinearAlgebra/LinearMaps.jl), [DiffEqOperators.jl](https://github.com/SciML/DiffEqOperators.jl) or your own operator can be used as well.
diff --git a/src/bicgstab.jl b/src/bicgstab.jl
index 3e5635775..25abd01e6 100644
--- a/src/bicgstab.jl
+++ b/src/bicgstab.jl
@@ -24,7 +24,7 @@ export bicgstab, bicgstab!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the square linear system Ax = b using the BICGSTAB method.
+Solve the square linear system Ax = b using BICGSTAB.
BICGSTAB requires two initial vectors `b` and `c`.
The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
@@ -42,7 +42,7 @@ Information will be displayed every `verbose` iterations.
This implementation allows a left preconditioner `M` and a right preconditioner `N`.
-BICGSTAB can be warm-started from an initial guess `x0` with the method
+BICGSTAB can be warm-started from an initial guess `x0` with
(x, stats) = bicgstab(A, b, x0; kwargs...)
diff --git a/src/bilq.jl b/src/bilq.jl
index f40538245..8dbd46c51 100644
--- a/src/bilq.jl
+++ b/src/bilq.jl
@@ -21,7 +21,7 @@ export bilq, bilq!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the square linear system Ax = b using the BiLQ method.
+Solve the square linear system Ax = b using BiLQ.
BiLQ is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`.
The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
@@ -30,7 +30,7 @@ When `A` is symmetric and `b = c`, BiLQ is equivalent to SYMMLQ.
An option gives the possibility of transferring to the BiCG point,
when it exists. The transfer is based on the residual norm.
-BiLQ can be warm-started from an initial guess `x0` with the method
+BiLQ can be warm-started from an initial guess `x0` with
(x, stats) = bilq(A, b, x0; kwargs...)
diff --git a/src/bilqr.jl b/src/bilqr.jl
index 7284597dc..479e01319 100644
--- a/src/bilqr.jl
+++ b/src/bilqr.jl
@@ -33,7 +33,7 @@ QMR is used for solving dual system `Aᴴy = c`.
An option gives the possibility of transferring from the BiLQ point to the
BiCG point, when it exists. The transfer is based on the residual norm.
-BiLQR can be warm-started from initial guesses `x0` and `y0` with the method
+BiLQR can be warm-started from initial guesses `x0` and `y0` with
(x, y, stats) = bilqr(A, b, c, x0, y0; kwargs...)
diff --git a/src/cg.jl b/src/cg.jl
index 8a974accc..212c68484 100644
--- a/src/cg.jl
+++ b/src/cg.jl
@@ -26,7 +26,7 @@ export cg, cg!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-The conjugate gradient method to solve the symmetric linear system Ax=b.
+The conjugate gradient method to solve the symmetric linear system Ax = b.
The method does _not_ abort if A is not definite.
@@ -37,7 +37,7 @@ M also indicates the weighted norm in which residuals are measured.
If `itmax=0`, the default number of iterations is set to `2 * n`,
with `n = length(b)`.
-CG can be warm-started from an initial guess `x0` with the method
+CG can be warm-started from an initial guess `x0` with
(x, stats) = cg(A, b, x0; kwargs...)
diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl
index 2f2dae16d..4e503f09a 100644
--- a/src/cg_lanczos.jl
+++ b/src/cg_lanczos.jl
@@ -32,7 +32,7 @@ The method does _not_ abort if A is not definite.
A preconditioner M may be provided in the form of a linear operator and is
assumed to be hermitian and positive definite.
-CG-LANCZOS can be warm-started from an initial guess `x0` with the method
+CG-LANCZOS can be warm-started from an initial guess `x0` with
(x, stats) = cg_lanczos(A, b, x0; kwargs...)
diff --git a/src/cgs.jl b/src/cgs.jl
index 592eb1b2d..37a1c4137 100644
--- a/src/cgs.jl
+++ b/src/cgs.jl
@@ -40,7 +40,7 @@ TFQMR and BICGSTAB were developed to remedy this difficulty.»
This implementation allows a left preconditioner M and a right preconditioner N.
-CGS can be warm-started from an initial guess `x0` with the method
+CGS can be warm-started from an initial guess `x0` with
(x, stats) = cgs(A, b, x0; kwargs...)
diff --git a/src/cr.jl b/src/cr.jl
index 4405eda76..0e93e7eaa 100644
--- a/src/cr.jl
+++ b/src/cr.jl
@@ -34,7 +34,7 @@ In a linesearch context, 'linesearch' must be set to 'true'.
If `itmax=0`, the default number of iterations is set to `2 * n`,
with `n = length(b)`.
-CR can be warm-started from an initial guess `x0` with the method
+CR can be warm-started from an initial guess `x0` with
(x, stats) = cr(A, b, x0; kwargs...)
diff --git a/src/diom.jl b/src/diom.jl
index 9c6b9767b..168bc5b94 100644
--- a/src/diom.jl
+++ b/src/diom.jl
@@ -20,7 +20,7 @@ export diom, diom!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the consistent linear system Ax = b using direct incomplete orthogonalization method.
+Solve the consistent linear system Ax = b using DIOM.
DIOM only orthogonalizes the new vectors of the Krylov basis against the `memory` most recent vectors.
If CG is well defined on `Ax = b` and `memory = 2`, DIOM is theoretically equivalent to CG.
@@ -33,11 +33,8 @@ An advantage of DIOM is that nonsymmetric or symmetric indefinite or both nonsym
and indefinite systems of linear equations can be handled by this single algorithm.
This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
-DIOM can be warm-started from an initial guess `x0` with the method
+DIOM can be warm-started from an initial guess `x0` with
(x, stats) = diom(A, b, x0; kwargs...)
@@ -121,7 +118,7 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
else
t .= b
end
- MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀)
+ MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀)
rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
history && push!(rNorms, rNorm)
if rNorm == 0
@@ -141,14 +138,14 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
mem = length(L) # Memory
for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b).
- P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Uₘ)⁻¹.
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
+ P[i] .= zero(FC) # Directions for x : Pₖ = NVₖ(Uₖ)⁻¹.
end
- H .= zero(FC) # Last column of the band hessenberg matrix Hₘ = LₘUₘ.
- # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2].
- # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
- # In addition of that, the last column of Uₘ is stored in H.
- L .= zero(FC) # Last mem pivots of Lₘ.
+ H .= zero(FC) # Last column of the band hessenberg matrix Hₖ = LₖUₖ.
+ # Each column has at most mem + 1 nonzero elements. hᵢ.ₖ is stored as H[k-i+2].
+ # k-i+2 represents the indice of the diagonal where hᵢ.ₖ is located.
+ # In addition of that, the last column of Uₖ is stored in H.
+ L .= zero(FC) # Last mem pivots of Lₖ.
# Initial ξ₁ and V₁.
ξ = rNorm
@@ -166,19 +163,19 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + 1
# Set position in circulars stacks.
- pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V.
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V.
+ pos = mod(iter-1, mem) + 1 # Position corresponding to pₖ and vₖ in circular stacks P and V.
+ next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
# Incomplete Arnoldi procedure.
z = NisI ? V[pos] : solver.z
- NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ
- mul!(t, A, z) # AN⁻¹vₘ
- MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁
+ NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ
+ mul!(t, A, z) # ANvₖ
+ MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁
for i = max(1, iter-mem+1) : iter
ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
diag = iter - i + 2
- H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩
- @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ
+ H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ , vᵢ⟩
+ @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
end
# Partial reorthogonalization of the Krylov basis.
@@ -192,56 +189,56 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
end
- # Compute hₘ₊₁.ₘ and vₘ₊₁.
- H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂
- if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown"
- @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ
+ # Compute hₖ₊₁.ₖ and vₖ₊₁.
+ H[1] = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+ if H[1] ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
+ @. V[next_pos] = w / H[1] # vₖ₊₁ = w / hₖ₊₁.ₖ
end
- # It's possible that uₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1
+ # It's possible that uₖ₋ₘₑₘ.ₖ ≠ 0 when k ≥ mem + 1
if iter ≥ mem + 2
- H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0
+ H[mem+2] = zero(FC) # hₖ₋ₘₑₘ.ₖ = 0
end
# Update the LU factorization with partial pivoting of H.
- # Compute the last column of Uₘ.
+ # Compute the last column of Uₖ.
if iter ≥ 2
for i = max(2,iter-mem+1) : iter
lpos = mod(i-1, mem) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L.
diag = iter - i + 2
next_diag = diag + 1
- # uᵢ.ₘ ← hᵢ.ₘ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₘ
+ # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ
H[diag] = H[diag] - L[lpos] * H[next_diag]
end
- # Compute ξₘ the last component of zₘ = β(Lₘ)⁻¹e₁.
- # ξₘ = -lₘ.ₘ₋₁ * ξₘ₋₁
+ # Compute ξₖ the last component of zₖ = β(Lₖ)⁻¹e₁.
+ # ξₖ = -lₖ.ₖ₋₁ * ξₖ₋₁
ξ = - L[pos] * ξ
end
- # Compute next pivot lₘ₊₁.ₘ = hₘ₊₁.ₘ / uₘ.ₘ
+ # Compute next pivot lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ
L[next_pos] = H[1] / H[2]
- # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Uₘ)⁻¹.
+ # Compute the direction pₖ, the last column of Pₖ = NVₖ(Uₖ)⁻¹.
for i = max(1,iter-mem) : iter-1
ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
diag = iter - i + 2
if ipos == pos
- # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ
+ # pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ
@kscal!(n, -H[diag], P[pos])
else
- # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ
+ # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₖ * pᵢ
@kaxpy!(n, -H[diag], P[ipos], P[pos])
end
end
- # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ
+ # pₐᵤₓ ← pₐᵤₓ + Nvₖ
@kaxpy!(n, one(FC), z, P[pos])
- # pₘ = pₐᵤₓ / uₘ.ₘ
+ # pₖ = pₐᵤₓ / uₖ.ₖ
@. P[pos] = P[pos] / H[2]
- # Update solution xₘ.
- # xₘ = xₘ₋₁ + ξₘ * pₘ
+ # Update solution xₖ.
+ # xₖ = xₖ₋₁ + ξₖ * pₖ
@kaxpy!(n, ξ, P[pos], x)
# Compute residual norm.
- # ‖ M⁻¹(b - Axₘ) ‖₂ = hₘ₊₁.ₘ * |ξₘ / uₘ.ₘ|
+ # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ξₖ / uₖ.ₖ|
rNorm = real(H[1]) * abs(ξ / H[2])
history && push!(rNorms, rNorm)
diff --git a/src/dqgmres.jl b/src/dqgmres.jl
index ab7c490a6..1b6dd8d75 100644
--- a/src/dqgmres.jl
+++ b/src/dqgmres.jl
@@ -20,7 +20,7 @@ export dqgmres, dqgmres!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the consistent linear system Ax = b using DQGMRES method.
+Solve the consistent linear system Ax = b using DQGMRES.
DQGMRES algorithm is based on the incomplete Arnoldi orthogonalization process
and computes a sequence of approximate solutions with the quasi-minimal residual property.
@@ -33,11 +33,8 @@ Otherwise, DQGMRES interpolates between MINRES and GMRES and is similar to MINRE
Partial reorthogonalization is available with the `reorthogonalization` option.
This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
-DQGMRES can be warm-started from an initial guess `x0` with the method
+DQGMRES can be warm-started from an initial guess `x0` with
(x, stats) = dqgmres(A, b, x0; kwargs...)
@@ -121,7 +118,7 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
else
t .= b
end
- MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀)
+ MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀)
rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
history && push!(rNorms, rNorm)
if rNorm == 0
@@ -142,23 +139,23 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Set up workspace.
mem = length(c) # Memory.
for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b).
- P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Rₘ)⁻¹.
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
+ P[i] .= zero(FC) # Directions for x : Pₖ = NVₖ(Rₖ)⁻¹.
end
- c .= zero(T) # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ.
- s .= zero(FC) # Last mem Givens sines used for the factorization QₘRₘ = Hₘ.
- H .= zero(FC) # Last column of the band hessenberg matrix Hₘ.
- # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2].
- # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
- # In addition of that, the last column of Rₘ is also stored in H.
+ c .= zero(T) # Last mem Givens cosines used for the factorization QₖRₖ = Hₖ.
+ s .= zero(FC) # Last mem Givens sines used for the factorization QₖRₖ = Hₖ.
+ H .= zero(FC) # Last column of the band hessenberg matrix Hₖ.
+ # Each column has at most mem + 1 nonzero elements. hᵢ.ₖ is stored as H[k-i+2].
+ # k-i+2 represents the indice of the diagonal where hᵢ.ₖ is located.
+ # In addition of that, the last column of Rₖ is also stored in H.
# Initial γ₁ and V₁.
- γₘ = rNorm # γₘ and γₘ₊₁ are the last components of gₘ, right-hand of the least squares problem min ‖ Hₘyₘ - gₘ ‖₂.
+ γₖ = rNorm # γₖ and γₖ₊₁ are the last components of gₖ, right-hand of the least squares problem min ‖ Hₖyₖ - gₖ ‖₂.
@. V[1] = r₀ / rNorm
# The following stopping criterion compensates for the lag in the
# residual, but usually increases the number of iterations.
- # solved = sqrt(max(1, iter-mem+1)) * |γₘ₊₁| ≤ ε
+ # solved = sqrt(max(1, iter-mem+1)) * |γₖ₊₁| ≤ ε
solved = rNorm ≤ ε # less accurate, but acceptable.
tired = iter ≥ itmax
status = "unknown"
@@ -170,19 +167,19 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + 1
# Set position in circulars stacks.
- pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V.
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V.
+ pos = mod(iter-1, mem) + 1 # Position corresponding to pₖ and vₖ in circular stacks P and V.
+ next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
# Incomplete Arnoldi procedure.
z = NisI ? V[pos] : solver.z
- NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ
- mul!(t, A, z) # AN⁻¹vₘ
- MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁
+ NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ
+ mul!(t, A, z) # ANvₖ
+ MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁
for i = max(1, iter-mem+1) : iter
ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
diag = iter - i + 2
- H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩
- @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ
+ H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ , vᵢ⟩
+ @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
end
# Partial reorthogonalization of the Krylov basis.
@@ -196,14 +193,14 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
end
- # Compute hₘ₊₁.ₘ and vₘ₊₁.
- H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂
- if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown"
- @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ
+ # Compute hₖ₊₁.ₖ and vₖ₊₁.
+ H[1] = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+ if H[1] ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
+ @. V[next_pos] = w / H[1] # vₖ₊₁ = w / hₖ₊₁.ₖ
end
- # rₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1
+ # rₖ₋ₘₑₘ.ₖ ≠ 0 when k ≥ mem + 1
if iter ≥ mem + 2
- H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0
+ H[mem+2] = zero(FC) # hₖ₋ₘₑₘ.ₖ = 0
end
# Update the QR factorization of H.
@@ -217,41 +214,41 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
H[next_diag] = H_aux
end
- # Compute and apply current Givens reflection Ωₘ.
- # [cₘ sₘ] [ hₘ.ₘ ] = [ρₘ]
- # [sₘ -cₘ] [hₘ₊₁.ₘ] [0 ]
+ # Compute and apply current Givens reflection Ωₖ.
+ # [cₖ sₖ] [ hₖ.ₖ ] = [ρₖ]
+ # [sₖ -cₖ] [hₖ₊₁.ₖ] [0 ]
(c[pos], s[pos], H[2]) = sym_givens(H[2], H[1])
- γₘ₊₁ = conj(s[pos]) * γₘ
- γₘ = c[pos] * γₘ
+ γₖ₊₁ = conj(s[pos]) * γₖ
+ γₖ = c[pos] * γₖ
- # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Rₘ)⁻¹.
+ # Compute the direction pₖ, the last column of Pₖ = NVₖ(Rₖ)⁻¹.
for i = max(1,iter-mem) : iter-1
ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
diag = iter - i + 2
if ipos == pos
- # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ
+ # pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ
@kscal!(n, -H[diag], P[pos])
else
- # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ
+ # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₖ * pᵢ
@kaxpy!(n, -H[diag], P[ipos], P[pos])
end
end
- # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ
+ # pₐᵤₓ ← pₐᵤₓ + Nvₖ
@kaxpy!(n, one(FC), z, P[pos])
- # pₘ = pₐᵤₓ / hₘ.ₘ
+ # pₖ = pₐᵤₓ / hₖ.ₖ
@. P[pos] = P[pos] / H[2]
- # Compute solution xₘ.
- # xₘ ← xₘ₋₁ + γₘ * pₘ
- @kaxpy!(n, γₘ, P[pos], x)
+ # Compute solution xₖ.
+ # xₖ ← xₖ₋₁ + γₖ * pₖ
+ @kaxpy!(n, γₖ, P[pos], x)
# Update residual norm estimate.
- # ‖ M⁻¹(b - Axₘ) ‖₂ ≈ |γₘ₊₁|
- rNorm = abs(γₘ₊₁)
+ # ‖ M(b - Axₖ) ‖₂ ≈ |γₖ₊₁|
+ rNorm = abs(γₖ₊₁)
history && push!(rNorms, rNorm)
- # Update γₘ.
- γₘ = γₘ₊₁
+ # Update γₖ.
+ γₖ = γₖ₊₁
# Stopping conditions that do not depend on user input.
# This is to guard against tolerances that are unreasonably small.
diff --git a/src/fgmres.jl b/src/fgmres.jl
index b4cc213e1..635e7241e 100644
--- a/src/fgmres.jl
+++ b/src/fgmres.jl
@@ -124,7 +124,7 @@ function fgmres!(solver :: FgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
else
w .= b
end
- MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀)
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
β = @knrm2(n, r₀) # β = ‖r₀‖₂
rNorm = β
diff --git a/src/fom.jl b/src/fom.jl
index b212129ef..95bcc97d1 100644
--- a/src/fom.jl
+++ b/src/fom.jl
@@ -20,22 +20,18 @@ export fom, fom!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using FOM method.
+Solve the linear system Ax = b using FOM.
FOM algorithm is based on the Arnoldi process and a Galerkin condition.
This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
-
Full reorthogonalization is available with the `reorthogonalization` option.
If `restart = true`, the restarted version FOM(k) is used with `k = memory`.
If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations.
-More storage will be allocated only if the number of iterations exceed `memory`.
+More storage will be allocated only if the number of iterations exceeds `memory`.
-FOM can be warm-started from an initial guess `x0` with the method
+FOM can be warm-started from an initial guess `x0` with
(x, stats) = fom(A, b, x0; kwargs...)
@@ -124,7 +120,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
else
w .= b
end
- MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀)
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
β = @knrm2(n, r₀) # β = ‖r₀‖₂
rNorm = β
@@ -167,7 +163,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Initialize workspace.
nr = 0 # Number of coefficients stored in Uₖ.
for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀).
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
end
l .= zero(FC) # Lower unit triangular matrix Lₖ.
U .= zero(FC) # Upper triangular matrix Uₖ.
@@ -207,9 +203,9 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Continue the Arnoldi process.
p = NisI ? V[inner_iter] : solver.p
- NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ
- mul!(w, A, p) # w ← AN⁻¹vₖ
- MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ
+ NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ
+ mul!(w, A, p) # w ← ANvₖ
+ MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ
for i = 1 : inner_iter
U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
@kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
@@ -240,7 +236,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
l[inner_iter] = Hbis / U[nr+inner_iter]
# Update residual norm estimate.
- # ‖ M⁻¹(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ|
+ # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ|
rNorm = Hbis * abs(z[inner_iter] / U[nr+inner_iter])
history && push!(rNorms, rNorm)
@@ -280,7 +276,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
y[i] = y[i] / U[pos] # yᵢ ← yᵢ / rᵢᵢ
end
- # Form xₖ = N⁻¹Vₖyₖ
+ # Form xₖ = NVₖyₖ
for i = 1 : inner_iter
@kaxpy!(n, y[i], V[i], xr)
end
diff --git a/src/gmres.jl b/src/gmres.jl
index 32999aa23..b145b512b 100644
--- a/src/gmres.jl
+++ b/src/gmres.jl
@@ -20,22 +20,18 @@ export gmres, gmres!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using GMRES method.
+Solve the linear system Ax = b using GMRES.
-GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimal residual property.
+GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimum residual.
This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
-
Full reorthogonalization is available with the `reorthogonalization` option.
If `restart = true`, the restarted version GMRES(k) is used with `k = memory`.
If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations.
-More storage will be allocated only if the number of iterations exceed `memory`.
+More storage will be allocated only if the number of iterations exceeds `memory`.
-GMRES can be warm-started from an initial guess `x0` with the method
+GMRES can be warm-started from an initial guess `x0` with
(x, stats) = gmres(A, b, x0; kwargs...)
@@ -124,7 +120,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
else
w .= b
end
- MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀)
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
β = @knrm2(n, r₀) # β = ‖r₀‖₂
rNorm = β
@@ -168,7 +164,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Initialize workspace.
nr = 0 # Number of coefficients stored in Rₖ.
for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀).
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
end
s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
@@ -210,9 +206,9 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Continue the Arnoldi process.
p = NisI ? V[inner_iter] : solver.p
- NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ
- mul!(w, A, p) # w ← AN⁻¹vₖ
- MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ
+ NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ
+ mul!(w, A, p) # w ← ANvₖ
+ MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ
for i = 1 : inner_iter
R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
@kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
@@ -250,7 +246,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
z[inner_iter] = c[inner_iter] * z[inner_iter]
# Update residual norm estimate.
- # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁|
+ # ‖ M(b - Axₖ) ‖₂ = |ζₖ₊₁|
rNorm = abs(ζₖ₊₁)
history && push!(rNorms, rNorm)
@@ -299,7 +295,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
end
- # Form xₖ = N⁻¹Vₖyₖ
+ # Form xₖ = NVₖyₖ
for i = 1 : inner_iter
@kaxpy!(n, y[i], V[i], xr)
end
diff --git a/src/gpmr.jl b/src/gpmr.jl
index 82499b50e..528bd522d 100644
--- a/src/gpmr.jl
+++ b/src/gpmr.jl
@@ -58,7 +58,7 @@ Full reorthogonalization is available with the `reorthogonalization` option.
Additional details can be displayed if verbose mode is enabled (verbose > 0).
Information will be displayed every `verbose` iterations.
-GPMR can be warm-started from initial guesses `x0` and `y0` with the method
+GPMR can be warm-started from initial guesses `x0` and `y0` with
(x, y, stats) = gpmr(A, B, b, c, x0, y0; kwargs...)
diff --git a/src/minres.jl b/src/minres.jl
index d3b8732ee..c95048bbc 100644
--- a/src/minres.jl
+++ b/src/minres.jl
@@ -55,7 +55,7 @@ MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr
A preconditioner M may be provided in the form of a linear operator and is
assumed to be symmetric and positive definite.
-MINRES can be warm-started from an initial guess `x0` with the method
+MINRES can be warm-started from an initial guess `x0` with
(x, stats) = minres(A, b, x0; kwargs...)
diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl
index 509a7ef4e..cb70754b8 100644
--- a/src/minres_qlp.jl
+++ b/src/minres_qlp.jl
@@ -34,7 +34,7 @@ A preconditioner M may be provided in the form of a linear operator and is
assumed to be symmetric and positive definite.
M also indicates the weighted norm in which residuals are measured.
-MINRES-QLP can be warm-started from an initial guess `x0` with the method
+MINRES-QLP can be warm-started from an initial guess `x0` with
(x, stats) = minres_qlp(A, b, x0; kwargs...)
diff --git a/src/qmr.jl b/src/qmr.jl
index d4b684601..fe0fab65c 100644
--- a/src/qmr.jl
+++ b/src/qmr.jl
@@ -29,13 +29,13 @@ export qmr, qmr!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the square linear system Ax = b using the QMR method.
+Solve the square linear system Ax = b using QMR.
QMR is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`.
The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
When `A` is symmetric and `b = c`, QMR is equivalent to MINRES.
-QMR can be warm-started from an initial guess `x0` with the method
+QMR can be warm-started from an initial guess `x0` with
(x, stats) = qmr(A, b, x0; kwargs...)
diff --git a/src/symmlq.jl b/src/symmlq.jl
index 7b889c715..efbd751aa 100644
--- a/src/symmlq.jl
+++ b/src/symmlq.jl
@@ -30,12 +30,12 @@ Solve the shifted linear system
using the SYMMLQ method, where λ is a shift parameter,
and A is square and symmetric.
-SYMMLQ produces monotonic errors ‖x*-x‖₂.
+SYMMLQ produces monotonic errors ‖x* - x‖₂.
A preconditioner M may be provided in the form of a linear operator and is
assumed to be symmetric and positive definite.
-SYMMLQ can be warm-started from an initial guess `x0` with the method
+SYMMLQ can be warm-started from an initial guess `x0` with
(x, stats) = symmlq(A, b, x0; kwargs...)
diff --git a/src/tricg.jl b/src/tricg.jl
index 7c140a821..8d0a41ce3 100644
--- a/src/tricg.jl
+++ b/src/tricg.jl
@@ -53,7 +53,7 @@ TriCG stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol +
Additional details can be displayed if verbose mode is enabled (verbose > 0).
Information will be displayed every `verbose` iterations.
-TriCG can be warm-started from initial guesses `x0` and `y0` with the method
+TriCG can be warm-started from initial guesses `x0` and `y0` with
(x, y, stats) = tricg(A, b, c, x0, y0; kwargs...)
diff --git a/src/trilqr.jl b/src/trilqr.jl
index 6b0948984..60663ff55 100644
--- a/src/trilqr.jl
+++ b/src/trilqr.jl
@@ -32,7 +32,7 @@ USYMQR is used for solving dual system `Aᴴy = c`.
An option gives the possibility of transferring from the USYMLQ point to the
USYMCG point, when it exists. The transfer is based on the residual norm.
-TriLQR can be warm-started from initial guesses `x0` and `y0` with the method
+TriLQR can be warm-started from initial guesses `x0` and `y0` with
(x, y, stats) = trilqr(A, b, c, x0, y0; kwargs...)
diff --git a/src/trimr.jl b/src/trimr.jl
index 7dd826edf..041a5ffff 100644
--- a/src/trimr.jl
+++ b/src/trimr.jl
@@ -53,7 +53,7 @@ TriMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol +
Additional details can be displayed if verbose mode is enabled (verbose > 0).
Information will be displayed every `verbose` iterations.
-TriMR can be warm-started from initial guesses `x0` and `y0` with the method
+TriMR can be warm-started from initial guesses `x0` and `y0` with
(x, y, stats) = trimr(A, b, c, x0, y0; kwargs...)
diff --git a/src/usymlq.jl b/src/usymlq.jl
index 29cd704c7..acec8d77e 100644
--- a/src/usymlq.jl
+++ b/src/usymlq.jl
@@ -41,7 +41,7 @@ In all cases, problems must be consistent.
An option gives the possibility of transferring to the USYMCG point,
when it exists. The transfer is based on the residual norm.
-USYMLQ can be warm-started from an initial guess `x0` with the method
+USYMLQ can be warm-started from an initial guess `x0` with
(x, stats) = usymlq(A, b, c, x0; kwargs...)
diff --git a/src/usymqr.jl b/src/usymqr.jl
index 45c95c88d..13c19efa8 100644
--- a/src/usymqr.jl
+++ b/src/usymqr.jl
@@ -28,7 +28,7 @@ export usymqr, usymqr!
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using the USYMQR method.
+Solve the linear system Ax = b using USYMQR.
USYMQR is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`.
The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`.
@@ -38,7 +38,7 @@ It's considered as a generalization of MINRES.
It can also be applied to under-determined and over-determined problems.
USYMQR finds the minimum-norm solution if problems are inconsistent.
-USYMQR can be warm-started from an initial guess `x0` with the method
+USYMQR can be warm-started from an initial guess `x0` with
(x, stats) = usymqr(A, b, c, x0; kwargs...)
From a3511627d71787174ed1ed9309715784dc9fd0ab Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 26 Sep 2022 00:29:52 -0400
Subject: [PATCH 042/132] Reduce the storage required by DIOM and DQGMRES
---
src/diom.jl | 88 ++++++++++++++++++++++------------------
src/dqgmres.jl | 50 ++++++++++++-----------
src/krylov_solvers.jl | 8 ++--
test/test_allocations.jl | 13 +++---
test/test_diom.jl | 2 +-
5 files changed, 86 insertions(+), 75 deletions(-)
diff --git a/src/diom.jl b/src/diom.jl
index 168bc5b94..77e73c414 100644
--- a/src/diom.jl
+++ b/src/diom.jl
@@ -136,20 +136,23 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
(verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
- mem = length(L) # Memory
+ mem = length(V) # Memory
for i = 1 : mem
V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
- P[i] .= zero(FC) # Directions for x : Pₖ = NVₖ(Uₖ)⁻¹.
+ end
+ for i = 1 : mem-1
+ P[i] .= zero(FC) # Directions Pₖ = NVₖ(Uₖ)⁻¹.
end
H .= zero(FC) # Last column of the band hessenberg matrix Hₖ = LₖUₖ.
- # Each column has at most mem + 1 nonzero elements. hᵢ.ₖ is stored as H[k-i+2].
- # k-i+2 represents the indice of the diagonal where hᵢ.ₖ is located.
+ # Each column has at most mem + 1 nonzero elements.
+ # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H.
+ # k-i+1 represents the indice of the diagonal where hᵢ.ₖ is located.
# In addition of that, the last column of Uₖ is stored in H.
- L .= zero(FC) # Last mem pivots of Lₖ.
+ L .= zero(FC) # Last mem-1 pivots of Lₖ.
# Initial ξ₁ and V₁.
ξ = rNorm
- @. V[1] = r₀ / rNorm
+ V[1] .= r₀ ./ rNorm
# Stopping criterion.
solved = rNorm ≤ ε
@@ -163,8 +166,8 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + 1
# Set position in circulars stacks.
- pos = mod(iter-1, mem) + 1 # Position corresponding to pₖ and vₖ in circular stacks P and V.
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
+ pos = mod(iter-1, mem) + 1 # Position corresponding to vₖ in the circular stack V.
+ next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
# Incomplete Arnoldi procedure.
z = NisI ? V[pos] : solver.z
@@ -172,17 +175,17 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
mul!(t, A, z) # ANvₖ
MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁
for i = max(1, iter-mem+1) : iter
- ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
- diag = iter - i + 2
- H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ , vᵢ⟩
- @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
+ ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
+ diag = iter - i + 1
+ H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩
+ @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
end
# Partial reorthogonalization of the Krylov basis.
if reorthogonalization
for i = max(1, iter-mem+1) : iter
ipos = mod(i-1, mem) + 1
- diag = iter - i + 2
+ diag = iter - i + 1
Htmp = @kdot(n, w, V[ipos])
H[diag] += Htmp
@kaxpy!(n, -Htmp, V[ipos], w)
@@ -190,56 +193,61 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
# Compute hₖ₊₁.ₖ and vₖ₊₁.
- H[1] = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
- if H[1] ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
- @. V[next_pos] = w / H[1] # vₖ₊₁ = w / hₖ₊₁.ₖ
- end
- # It's possible that uₖ₋ₘₑₘ.ₖ ≠ 0 when k ≥ mem + 1
- if iter ≥ mem + 2
- H[mem+2] = zero(FC) # hₖ₋ₘₑₘ.ₖ = 0
+ Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+ if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
+ V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ
end
- # Update the LU factorization with partial pivoting of H.
+ # Update the LU factorization of Hₖ.
# Compute the last column of Uₖ.
if iter ≥ 2
- for i = max(2,iter-mem+1) : iter
- lpos = mod(i-1, mem) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L.
- diag = iter - i + 2
+ # u₁.ₖ ← h₁.ₖ if iter ≤ mem
+ # uₖ₋ₘₑₘ₊₁.ₖ ← hₖ₋ₘₑₘ₊₁.ₖ if iter ≥ mem + 1
+ for i = max(2,iter-mem+2) : iter
+ lpos = mod(i-1, mem-1) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L.
+ diag = iter - i + 1
next_diag = diag + 1
# uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ
H[diag] = H[diag] - L[lpos] * H[next_diag]
+ if i == iter
+ # Compute ξₖ the last component of zₖ = β(Lₖ)⁻¹e₁.
+ # ξₖ = -lₖ.ₖ₋₁ * ξₖ₋₁
+ ξ = - L[lpos] * ξ
+ end
end
- # Compute ξₖ the last component of zₖ = β(Lₖ)⁻¹e₁.
- # ξₖ = -lₖ.ₖ₋₁ * ξₖ₋₁
- ξ = - L[pos] * ξ
end
# Compute next pivot lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ
- L[next_pos] = H[1] / H[2]
+ next_lpos = mod(iter, mem-1) + 1
+ L[next_lpos] = Haux / H[1]
+
+ ppos = mod(iter-1, mem-1) + 1 # Position corresponding to pₖ in the circular stack P.
# Compute the direction pₖ, the last column of Pₖ = NVₖ(Uₖ)⁻¹.
- for i = max(1,iter-mem) : iter-1
- ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
- diag = iter - i + 2
- if ipos == pos
- # pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ
- @kscal!(n, -H[diag], P[pos])
+ # u₁.ₖp₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≤ mem
+ # uₖ₋ₘₑₘ₊₁.ₖpₖ₋ₘₑₘ₊₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≥ mem + 1
+ for i = max(1,iter-mem+1) : iter-1
+ ipos = mod(i-1, mem-1) + 1 # Position corresponding to pᵢ in the circular stack P.
+ diag = iter - i + 1
+ if ipos == ppos
+ # pₖ ← -uₖ₋ₘₑₘ₊₁.ₖ * pₖ₋ₘₑₘ₊₁
+ @kscal!(n, -H[diag], P[ppos])
else
- # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₖ * pᵢ
- @kaxpy!(n, -H[diag], P[ipos], P[pos])
+ # pₖ ← pₖ - uᵢ.ₖ * pᵢ
+ @kaxpy!(n, -H[diag], P[ipos], P[ppos])
end
end
# pₐᵤₓ ← pₐᵤₓ + Nvₖ
- @kaxpy!(n, one(FC), z, P[pos])
+ @kaxpy!(n, one(FC), z, P[ppos])
# pₖ = pₐᵤₓ / uₖ.ₖ
- @. P[pos] = P[pos] / H[2]
+ P[ppos] .= P[ppos] ./ H[1]
# Update solution xₖ.
# xₖ = xₖ₋₁ + ξₖ * pₖ
- @kaxpy!(n, ξ, P[pos], x)
+ @kaxpy!(n, ξ, P[ppos], x)
# Compute residual norm.
# ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ξₖ / uₖ.ₖ|
- rNorm = real(H[1]) * abs(ξ / H[2])
+ rNorm = Haux * abs(ξ / H[1])
history && push!(rNorms, rNorm)
# Stopping conditions that do not depend on user input.
diff --git a/src/dqgmres.jl b/src/dqgmres.jl
index 1b6dd8d75..aa6e245ba 100644
--- a/src/dqgmres.jl
+++ b/src/dqgmres.jl
@@ -137,7 +137,7 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
# Set up workspace.
- mem = length(c) # Memory.
+ mem = length(V) # Memory.
for i = 1 : mem
V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
P[i] .= zero(FC) # Directions for x : Pₖ = NVₖ(Rₖ)⁻¹.
@@ -145,13 +145,14 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
c .= zero(T) # Last mem Givens cosines used for the factorization QₖRₖ = Hₖ.
s .= zero(FC) # Last mem Givens sines used for the factorization QₖRₖ = Hₖ.
H .= zero(FC) # Last column of the band hessenberg matrix Hₖ.
- # Each column has at most mem + 1 nonzero elements. hᵢ.ₖ is stored as H[k-i+2].
- # k-i+2 represents the indice of the diagonal where hᵢ.ₖ is located.
+ # Each column has at most mem + 1 nonzero elements.
+ # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H.
+ # k-i+1 represents the indice of the diagonal where hᵢ.ₖ is located.
# In addition of that, the last column of Rₖ is also stored in H.
# Initial γ₁ and V₁.
γₖ = rNorm # γₖ and γₖ₊₁ are the last components of gₖ, right-hand of the least squares problem min ‖ Hₖyₖ - gₖ ‖₂.
- @. V[1] = r₀ / rNorm
+ V[1] .= r₀ ./ rNorm
# The following stopping criterion compensates for the lag in the
# residual, but usually increases the number of iterations.
@@ -167,8 +168,8 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + 1
# Set position in circulars stacks.
- pos = mod(iter-1, mem) + 1 # Position corresponding to pₖ and vₖ in circular stacks P and V.
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
+ pos = mod(iter-1, mem) + 1 # Position corresponding to pₖ and vₖ in circular stacks P and V.
+ next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
# Incomplete Arnoldi procedure.
z = NisI ? V[pos] : solver.z
@@ -176,17 +177,17 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
mul!(t, A, z) # ANvₖ
MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁
for i = max(1, iter-mem+1) : iter
- ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
- diag = iter - i + 2
- H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ , vᵢ⟩
- @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
+ ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
+ diag = iter - i + 1
+ H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩
+ @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
end
# Partial reorthogonalization of the Krylov basis.
if reorthogonalization
for i = max(1, iter-mem+1) : iter
ipos = mod(i-1, mem) + 1
- diag = iter - i + 2
+ diag = iter - i + 1
Htmp = @kdot(n, w, V[ipos])
H[diag] += Htmp
@kaxpy!(n, -Htmp, V[ipos], w)
@@ -194,37 +195,38 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
# Compute hₖ₊₁.ₖ and vₖ₊₁.
- H[1] = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
- if H[1] ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
- @. V[next_pos] = w / H[1] # vₖ₊₁ = w / hₖ₊₁.ₖ
+ Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+ if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
+ V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ
end
# rₖ₋ₘₑₘ.ₖ ≠ 0 when k ≥ mem + 1
+ # We don't want to use rₖ₋₁₋ₘₑₘ.ₖ₋₁ when we compute rₖ₋ₘₑₘ.ₖ
if iter ≥ mem + 2
- H[mem+2] = zero(FC) # hₖ₋ₘₑₘ.ₖ = 0
+ H[mem+1] = zero(FC) # rₖ₋ₘₑₘ.ₖ = 0
end
- # Update the QR factorization of H.
+ # Update the QR factorization of Hₖ.
# Apply mem previous Givens reflections Ωᵢ.
for i = max(1,iter-mem) : iter-1
- irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s.
- diag = iter - i + 1
+ irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s.
+ diag = iter - i
next_diag = diag + 1
- H_aux = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag]
+ Htmp = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag]
H[diag] = conj(s[irot_pos]) * H[next_diag] - c[irot_pos] * H[diag]
- H[next_diag] = H_aux
+ H[next_diag] = Htmp
end
# Compute and apply current Givens reflection Ωₖ.
# [cₖ sₖ] [ hₖ.ₖ ] = [ρₖ]
# [sₖ -cₖ] [hₖ₊₁.ₖ] [0 ]
- (c[pos], s[pos], H[2]) = sym_givens(H[2], H[1])
+ (c[pos], s[pos], H[1]) = sym_givens(H[1], Haux)
γₖ₊₁ = conj(s[pos]) * γₖ
γₖ = c[pos] * γₖ
# Compute the direction pₖ, the last column of Pₖ = NVₖ(Rₖ)⁻¹.
for i = max(1,iter-mem) : iter-1
- ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
- diag = iter - i + 2
+ ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
+ diag = iter - i + 1
if ipos == pos
# pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ
@kscal!(n, -H[diag], P[pos])
@@ -236,7 +238,7 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# pₐᵤₓ ← pₐᵤₓ + Nvₖ
@kaxpy!(n, one(FC), z, P[pos])
# pₖ = pₐᵤₓ / hₖ.ₖ
- @. P[pos] = P[pos] / H[2]
+ P[pos] .= P[pos] ./ H[1]
# Compute solution xₖ.
# xₖ ← xₖ₋₁ + γₖ * pₖ
diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl
index b37ccd575..f94efd2f9 100644
--- a/src/krylov_solvers.jl
+++ b/src/krylov_solvers.jl
@@ -409,7 +409,7 @@ function DqgmresSolver(n, m, memory, S)
V = [S(undef, n) for i = 1 : memory]
c = Vector{T}(undef, memory)
s = Vector{FC}(undef, memory)
- H = Vector{FC}(undef, memory+2)
+ H = Vector{FC}(undef, memory+1)
stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
solver = DqgmresSolver{T,FC,S}(Δx, x, t, z, w, P, V, c, s, H, false, stats)
return solver
@@ -455,10 +455,10 @@ function DiomSolver(n, m, memory, S)
t = S(undef, n)
z = S(undef, 0)
w = S(undef, 0)
- P = [S(undef, n) for i = 1 : memory]
+ P = [S(undef, n) for i = 1 : memory-1]
V = [S(undef, n) for i = 1 : memory]
- L = Vector{FC}(undef, memory)
- H = Vector{FC}(undef, memory+2)
+ L = Vector{FC}(undef, memory-1)
+ H = Vector{FC}(undef, memory)
stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
solver = DiomSolver{T,FC,S}(Δx, x, t, z, w, P, V, L, H, false, stats)
return solver
diff --git a/test/test_allocations.jl b/test/test_allocations.jl
index b29f11631..308da1597 100644
--- a/test/test_allocations.jl
+++ b/test/test_allocations.jl
@@ -141,10 +141,11 @@
@testset "DIOM" begin
# DIOM needs:
# - 2 n-vectors: x, t
- # - 2 (n*mem)-matrices: P, V
- # - 1 mem-vector: L
- # - 1 (mem+2)-vector: H
- storage_diom(mem, n) = (2 * n) + (2 * n * mem) + (mem) + (mem + 2)
+ # - 1 (n*mem)-matrix: V
+ # - 1 n*(mem-1)-matrix: P
+ # - 1 (mem-1)-vector: L
+ # - 1 mem-vector: H
+ storage_diom(mem, n) = (2 * n) + (n * mem) + (n * (mem-1)) + (mem-1) + (mem)
storage_diom_bytes(mem, n) = nbits * storage_diom(mem, n)
expected_diom_bytes = storage_diom_bytes(mem, n)
@@ -183,8 +184,8 @@
# - 2 n-vectors: x, t
# - 2 (n*mem)-matrices: P, V
# - 2 mem-vectors: c, s
- # - 1 (mem+2)-vector: H
- storage_dqgmres(mem, n) = (2 * n) + (2 * n * mem) + (2 * mem) + (mem + 2)
+ # - 1 (mem+1)-vector: H
+ storage_dqgmres(mem, n) = (2 * n) + (2 * n * mem) + (2 * mem) + (mem + 1)
storage_dqgmres_bytes(mem, n) = nbits * storage_dqgmres(mem, n)
expected_dqgmres_bytes = storage_dqgmres_bytes(mem, n)
diff --git a/test/test_diom.jl b/test/test_diom.jl
index 4f1a8ecea..62a38b198 100644
--- a/test/test_diom.jl
+++ b/test/test_diom.jl
@@ -60,7 +60,7 @@
# Poisson equation in polar coordinates.
A, b = polar_poisson(FC=FC)
- (x, stats) = diom(A, b, memory=200)
+ (x, stats) = diom(A, b, memory=150)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ diom_tol)
From 29cf54bec5fdb6482b933dcd7b8fd02150e3183f Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 26 Sep 2022 00:45:27 -0400
Subject: [PATCH 043/132] Update test_solvers.jl
---
test/test_solvers.jl | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index 17b3edf0b..a706cf3d0 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -526,10 +526,10 @@ function test_solvers(FC)
│ t│ Vector{$FC}│ 64│
│ z│ Vector{$FC}│ 0│
│ w│ Vector{$FC}│ 0│
- │ P│Vector{Vector{$FC}}│ 10 x 64│
+ │ P│Vector{Vector{$FC}}│ 9 x 64│
│ V│Vector{Vector{$FC}}│ 10 x 64│
- │ L│ Vector{$FC}│ 10│
- │ H│ Vector{$FC}│ 12│
+ │ L│ Vector{$FC}│ 9│
+ │ H│ Vector{$FC}│ 10│
│warm_start│ Bool│ 0│
└──────────┴───────────────────┴─────────────────┘
"""
@@ -576,7 +576,7 @@ function test_solvers(FC)
│ V│Vector{Vector{$FC}}│ 10 x 64│
│ c│ Vector{$T}│ 10│
│ s│ Vector{$FC}│ 10│
- │ H│ Vector{$FC}│ 12│
+ │ H│ Vector{$FC}│ 11│
│ warm_start│ Bool│ 0│
└─────────────┴───────────────────┴─────────────────┘
"""
From facd67db97a2d4adf6d77abd35d1c99b0e5a6233 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 26 Sep 2022 15:35:37 -0400
Subject: [PATCH 044/132] Add a comment about complex matrices in gpu.md
---
docs/src/gpu.md | 3 +++
test/gpu/nvidia.jl | 16 ++++++++--------
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 4ce8ee448..20bea1656 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -80,6 +80,9 @@ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x)
x, stats = cg(A_gpu, b_gpu, M=opM)
```
+!!! note
+ You need to replace `'T'` by `'C'` in `ldiv_ic0!` if `A_gpu` is a complex matrix.
+
### Example with a general square system
```julia
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
index 8faed479a..c72c5c6ba 100644
--- a/test/gpu/nvidia.jl
+++ b/test/gpu/nvidia.jl
@@ -31,25 +31,25 @@ include("../test_utils.jl")
A_gpu = CuSparseMatrixCSC(A_cpu)
P = ic02(A_gpu, 'O')
- function ldiv_ic0!(y, P, x)
+ function ldiv_csc_ic0!(y, P, x)
copyto!(y, x)
sv2!('T', 'U', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O')
return y
end
- opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csc_ic0!(y, P, x))
x, stats = cg(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
A_gpu = CuSparseMatrixCSR(A_cpu)
P = ic02(A_gpu, 'O')
- function ldiv_ic0!(y, P, x)
+ function ldiv_csr_ic0!(y, P, x)
copyto!(y, x)
sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('T', 'L', 'N', 1.0, P, y, 'O')
return y
end
- opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csr_ic0!(y, P, x))
x, stats = cg(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
end
@@ -69,25 +69,25 @@ include("../test_utils.jl")
A_gpu = CuSparseMatrixCSC(A_cpu)
P = ilu02(A_gpu, 'O')
- function ldiv_ilu0!(y, P, x)
+ function ldiv_csc_ilu0!(y, P, x)
copyto!(y, x)
sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'U', 1.0, P, y, 'O')
return y
end
- opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csc_ilu0!(y, P, x))
x, stats = bicgstab(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
A_gpu = CuSparseMatrixCSR(A_cpu)
P = ilu02(A_gpu, 'O')
- function ldiv_ilu0!(y, P, x)
+ function ldiv_csr_ilu0!(y, P, x)
copyto!(y, x)
sv2!('N', 'L', 'U', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O')
return y
end
- opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csr_ilu0!(y, P, x))
x, stats = bicgstab(A_gpu, b_gpu, M=opM)
@test norm(b_gpu - A_gpu * x) ≤ 1e-6
end
From 460087ac87fda7b0fd5fbf8c4377ef48f69878b2 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 26 Sep 2022 15:49:10 -0400
Subject: [PATCH 045/132] Fix a typo in gpu.md
---
docs/src/gpu.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 20bea1656..69aef5c60 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -40,7 +40,7 @@ b_cpu = rand(200)
A_gpu = CuSparseMatrixCSC(A_cpu)
b_gpu = CuVector(b_cpu)
-# Solve a rectangular and sparse system on a Nvidia GPU
+# Solve a rectangular and sparse system on an Nvidia GPU
x, stats = lsmr(A_gpu, b_gpu)
```
From fd1f1ba6ce58c5e33710f2186e969c5ef5d9af32 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 26 Sep 2022 01:21:40 -0400
Subject: [PATCH 046/132] Remove allocations in roots_quadratic
---
src/krylov_utils.jl | 37 +++++++++++++++++++++----------------
test/test_aux.jl | 22 ++++++++++++++++++++++
2 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl
index b16da57c0..c3dce2817 100644
--- a/src/krylov_utils.jl
+++ b/src/krylov_utils.jl
@@ -111,10 +111,10 @@ function roots_quadratic(q₂ :: T, q₁ :: T, q₀ :: T;
# Case where q(x) is linear.
if q₂ == zero(T)
if q₁ == zero(T)
- root = [zero(T)]
- q₀ == zero(T) || (root = T[])
+ root = tuple(zero(T))
+ q₀ == zero(T) || (root = tuple())
else
- root = [-q₀ / q₁]
+ root = tuple(-q₀ / q₁)
end
return root
end
@@ -123,26 +123,31 @@ function roots_quadratic(q₂ :: T, q₁ :: T, q₀ :: T;
rhs = √eps(T) * q₁ * q₁
if abs(q₀ * q₂) > rhs
ρ = q₁ * q₁ - 4 * q₂ * q₀
- ρ < 0 && return T[]
+ ρ < 0 && return tuple()
d = -(q₁ + copysign(sqrt(ρ), q₁)) / 2
- roots = [d / q₂, q₀ / d]
+ root1 = d / q₂
+ root2 = q₀ / d
else
# Ill-conditioned quadratic.
- roots = [-q₁ / q₂, zero(T)]
+ root1 = -q₁ / q₂
+ root2 = zero(T)
end
# Perform a few Newton iterations to improve accuracy.
- for k = 1 : 2
- root = roots[k]
- for it = 1 : nitref
- q = (q₂ * root + q₁) * root + q₀
- dq = 2 * q₂ * root + q₁
- dq == zero(T) && continue
- root = root - q / dq
- end
- roots[k] = root
+ for it = 1 : nitref
+ q = (q₂ * root1 + q₁) * root1 + q₀
+ dq = 2 * q₂ * root1 + q₁
+ dq == zero(T) && continue
+ root1 = root1 - q / dq
+ end
+
+ for it = 1 : nitref
+ q = (q₂ * root2 + q₁) * root2 + q₀
+ dq = 2 * q₂ * root2 + q₁
+ dq == zero(T) && continue
+ root2 = root2 - q / dq
end
- return roots
+ return (root1, root2)
end
diff --git a/test/test_aux.jl b/test/test_aux.jl
index 5ac2b401c..72815ff2f 100644
--- a/test/test_aux.jl
+++ b/test/test_aux.jl
@@ -36,54 +36,76 @@
@testset "roots_quadratic" begin
# test roots of a quadratic
roots = Krylov.roots_quadratic(0.0, 0.0, 0.0)
+ allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0)
@test length(roots) == 1
@test roots[1] == 0.0
+ @test allocations == 0
roots = Krylov.roots_quadratic(0.0, 0.0, 1.0)
+ allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 1.0)
@test length(roots) == 0
+ @test allocations == 0
roots = Krylov.roots_quadratic(0.0, 3.14, -1.0)
+ allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0)
@test length(roots) == 1
@test roots[1] == 1.0 / 3.14
+ @test allocations == 0
roots = Krylov.roots_quadratic(1.0, 0.0, 1.0)
+ allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 1.0)
@test length(roots) == 0
+ @test allocations == 0
roots = Krylov.roots_quadratic(1.0, 0.0, 0.0)
+ allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0)
@test length(roots) == 2
@test roots[1] == 0.0
@test roots[2] == 0.0
+ @test allocations == 0
roots = Krylov.roots_quadratic(1.0, 3.0, 2.0)
+ allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0)
@test length(roots) == 2
@test roots[1] ≈ -2.0
@test roots[2] ≈ -1.0
+ @test allocations == 0
roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
+ allocations = @allocated Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
@test length(roots) == 0
+ @test allocations == 0
# ill-conditioned quadratic
roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
@test length(roots) == 2
@test roots[1] == 1.0e+13
@test roots[2] == 0.0
+ @test allocations == 0
# iterative refinement is crucial!
roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
@test length(roots) == 2
@test roots[1] == 1.0e+13
@test roots[2] == -1.0e-05
+ @test allocations == 0
# not ill-conditioned quadratic
roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
@test length(roots) == 2
@test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
@test isapprox(roots[2], -1.0, rtol=1.0e-6)
+ @test allocations == 0
roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
@test length(roots) == 2
@test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
@test isapprox(roots[2], -1.0, rtol=1.0e-6)
+ @test allocations == 0
end
@testset "to_boundary" begin
From fb509d68eef6d39bb5a5bdebc49aa98507efcf92 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 26 Sep 2022 12:39:09 -0400
Subject: [PATCH 047/132] Test allocations of roots_quadratic for Julia >= 1.8
---
test/test_aux.jl | 57 +++++++++++++++++++++++++++++-------------------
1 file changed, 35 insertions(+), 22 deletions(-)
diff --git a/test/test_aux.jl b/test/test_aux.jl
index 72815ff2f..6f4f90398 100644
--- a/test/test_aux.jl
+++ b/test/test_aux.jl
@@ -36,76 +36,89 @@
@testset "roots_quadratic" begin
# test roots of a quadratic
roots = Krylov.roots_quadratic(0.0, 0.0, 0.0)
- allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0)
@test length(roots) == 1
@test roots[1] == 0.0
- @test allocations == 0
roots = Krylov.roots_quadratic(0.0, 0.0, 1.0)
- allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 1.0)
@test length(roots) == 0
- @test allocations == 0
roots = Krylov.roots_quadratic(0.0, 3.14, -1.0)
- allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0)
@test length(roots) == 1
@test roots[1] == 1.0 / 3.14
- @test allocations == 0
roots = Krylov.roots_quadratic(1.0, 0.0, 1.0)
- allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 1.0)
@test length(roots) == 0
- @test allocations == 0
roots = Krylov.roots_quadratic(1.0, 0.0, 0.0)
- allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0)
@test length(roots) == 2
@test roots[1] == 0.0
@test roots[2] == 0.0
- @test allocations == 0
roots = Krylov.roots_quadratic(1.0, 3.0, 2.0)
- allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0)
@test length(roots) == 2
@test roots[1] ≈ -2.0
@test roots[2] ≈ -1.0
- @test allocations == 0
roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
- allocations = @allocated Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
@test length(roots) == 0
- @test allocations == 0
# ill-conditioned quadratic
roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
- allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
@test length(roots) == 2
@test roots[1] == 1.0e+13
@test roots[2] == 0.0
- @test allocations == 0
# iterative refinement is crucial!
roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
- allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
@test length(roots) == 2
@test roots[1] == 1.0e+13
@test roots[2] == -1.0e-05
- @test allocations == 0
# not ill-conditioned quadratic
roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
- allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
@test length(roots) == 2
@test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
@test isapprox(roots[2], -1.0, rtol=1.0e-6)
- @test allocations == 0
roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
- allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
@test length(roots) == 2
@test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
@test isapprox(roots[2], -1.0, rtol=1.0e-6)
- @test allocations == 0
+
+ if VERSION ≥ v"1.8"
+ allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 1.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 1.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
+ @test allocations == 0
+ end
end
@testset "to_boundary" begin
From 1a582cd01f8481528397a58f3c238c335d004318 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 27 Sep 2022 07:21:12 -0400
Subject: [PATCH 048/132] Improve roots_quadratic
---
src/cg.jl | 2 +-
src/cgls.jl | 2 +-
src/cr.jl | 4 +--
src/crls.jl | 4 +--
src/krylov_utils.jl | 78 ++++++++++++++++++++++---------------------
src/lsmr.jl | 2 +-
src/lsqr.jl | 2 +-
test/test_aux.jl | 81 +++++++++++++++++----------------------------
8 files changed, 80 insertions(+), 95 deletions(-)
diff --git a/src/cg.jl b/src/cg.jl
index 212c68484..68a6e415d 100644
--- a/src/cg.jl
+++ b/src/cg.jl
@@ -164,7 +164,7 @@ function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC};
α = γ / pAp
# Compute step size to boundary if applicable.
- σ = radius > 0 ? maximum(to_boundary(x, p, radius, dNorm2=pNorm²)) : α
+ σ = radius > 0 ? maximum(to_boundary(n, x, p, radius, dNorm2=pNorm²)) : α
kdisplay(iter, verbose) && @printf("%8.1e %8.1e %8.1e\n", pAp, α, σ)
diff --git a/src/cgls.jl b/src/cgls.jl
index 43fa5a6b6..a8d6e3c94 100644
--- a/src/cgls.jl
+++ b/src/cgls.jl
@@ -145,7 +145,7 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
α = γ / δ
# if a trust-region constraint is give, compute step to the boundary
- σ = radius > 0 ? maximum(to_boundary(x, p, radius)) : α
+ σ = radius > 0 ? maximum(to_boundary(n, x, p, radius)) : α
if (radius > 0) & (α > σ)
α = σ
on_boundary = true
diff --git a/src/cr.jl b/src/cr.jl
index 0e93e7eaa..c18501b09 100644
--- a/src/cr.jl
+++ b/src/cr.jl
@@ -176,10 +176,10 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
(verbose > 0) && @printf("radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm)
# find t1 > 0 and t2 < 0 such that ‖x + ti * p‖² = radius² (i = 1, 2)
xNorm² = xNorm * xNorm
- t = to_boundary(x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²)
+ t = to_boundary(n, x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²)
t1 = maximum(t) # > 0
t2 = minimum(t) # < 0
- tr = maximum(to_boundary(x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²))
+ tr = maximum(to_boundary(n, x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²))
(verbose > 0) && @printf("t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr)
if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᴴAp ≃ 0
diff --git a/src/crls.jl b/src/crls.jl
index b041f8e9f..329b5a5fe 100644
--- a/src/crls.jl
+++ b/src/crls.jl
@@ -151,10 +151,10 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
p = Ar # p = Aᴴr
pNorm² = ArNorm * ArNorm
mul!(q, Aᴴ, s)
- α = min(ArNorm^2 / γ, maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᴴr for α = ‖Ar‖²/γ
+ α = min(ArNorm^2 / γ, maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᴴr for α = ‖Ar‖²/γ
else
pNorm² = pNorm * pNorm
- σ = maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))
+ σ = maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²))
if α ≥ σ
α = σ
on_boundary = true
diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl
index c3dce2817..575179eec 100644
--- a/src/krylov_utils.jl
+++ b/src/krylov_utils.jl
@@ -92,8 +92,8 @@ function sym_givens(a :: Complex{T}, b :: Complex{T}) where T <: AbstractFloat
return (c, s, ρ)
end
-@inline sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b))
-@inline sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b)
+sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b))
+sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b)
"""
roots = roots_quadratic(q₂, q₁, q₀; nitref)
@@ -111,19 +111,19 @@ function roots_quadratic(q₂ :: T, q₁ :: T, q₀ :: T;
# Case where q(x) is linear.
if q₂ == zero(T)
if q₁ == zero(T)
- root = tuple(zero(T))
- q₀ == zero(T) || (root = tuple())
+ q₀ == zero(T) || error("The quadratic `q` doesn't have real roots.")
+ root = zero(T)
else
- root = tuple(-q₀ / q₁)
+ root = -q₀ / q₁
end
- return root
+ return (root, root)
end
# Case where q(x) is indeed quadratic.
rhs = √eps(T) * q₁ * q₁
if abs(q₀ * q₂) > rhs
ρ = q₁ * q₁ - 4 * q₂ * q₀
- ρ < 0 && return tuple()
+ ρ < 0 && return error("The quadratic `q` doesn't have real roots.")
d = -(q₁ + copysign(sqrt(ρ), q₁)) / 2
root1 = d / q₂
root2 = q₀ / d
@@ -150,36 +150,6 @@ function roots_quadratic(q₂ :: T, q₁ :: T, q₀ :: T;
return (root1, root2)
end
-
-"""
- roots = to_boundary(x, d, radius; flip, xNorm2, dNorm2)
-
-Given a trust-region radius `radius`, a vector `x` lying inside the
-trust-region and a direction `d`, return `σ1` and `σ2` such that
-
- ‖x + σi d‖ = radius, i = 1, 2
-
-in the Euclidean norm. If known, ‖x‖² may be supplied in `xNorm2`.
-
-If `flip` is set to `true`, `σ1` and `σ2` are computed such that
-
- ‖x - σi d‖ = radius, i = 1, 2.
-"""
-function to_boundary(x :: Vector{T}, d :: Vector{T},
- radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where T <: Number
- radius > 0 || error("radius must be positive")
-
- # ‖d‖² σ² + (xᴴd + dᴴx) σ + (‖x‖² - radius²).
- rxd = real(dot(x, d))
- flip && (rxd = -rxd)
- dNorm2 == zero(T) && (dNorm2 = dot(d, d))
- dNorm2 == zero(T) && error("zero direction")
- xNorm2 == zero(T) && (xNorm2 = dot(x, x))
- (xNorm2 ≤ radius * radius) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius * radius))
- roots = roots_quadratic(dNorm2, 2 * rxd, xNorm2 - radius * radius)
- return roots # `σ1` and `σ2`
-end
-
"""
s = vec2str(x; ndisp)
@@ -357,3 +327,37 @@ end
macro kref!(n, x, y, c, s)
return esc(:(reflect!($x, $y, $c, $s)))
end
+
+"""
+ roots = to_boundary(n, x, d, radius; flip, xNorm2, dNorm2)
+
+Given a trust-region radius `radius`, a vector `x` lying inside the
+trust-region and a direction `d`, return `σ1` and `σ2` such that
+
+ ‖x + σi d‖ = radius, i = 1, 2
+
+in the Euclidean norm.
+`n` is the length of vectors `x` and `d`.
+If known, ‖x‖² and ‖d‖² may be supplied with `xNorm2` and `dNorm2`.
+
+If `flip` is set to `true`, `σ1` and `σ2` are computed such that
+
+ ‖x - σi d‖ = radius, i = 1, 2.
+"""
+function to_boundary(n :: Int, x :: Vector{T}, d :: Vector{T}, radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where T <: FloatOrComplex
+ radius > 0 || error("radius must be positive")
+
+ # ‖d‖² σ² + (xᴴd + dᴴx) σ + (‖x‖² - Δ²).
+ rxd = @kdotr(n, x, d)
+ flip && (rxd = -rxd)
+ dNorm2 == zero(T) && (dNorm2 = @kdot(n, d, d))
+ dNorm2 == zero(T) && error("zero direction")
+ xNorm2 == zero(T) && (xNorm2 = @kdot(n, x, x))
+ radius2 = radius * radius
+ (xNorm2 ≤ radius2) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius2))
+
+ # q₂ = ‖d‖², q₁ = xᴴd + dᴴx, q₀ = ‖x‖² - Δ²
+ # ‖x‖² ≤ Δ² ⟹ (q₁)² - 4 * q₂ * q₀ ≥ 0
+ roots = roots_quadratic(dNorm2, 2 * rxd, xNorm2 - radius2)
+ return roots # `σ1` and `σ2`
+end
diff --git a/src/lsmr.jl b/src/lsmr.jl
index 78db5db59..79d2543fb 100644
--- a/src/lsmr.jl
+++ b/src/lsmr.jl
@@ -287,7 +287,7 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# the step ϕ/ρ is not necessarily positive
σ = ζ / (ρ * ρbar)
if radius > 0
- t1, t2 = to_boundary(x, hbar, radius)
+ t1, t2 = to_boundary(n, x, hbar, radius)
tmax, tmin = max(t1, t2), min(t1, t2)
on_boundary = σ > tmax || σ < tmin
σ = σ > 0 ? min(σ, tmax) : max(σ, tmin)
diff --git a/src/lsqr.jl b/src/lsqr.jl
index 083b2f9f9..e4973bd38 100644
--- a/src/lsqr.jl
+++ b/src/lsqr.jl
@@ -283,7 +283,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# the step ϕ/ρ is not necessarily positive
σ = ϕ / ρ
if radius > 0
- t1, t2 = to_boundary(x, w, radius)
+ t1, t2 = to_boundary(n, x, w, radius)
tmax, tmin = max(t1, t2), min(t1, t2)
on_boundary = σ > tmax || σ < tmin
σ = σ > 0 ? min(σ, tmax) : max(σ, tmin)
diff --git a/test/test_aux.jl b/test/test_aux.jl
index 6f4f90398..f844368e8 100644
--- a/test/test_aux.jl
+++ b/test/test_aux.jl
@@ -36,102 +36,83 @@
@testset "roots_quadratic" begin
# test roots of a quadratic
roots = Krylov.roots_quadratic(0.0, 0.0, 0.0)
- @test length(roots) == 1
@test roots[1] == 0.0
+ @test roots[2] == 0.0
- roots = Krylov.roots_quadratic(0.0, 0.0, 1.0)
- @test length(roots) == 0
+ @test_throws ErrorException Krylov.roots_quadratic(0.0, 0.0, 1.0)
roots = Krylov.roots_quadratic(0.0, 3.14, -1.0)
- @test length(roots) == 1
@test roots[1] == 1.0 / 3.14
+ @test roots[2] == 1.0 / 3.14
- roots = Krylov.roots_quadratic(1.0, 0.0, 1.0)
- @test length(roots) == 0
+ @test_throws ErrorException Krylov.roots_quadratic(1.0, 0.0, 1.0)
roots = Krylov.roots_quadratic(1.0, 0.0, 0.0)
- @test length(roots) == 2
@test roots[1] == 0.0
@test roots[2] == 0.0
roots = Krylov.roots_quadratic(1.0, 3.0, 2.0)
- @test length(roots) == 2
@test roots[1] ≈ -2.0
@test roots[2] ≈ -1.0
- roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
- @test length(roots) == 0
+ @test_throws ErrorException Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
# ill-conditioned quadratic
roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
- @test length(roots) == 2
@test roots[1] == 1.0e+13
@test roots[2] == 0.0
# iterative refinement is crucial!
roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
- @test length(roots) == 2
@test roots[1] == 1.0e+13
@test roots[2] == -1.0e-05
# not ill-conditioned quadratic
roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
- @test length(roots) == 2
@test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
@test isapprox(roots[2], -1.0, rtol=1.0e-6)
roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
- @test length(roots) == 2
@test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
@test isapprox(roots[2], -1.0, rtol=1.0e-6)
- if VERSION ≥ v"1.8"
- allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0)
- @test allocations == 0
-
- allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 1.0)
- @test allocations == 0
-
- allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0)
- @test allocations == 0
+ allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0)
+ @test allocations == 0
- allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 1.0)
- @test allocations == 0
+ allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0)
+ @test allocations == 0
- allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0)
- @test allocations == 0
+ allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0)
+ @test allocations == 0
- allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0)
- @test allocations == 0
+ allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0)
+ @test allocations == 0
- allocations = @allocated Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
- @test allocations == 0
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
+ @test allocations == 0
- allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
- @test allocations == 0
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
+ @test allocations == 0
- allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
- @test allocations == 0
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
+ @test allocations == 0
- allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
- @test allocations == 0
-
- allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
- @test allocations == 0
- end
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
+ @test allocations == 0
end
@testset "to_boundary" begin
# test trust-region boundary
- x = ones(5)
- d = ones(5); d[1:2:5] .= -1
- @test_throws ErrorException Krylov.to_boundary(x, d, -1.0)
- @test_throws ErrorException Krylov.to_boundary(x, d, 0.5)
- @test_throws ErrorException Krylov.to_boundary(x, zeros(5), 1.0)
- @test maximum(Krylov.to_boundary(x, d, 5.0)) ≈ 2.209975124224178
- @test minimum(Krylov.to_boundary(x, d, 5.0)) ≈ -1.8099751242241782
- @test maximum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ 1.8099751242241782
- @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178
+ n = 5
+ x = ones(n)
+ d = ones(n); d[1:2:n] .= -1
+ @test_throws ErrorException Krylov.to_boundary(n, x, d, -1.0)
+ @test_throws ErrorException Krylov.to_boundary(n, x, d, 0.5)
+ @test_throws ErrorException Krylov.to_boundary(n, x, zeros(n), 1.0)
+ @test maximum(Krylov.to_boundary(n, x, d, 5.0)) ≈ 2.209975124224178
+ @test minimum(Krylov.to_boundary(n, x, d, 5.0)) ≈ -1.8099751242241782
+ @test maximum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ 1.8099751242241782
+ @test minimum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ -2.209975124224178
end
@testset "kzeros" begin
From c45a492f4375e8e5648e58c2fcd2fa13b9e6ebbd Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 27 Sep 2022 20:45:49 -0400
Subject: [PATCH 049/132] Move callback_utils.jl file
---
src/Krylov.jl | 2 -
src/callback_utils.jl | 50 --------------
test/callback_utils.jl | 152 +++++++++++++++++++++++++++++++++++++++++
test/test_utils.jl | 108 +----------------------------
4 files changed, 153 insertions(+), 159 deletions(-)
delete mode 100644 src/callback_utils.jl
create mode 100644 test/callback_utils.jl
diff --git a/src/Krylov.jl b/src/Krylov.jl
index 7c480896f..e3903e124 100644
--- a/src/Krylov.jl
+++ b/src/Krylov.jl
@@ -50,6 +50,4 @@ include("lnlq.jl")
include("craig.jl")
include("craigmr.jl")
-include("callback_utils.jl")
-
end
diff --git a/src/callback_utils.jl b/src/callback_utils.jl
deleted file mode 100644
index eac362e5d..000000000
--- a/src/callback_utils.jl
+++ /dev/null
@@ -1,50 +0,0 @@
-export StorageGetxRestartedGmres
-
-export get_x_restarted_gmres!
-
-mutable struct StorageGetxRestartedGmres{S}
- x::S
- y::S
- p::S
-end
-StorageGetxRestartedGmres(solver::GmresSolver; N = I) =
- StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? similar(solver.p) : similar(solver.x))
-
-function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A,
- stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S}
- NisI = (N === I)
- x2, y2, p2 = stor.x, stor.y, stor.p
- n = size(A, 2)
- # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution.
- nr = sum(1:solver.inner_iter)
- y = solver.z # yᵢ = zᵢ
- y2 .= y
- R = solver.R
- V = solver.V
- x2 .= solver.Δx
- for i = solver.inner_iter : -1 : 1
- pos = nr + i - solver.inner_iter # position of rᵢ.ₖ
- for j = solver.inner_iter : -1 : i+1
- y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
- pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
- end
- # Rₖ can be singular if the system is inconsistent
- if abs(R[pos]) ≤ eps(T)^(3/4)
- y2[i] = zero(FC)
- inconsistent = true
- else
- y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
- end
- end
-
- # Form xₖ = N⁻¹Vₖyₖ
- for i = 1 : solver.inner_iter
- @kaxpy!(n, y2[i], V[i], x2)
- end
- if !NisI
- p2 .= solver.p
- p2 .= x2
- mul!(x2, N, p2)
- end
- x2 .+= solver.x
-end
diff --git a/test/callback_utils.jl b/test/callback_utils.jl
new file mode 100644
index 000000000..c5993c2a3
--- /dev/null
+++ b/test/callback_utils.jl
@@ -0,0 +1,152 @@
+mutable struct StorageGetxRestartedGmres{S}
+ x::S
+ y::S
+ p::S
+end
+StorageGetxRestartedGmres(solver::GmresSolver; N = I) =
+ StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? similar(solver.p) : similar(solver.x))
+
+function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A,
+ stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S}
+ NisI = (N === I)
+ x2, y2, p2 = stor.x, stor.y, stor.p
+ n = size(A, 2)
+ # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution.
+ nr = sum(1:solver.inner_iter)
+ y = solver.z # yᵢ = zᵢ
+ y2 .= y
+ R = solver.R
+ V = solver.V
+ x2 .= solver.Δx
+ for i = solver.inner_iter : -1 : 1
+ pos = nr + i - solver.inner_iter # position of rᵢ.ₖ
+ for j = solver.inner_iter : -1 : i+1
+ y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
+ pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ end
+ # Rₖ can be singular if the system is inconsistent
+ if abs(R[pos]) ≤ eps(T)^(3/4)
+ y2[i] = zero(FC)
+ inconsistent = true
+ else
+ y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
+ end
+ end
+
+ # Form xₖ = N⁻¹Vₖyₖ
+ for i = 1 : solver.inner_iter
+ Krylov.@kaxpy!(n, y2[i], V[i], x2)
+ end
+ if !NisI
+ p2 .= solver.p
+ p2 .= x2
+ mul!(x2, N, p2)
+ end
+ x2 .+= solver.x
+end
+
+mutable struct TestCallbackN2{T, S, M}
+ A::M
+ b::S
+ storage_vec::S
+ tol::T
+end
+TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol)
+
+function (cb_n2::TestCallbackN2)(solver)
+ mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
+ cb_n2.storage_vec .-= cb_n2.b
+ return norm(cb_n2.storage_vec) ≤ cb_n2.tol
+end
+
+mutable struct TestCallbackN2Adjoint{T, S, M}
+ A::M
+ b::S
+ c::S
+ storage_vec1::S
+ storage_vec2::S
+ tol::T
+end
+TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol)
+
+function (cb_n2::TestCallbackN2Adjoint)(solver)
+ mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
+ cb_n2.storage_vec1 .-= cb_n2.b
+ mul!(cb_n2.storage_vec2, cb_n2.A', solver.y)
+ cb_n2.storage_vec2 .-= cb_n2.c
+ return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
+end
+
+mutable struct TestCallbackN2Shifts{T, S, M}
+ A::M
+ b::S
+ shifts::Vector{T}
+ tol::T
+end
+TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol)
+
+function (cb_n2::TestCallbackN2Shifts)(solver)
+ r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x)
+ return all(map(norm, r) .≤ cb_n2.tol)
+end
+
+mutable struct TestCallbackN2LS{T, S, M}
+ A::M
+ b::S
+ λ::T
+ storage_vec1::S
+ storage_vec2::S
+ tol::T
+end
+TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol)
+
+function (cb_n2::TestCallbackN2LS)(solver)
+ mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
+ cb_n2.storage_vec1 .-= cb_n2.b
+ mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1)
+ cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x
+ return norm(cb_n2.storage_vec2) ≤ cb_n2.tol
+end
+
+mutable struct TestCallbackN2LN{T, S, M}
+ A::M
+ b::S
+ λ::T
+ storage_vec::S
+ tol::T
+end
+TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol)
+
+function (cb_n2::TestCallbackN2LN)(solver)
+ mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
+ cb_n2.storage_vec .-= cb_n2.b
+ cb_n2.λ != 0 && (cb_n2.storage_vec .+= sqrt(cb_n2.λ) .* solver.s)
+ return norm(cb_n2.storage_vec) ≤ cb_n2.tol
+end
+
+mutable struct TestCallbackN2SaddlePts{T, S, M}
+ A::M
+ b::S
+ c::S
+ storage_vec1::S
+ storage_vec2::S
+ tol::T
+end
+TestCallbackN2SaddlePts(A, b, c; tol = 0.1) =
+ TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol)
+
+function (cb_n2::TestCallbackN2SaddlePts)(solver)
+ mul!(cb_n2.storage_vec1, cb_n2.A, solver.y)
+ cb_n2.storage_vec1 .+= solver.x .- cb_n2.b
+ mul!(cb_n2.storage_vec2, cb_n2.A', solver.x)
+ cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c
+ return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
+end
+
+function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol)
+ get_x_restarted_gmres!(solver, A, stor, N)
+ x = stor.x
+ mul!(storage_vec, A, x)
+ storage_vec .-= b
+ return (norm(storage_vec) ≤ tol)
+end
diff --git a/test/test_utils.jl b/test/test_utils.jl
index fbfe2e4e0..0ac2e1538 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -1,6 +1,7 @@
include("get_div_grad.jl")
include("gen_lsq.jl")
include("check_min_norm.jl")
+include("callback_utils.jl")
# Symmetric and positive definite systems.
function symmetric_definite(n :: Int=10; FC=Float64)
@@ -363,110 +364,3 @@ function check_reset(stats :: KS) where KS <: Krylov.KrylovStats
end
end
end
-
-# Test callback
-mutable struct TestCallbackN2{T, S, M}
- A::M
- b::S
- storage_vec::S
- tol::T
-end
-TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol)
-
-function (cb_n2::TestCallbackN2)(solver)
- mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
- cb_n2.storage_vec .-= cb_n2.b
- return norm(cb_n2.storage_vec) ≤ cb_n2.tol
-end
-
-mutable struct TestCallbackN2Adjoint{T, S, M}
- A::M
- b::S
- c::S
- storage_vec1::S
- storage_vec2::S
- tol::T
-end
-TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol)
-
-function (cb_n2::TestCallbackN2Adjoint)(solver)
- mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
- cb_n2.storage_vec1 .-= cb_n2.b
- mul!(cb_n2.storage_vec2, cb_n2.A', solver.y)
- cb_n2.storage_vec2 .-= cb_n2.c
- return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
-end
-
-mutable struct TestCallbackN2Shifts{T, S, M}
- A::M
- b::S
- shifts::Vector{T}
- tol::T
-end
-TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol)
-
-function (cb_n2::TestCallbackN2Shifts)(solver)
- r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x)
- return all(map(norm, r) .≤ cb_n2.tol)
-end
-
-mutable struct TestCallbackN2LS{T, S, M}
- A::M
- b::S
- λ::T
- storage_vec1::S
- storage_vec2::S
- tol::T
-end
-TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol)
-
-function (cb_n2::TestCallbackN2LS)(solver)
- mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
- cb_n2.storage_vec1 .-= cb_n2.b
- mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1)
- cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x
- return norm(cb_n2.storage_vec2) ≤ cb_n2.tol
-end
-
-mutable struct TestCallbackN2LN{T, S, M}
- A::M
- b::S
- λ::T
- storage_vec::S
- tol::T
-end
-TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol)
-
-function (cb_n2::TestCallbackN2LN)(solver)
- mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
- cb_n2.storage_vec .-= cb_n2.b
- cb_n2.λ != 0 && (cb_n2.storage_vec .+= sqrt(cb_n2.λ) .* solver.s)
- return norm(cb_n2.storage_vec) ≤ cb_n2.tol
-end
-
-mutable struct TestCallbackN2SaddlePts{T, S, M}
- A::M
- b::S
- c::S
- storage_vec1::S
- storage_vec2::S
- tol::T
-end
-TestCallbackN2SaddlePts(A, b, c; tol = 0.1) =
- TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol)
-
-function (cb_n2::TestCallbackN2SaddlePts)(solver)
- mul!(cb_n2.storage_vec1, cb_n2.A, solver.y)
- cb_n2.storage_vec1 .+= solver.x .- cb_n2.b
- mul!(cb_n2.storage_vec2, cb_n2.A', solver.x)
- cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c
- return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
-end
-
-function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol)
- get_x_restarted_gmres!(solver, A, stor, N)
- x = stor.x
- mul!(storage_vec, A, x)
- storage_vec .-= b
- return (norm(storage_vec) ≤ tol)
-end
From 100792f45f37c777f3bbf98cdfc1290b7be6184f Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Tue, 27 Sep 2022 20:46:06 -0400
Subject: [PATCH 050/132] Update tips.md
---
docs/src/tips.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/src/tips.md b/docs/src/tips.md
index 604c0633d..ca3d927bd 100644
--- a/docs/src/tips.md
+++ b/docs/src/tips.md
@@ -23,7 +23,7 @@ BLAS.set_num_threads(N) # 1 ≤ N ≤ NMAX
BLAS.get_num_threads()
```
-The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2`.
+The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2` if your CPU supports simultaneous multithreading (SMT).
By default Julia ships with OpenBLAS but it's also possible to use Intel MKL BLAS and LAPACK with [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl).
From 426557de1a0cf962186ab6001b61ab510c3759fc Mon Sep 17 00:00:00 2001
From: Alexis <35051714+amontoison@users.noreply.github.com>
Date: Wed, 28 Sep 2022 00:15:20 -0400
Subject: [PATCH 051/132] Add six Krylov processes
---
docs/make.jl | 1 +
docs/src/graphics/arnoldi.png | Bin 0 -> 118889 bytes
docs/src/graphics/golub_kahan.png | Bin 0 -> 135734 bytes
docs/src/graphics/hermitian_lanczos.png | Bin 0 -> 107418 bytes
docs/src/graphics/montoison_orban.png | Bin 0 -> 176065 bytes
docs/src/graphics/nonhermitian_lanczos.png | Bin 0 -> 136590 bytes
docs/src/graphics/saunders_simon_yip.png | Bin 0 -> 149026 bytes
docs/src/processes.md | 278 +++++++++++++
src/Krylov.jl | 1 +
src/krylov_processes.jl | 459 +++++++++++++++++++++
test/gpu/amd.jl | 14 +-
test/gpu/gpu.jl | 38 ++
test/gpu/intel.jl | 14 +-
test/gpu/metal.jl | 14 +-
test/gpu/nvidia.jl | 14 +-
test/runtests.jl | 1 +
test/test_processes.jl | 148 +++++++
17 files changed, 962 insertions(+), 20 deletions(-)
create mode 100644 docs/src/graphics/arnoldi.png
create mode 100644 docs/src/graphics/golub_kahan.png
create mode 100644 docs/src/graphics/hermitian_lanczos.png
create mode 100644 docs/src/graphics/montoison_orban.png
create mode 100644 docs/src/graphics/nonhermitian_lanczos.png
create mode 100644 docs/src/graphics/saunders_simon_yip.png
create mode 100644 docs/src/processes.md
create mode 100644 src/krylov_processes.jl
create mode 100644 test/gpu/gpu.jl
create mode 100644 test/test_processes.jl
diff --git a/docs/make.jl b/docs/make.jl
index 0ad50d52f..db49cb759 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -12,6 +12,7 @@ makedocs(
sitename = "Krylov.jl",
pages = ["Home" => "index.md",
"API" => "api.md",
+ "Krylov processes" => "processes.md",
"Krylov methods" => ["Symmetric positive definite linear systems" => "solvers/spd.md",
"Symmetric indefinite linear systems" => "solvers/sid.md",
"Unsymmetric linear systems" => "solvers/unsymmetric.md",
diff --git a/docs/src/graphics/arnoldi.png b/docs/src/graphics/arnoldi.png
new file mode 100644
index 0000000000000000000000000000000000000000..9ef8bd3a3a54cfe2456d0c566f95e67a4d000f8b
GIT binary patch
literal 118889
zcmd43i9gkA`#r8Ynp5XgDv9QiRE9D(=SavrlVlE=vrWyN5~7fq%(gMMd8kC0iEZ9g
zGEbR9^j-J)ex6TyzJI{)=kC2y?#6%qusS25upB>*%dh=|zi+WVuVO84u5WFpW2whvU}kQr$8Du+
zsi$XVWoT|ayre)3AKFVkbkg}xZW-L^AHoI9;>GjM=mXZ@T$GdXhT{<<^E*BS4>vzZ>5Ub#EZFSGo8?s3{ZuXTSX@eXswmzN#c!|UgF
zZ*(r+TTlPwY}SVxkl
z(H>n0mtA`I&0yG3@%}6;#vKaFuTSDnW->!8cQ-1C+ErQYGdDMPwc&niEO?q0AJ3(y
zr`KtJ)FkY%a`Av$_e5}RCZm7R!tdJcvRj%s%B!K@@1Ht#lTuzY%ih`FZ}O)K?<#8N
zAM{tLsotKRVV^#I65%0J@#`n^_?~_HB#*W0ROD>my7<@c@u$|UT<(i(-UW*~@EKIH
z+fDucubF38Z#j0Z!k^#jW|{q6mjL@L!(DjJ$2THhLbA5{GKOh&A?ZRoe}Fxfr1
z=zXv$*XP;U96#j?7aojst!Qd$x+p0rFDK`9P&siROZxX~+C_ftueV+2SaMNi&2QIvx+#leiGRzr-+sw7`nIdmcLdrCz%y23wSIyJs18^ZNZ~&Yq>&kLeG#7ib?iaKN@(;Ckcb
zWi0F;qAM?7o*ZsZsyg{oqrV|Z8MD_{7aQ)rP52-mpThaa8xHhL#+O!B*2l<(rx{dn
zY!kV8q~WRN)DVMqwdk>9A4~FcfB!hXCogzGS;r#_U)88V>=LvQX7~^p$G*
z`cW6$*z{J$NqoG+ylVV=hlk0xPj9B1%LbZKZRUSWJ8G`aNGfvOWb*0tZ7e-rw{1e!
ze?Mf|_OXvnrRfwdRg70jVG>R|eCUv*e6+O9O)+t%@X!RUJT(akiKzF7l;XF?+pfJi
zL07i<_UUwvRh)8CungnxyUW&APW)VCHt(|0AV|>a!o#(kN1MVAjYsoFOAk%AdK;`$I(u3@^F~-x
z2cDz1I_xZS-I`I!Jjz5}gv5H$Z?Bh3=Rb?%cVpw1>nL_pq!}w_)Jw_8G`zmOg#2Z0
zw*9BlL?4~suzI_@JLuUVXmo@oCX-2j2?!!%~;+~$K)6J3|9HMm@=Gr!c&5Cc{
zyqV4xFyoPrx+Ht`>PvstHbAKbC-|1(sOg;Oj(
zCML#lXeo2--<~^-KSoHL&d`xd(h?;zeYinFS~^__17#!T@@qAQV3Wx8XA8M-Y+p*(KhYtUvsbgj`zC7CX*gK|L2Hmj^
zG6);1vYC1Dx`!JK+w=42BT5NsC1qu0oiqPC_DnZ~b4=eFlWCEYld}x+Z5O}Y_Qd5}
zzTKLw0+;gh^KHBNOq&&K`syr7NV_`oLn&&V@
z$=v!{Lp|7#|G
zKKRyn|8j$>V0xlvjzY}k9TPn@XR*ARQ*<8|<>HHdBkwtRZm}$yauQp*bZOs2gSMR{
z_pdR}eAdb;Os#lvv@mwsm6coW%$Yk6vlm@=7|QClo*Ym;FD`CU6)bx3@4u_#6sfI$
zDG0mF=h_Z7cRo0LTe#}wdDm-vZ1faeapq;q`uz-oQz+v@uk^~E2L@^|ZCiZLe!9+#GqS6tAK~Vc)#@MZaiCNeNl}`&t$ctX#1|wl+#?K0(YHqm>S}iI4B?Wq&dUf{3y?!J?E)P38f8fx7Z!Ueu0r&CM!ka1x)I>^z`-p
zIb0Cx4{GN-CQelZ3f4X|T{L}g@7@dT&aWvUIdv+hr~i@iY$mU}mb|rzzWT
zZL|lS3`1#BJrnPLU79c6Cn(62wfcbU%X>>#Zw^Hy?V=2SE__IHm@<<|U-orXcufpe
znHKg3Gh^lD?UuHR4x=Ue5a*B9-o!5aIy`KC)P<34^Vq|qDr5c@dq&}q@2{D-vF}iI
zazu%C0aU?8M{pi`vOQsJT
zJ0|zugHt)>YEk^s+Uwhx{ny`EGP*v*!DeUC@_p?8LjCmZ^%8f}&q%lcCo7nFBrpgd>*e5_ogLY%?@Jc1;*TH0kybX|<>uIZVsLDO^s9|SGWJ$m%$;>C+{
z$VXn!o_Rlier=?^y85!{M4i0;)YjEYN$@tQkFOc&c<&t?oQPD8g{a2HXZUoe;OD%C
zL*pwwS8R_+l1cIY*;w!uSu|t1rC~;NU7f-lP{Pc3t&Gj+SBV@eq`0_U;_hG7#;_}W
zeSMV^$Jljvo>(CIa;v9bl9iPevgvz{?8VK)!?Te`OGB^pxooO_#rTI^9zCBQa{6Rs
zjH#;JeG`iCbKvOFn(?v9>C$QY_8)qK-NCz#9+f?$oT#9n@LW1rB=*@pnbMY)mT_SP
zgnAMqrWkf1g>!rM>@jM3r8hHHby8~hQDC<=hNpJSyg4oQ$B!R@&ht|*1FKbmrxG}(
zn`kD=8xJX!52Rko9?7xq{kR8_w6a5V@18ve1O${)Qc{dtGgIhOwihoxCK-2p+?pOE
zTR3n+UL5Lr^UB1(-
zHBgUcj;$0@Mw~*d7oty;bdb=26DJxwoHFHYZPUpjS+(hKjA6}7@`1;jc+6}Pa|!{I
zPh$bdU%wuYwX2Y0YnoBNFuzOPzE6Mq_HFrmjosG|p85^KwgWL(lw`JS`^y6b$K2Oa
zL9z~rh-hG_KO*OU{^r!y)`rv^^kDUtmj=Q17Xpr(*MA@Bpkd8W$AD-PfQ*gX^PS>k
z!ygp(1J|7a{-}SjX6tiLPaPX+Fp#TPuWnesJ{+@c*!wZkE8^+Xb$Q?3u3{5&%sg}E
z47-+mcFLI6&6}BLt;`uZ=6HU}bf>S)r`L;8M}ROSy~O6f>x#~e*REW(O1{Kvk7<3-
zF}=sm^V0?oDaov9SxxUAt><@|u?`ZplLoB}^T(rG8nyuzvkM6wPa-+3w>H}3%^hZ2
zyN?6`?Uu0Vucz+|aeRriAw-h~m<68d{`Lbr$TbB)$E5hl780Djyu1e5P8!$k#}-q-
zQQfA)kKvW`c^1jEtg2@UVTL8FEO>T=gs0tf9xsM_)4e8gGX!QyXZ}QQV7vJ(fcf
z1h0O1H2~O#jTL$Rwez_U%+K6k+52c;aP`w2XX{c8WHKf{NxygBR*M98>Gr3Af$ORLig60ENRI@X
zVtnfOf*X>X3A{I^AFQns?l$1qw8>z`q574cq-lG;=&%ftL;mMyh>7_xB}w4usdg80
ziUxqA&2Sszx;x^4*3Hk%c@y>OwkF#|DGI`NLkfsgA|I*Iri^Swp8W4G0T%Qv60Cba
zV&QrpREP=1Xp!LMGCv)CVE>gPo6{^hE||7v0-Cse>@4%$!?-!9mVP}}oWuSch)*qW
znH2x}zy2D3vy4;akCX9_{`sf+P*U#Il{Xpjm;8?PwPvMqIE~AC^Jr^9r`W`!RfmC3
zvmI0`DlWe0MbBp_+xnT$7qh1sv
z6g!i--gr=<8laV?U%^)E!22F6k!Cln#jX14T$vBIyywpI<%u#OVw#`^0?a>$36B35tLlHq>0I4RGre+`Z0^Jk6
zweR~3>M;_ntV*tr9@UKZ)>;Mmj;ZSMv5~ErH#gZ5r;_rr&AxjZJwaU=(^{vj?Gy>H
za~`Q92Sb#IEf_$5A?lD`8FF&CT1YVJlHW@*9*`vZ+6xLUcyJ^fRE*n?)tzHI_z@9{
zM4&-%tJ-LphOZ^hOgX~(o=Ey0HbP{J(%iN6xLJIP?)y01_wM?)Gr?SpK~ZM%#$s3>
zuj^FLqkY-!F=HX%Qj>F@}oDuU2)?)
z+16#IC8%E+XfF;xQW+TQtL==$tg7DuN=L2>pnT2ZUKgFJOfPV8Nzlxxzb>-x-UM>y
zAug^<__gnDfd7)K4B+#w<`$4!eq1;;$YFP^O3GFLBRY4;?QHg?{JHDGer|3lGAb@V
zXSHi9mHhntX1W5*$IEyM48iJ*0Z9@?L7Ek%{P|=_!U`5KnsR!&UTtP5XLuZMBXFlBO`WEe2L;RLs|K?Uner?PER>R`9E&cGJ8fXomH_)U#9TcVR
zYIIVWFHgYz73TD88OF&eG1ZJYOX;_5^unuS8#c
zLO`(xr+H0389A(BvZ+{U^@*BP5IG~vxtH0%RetN9%e6ae>PeGi-d$=6CV@vRIN?XQzf
zOb6FEZJsybBPTC!^Z6~aX=C!~6)RS-j#;)@b-edzxVK{cI3S8~`HQ0jd7V3VZ?8=7
zb^m~;PeVcRO@Q_F`L!HJ&Lg7qVO!1Mk!`-bTM^K|dG*q~@sE-#*RE~A`iKR)RLxb=
z%(j*#5EG$MxM?aE{h2WOw(W2o3`buA}TZl3R=|_xcn5E`pkkL$Lah%j3n9GcCY#b_5S{(0;R$S
zYbv_ZO&X-V#_{$34v&-dVBRFO4z_0TR_V|%`lS#I5uAi>e7d!B=S~fWGye(iOiVYT
zu`rHwIYvfCZb7VlQd3h?jKsQCw9`|Z7mv++|GpX7!^FxA*g?w5)CmgY){ZY<
zMC#4mA$>FtgyHR_>1xSWZda%Qb*4BZy}5Ek3)gDk`e0xex>9=C<{?ZHvQeLq4_)Glub0
zUY;34DB48miz`ola+W?K(i1?B#(XD-T8F*{Z5Nt4&(581^+lmycK|Mr)pftQyUY#Q
zC}Y;j!_AE!xS77cWb=j%*EA5?8UErP9JE<~^Ty;ixPu!qJVuE1utwu%4i2*n{Y#gg
z)Kpbf=~sYBboNfSSAd_m0S{v!&7j#eOd}N~&ooO1o~YS12VA*zE1%(&6fK}Y3*e;-
z0MH%LA}Ha+F>=X!l8J{XS_vMt^6gUf?mki@`HJjGT-nE90hD
zk+u9EKYrwzcYeYy`0<6GUvKr?pMU;YeCrmIE@Uz{tYKbiEwFU->`^wi;^F++$}a}N
zzTLl+Gp3Bxo*qM8F!7_@+!fDd#+1Z_9ls|xx$l$_n~s2decfvQgXb=*1V102C`5$p
z$bui=ATIRW-=vjH8N}+61+y4v8%FHFuUq@1&ov}zH+=hCDCNB$5NIDidQ?Jt7#g~L4TnhM=ip%lY_Df~r9L9W
zkR5ye+_@O5?n>KXqqf|vigw+*Z=QB{41+?2BDl8X1_uQt{QS{>@|4=
zS(fjDoE|l1WYa48QF=&o_?c&jD3fj3
zc`-3blSN%zt!u!s+icIp=hYDs&$x<{G;^YMoP7`jrpEeSL4EepBdt$CS72gF=ZoUu!v+_08$J_UV?Dg@h-+rR*&8=SFCF_rL
zR%>1`x3~Agk$ndbUc^P~a8bg3MSpxNR2mkxAB|{pn$n~;5DEk4e4?(ULdb3?UMr8H
z`lS0_DeyddqGGJvo`41dME;|S%=9O5C$_9&{ysa~vgT4-x#upyVQazRB>>ahn%N3#
z#lDB0o$6!{GN?d$9BuW;3vocCS4z|{>Z5yBt&{*M%BwITg!!L8ckPyFclnuj;6}&A
z3S=Qacbj)>1V4uvgg7Zd+RCbQ8Ay2_pTCK4m4f-1vcWltS2Jze(4|2^#u3<5HMSnR
z_K)fb6|Xi*e}aZb@5JcntL2NMBO{GkGcDKzuD@WkCj>ArT^fhHs;>*UW82fl9xW46
z4@p~USQ$%&8WQXNLdYhrscN!llXe}6N(zUP2qLSME%vhhIX7hja3IYY)S6{wKmeQB
zU;;>|S;8*@$z*rOaSfHwW=Q-|@3EWZ!3nJLsyrF=`I&wmq|=hf`O&i=sBa4kO|{Gs
z5DIA=l%5>{4Xj)$S7!1xvMdShBk(mKjL-~(#Y21c6!lGj^$N8%B>W3vM0V{e+_2J;
z4vcZhgv|1n?7aEe@!pPaEW%TFKrau9ifR%rAit<+<;H`Jxm}m`*K83S9)lny%--WU
z@Q@`uJlq@UmP3@%$e<4@Weh#BNsWlut(ke_&1W^c9I&DUFjX&hXE&sb@m8xU)x31W
znw=RFwy8VMKUR5C;m0dZ(82Qi5^|lT-W#`Ui6yBUBGOE!ul9JQ@bFio1V|6PKc>fK
zhFyO4D^z7n(p-KzKOE-YyqfLEm0PMWvcZmtZYMALHAcH2zv=KxF#Nt26hk`H9$K2eQLRGzcjJ;y+O^b~Bh06Eo{0DUBA0y(aE`PXuzQ2jRnQ`s)twk$W
zuP(P~!-mr6<2k93dE+#SR5bfM@FJFmY@{($trjS&yrx9qXwG-ap_`mOeVR}v`3bXn
z05fXX+TLP!`QytrkM#k1dqp7Rspg4Y`>>80_WnJGLwTR9%TzYTVm}{WU95u{L$*(!
z6lZoZGY;_b%FoZw&wRSOHZVKsT8Wo!l7qc{Z)%m8l}Uw3L!zMKuvt1#Hsq+D{MjLS
z1lDc&JOv($Xk|gdbk9RN+Q`m6P$#-*(ISB)kU|VdX0wl?C}sS{!LJ9Jh84X4uEW{*jWcQb(xHF-
zJgHHiV`m=iu6zAs#3SSSc%!59z!JTm-rP0o=Q*BuuR0KGaBcK(%B0vPalibj(2C{N_y(f;EFnDENTF!|Y1*uLu5
z2;Gp%zfXs)@+4n`L+)F|^mZn$AWHA>63LdO!la`uvU!F&Qyz$UL
zzW$^;j1zg`15Yl5BV+ZAvXQV~#((2105Xa?5A9zz2m_HB5e$wBR2d=48)Ze#CnzOpnI2``upt5gC`&{5#j=rs)GA|8z--Dy9K#+!CdahU*o#DIB5}QULPE`*{G*4?E#7Vp%_I%gW(iV_Q6;
z9D5eCBV({jH{0qtk_B1yTU7ZY>*A|oPtKFXKD)0{3t6d?|IXdJnD5tC1R0f=U$S>_pdvzLJy0>tvg~|Juc{*T
zbd)SP7@k}OZ;hw7#1ryOGil%|hU_uuGgE)Z4%qCXj7$dGH7iAsh9Z?KS3+ABRK}T3
zRCVakfTwy0d)jKX`$iWjxfvnxb7=45K@5!qCN@EOVHJNo2jWL`1sD`e+suz_=A(=o
zf{T`XQ7d}9k%nX$J(J}){YrjpY-dJzZ9Mayz>OO=e6?J|6#UHUj~+-yF?Q`v$E$2m
zZ$8xb4J^*Vlibtlf;fgc8NDT*%~CWfks|Ahkt(nK@@%(%&|a!ne~DquYnyOrJYm
ze{es%Q~&unHJ@6*@ZQ>*O;vMN&Ag$L`2Id?1R?oYZO{Rq7SW(4YC|#3VB3BIoRq`<
z>z_`ugBh$12&J(gaa3Sr=?O2+9Z9IKbb}b(rFv@p~C3Rz6Smw}M`qU_aL7=Z1oHPUIw5F;_834L;5FX;5%NoF@I6
z8WB81(>WH*sRhR>Dv=q}I-c#$!}1y(8L?2G>kf912EDBZ<{GFB09i8Kq7;BqNbl@l
zdoI4n=TVG%PMI9=OUeWCynwf2ec#)edE@h)GLv^jMNPS^>()ITu9`Ocnz(D0$*!zP
zF#&@?N?a{{V&IY#KtAraSyGyV?uaeanqJQsLP0idIdz+uV=1;b
zAa@*;i}(k-MoJfv4WgtMeb<=dml`Fyh
z?IlQP16mmyx0ieR_~_5f+lmgcepo>zDtCenke@F9`19w_BOnlG+9c#?0-KULcWMgm
zo8{=JkCIoBCj$;GS^SG;oQX$4K|!$djFIaIf;Hv3)UQdnr7PlPCg?C3?>sl5T-!m&
z5Ndpb{j1qe9BB~5hCw0c#%y!qhtkqx!mli_;|||{)A|`KIL#i*)DVl&J~2lHp}
zb$KQ?XS6$;uu7Az+^6c`sgUmRC9Y9^c|xY%vMRLpaaO
zoj?O*MY8X9AK07^ty`@60-1!710nB{qtR5>=CKwe5wr4co`Mf-FQJMj#bgkGnXJW_(IA7`T581Zrt9
zj!sS!ua~Snk*8aVl3o0K_^hsnXcU}djd^yE$4KEERX^^<8bEHTlzU%@`3I(#+73{T
z=J*i!L=p4--4MlUx>$}m&5Wlkr)Cv4r5ZqHO#Jrk1_Ke26sgFr{x*jwRu8!nK0_JI
zLg9cK)LKGhAty`1v6aFWTIzS);>$TFgaf6s_aLLxfUR4t5ra(eW`J|dO86rWggRt5
zeO+f~XJQ;+%nn#|ihkL}q~Z=Td@u9Yu5P909y@mIiQrnm
zSp0#cJxmH}ZB>0z#FZQ^9i*Z5om{Fq^tSvIs=}&BN>)aiCfT-warm726BMkP24bWQ
zu>Xl2SRe`0*`ZXpZb$&dL?=XU>A590kuw38!;_PbP;>JBfQ|`ingSU^>b=Yb*zhng
zBvye+a1E_sVW}7Abc)!db@NRalD45ZEZxDMOYTX
ziX6Ugx^xtdq6ql-;~*Ijiy$;gZ$G~SD33r3d~3G-sHi9-GJJeD0eh^UOW}aE4K$^Ji{U*@X(+#P!?>^d8T(?K#n~S
z!X*@QU=yH@Nn3KI%g`SuX+5Px}@6H$MA{^CUph&Xu{
z5~DzVxcW96)*!Y7%8!xvil|CV&&G5IcMJ>;x!B07r&-yB5NU687HpHmi{@
z=iS%^<%lIdV(WV?7c5;iVcYA8Ns{2DA8Tv9y}Vvx$ViAl9P=&8MQG_mEk+(87FlyU
zM0VFt{RxB%FIuwn#o{`ok$RXd-n2o`3z=0H9!6pFtdOhm%YO2@6qb
zAmvC&o5I4vL9q}J^(|ZZjlX(!TlY7hED-3N2h25s&{mAHEnOwzZ%>gV5C;1W1T#16
z91G(?2(wu~a!nxJMt27ldQY9gvI$Z%0rx3}a3rkJj-L}Q^FK8Dqaq`-eh8syp#5bc
zR+A*Z!$hCAc^(F_Intk~*)gi9#VmF*Z<8LKarldswQF1rLuEZJy0hd298Ia&Id;Rg
zv27$pjWkGljbLh`gc2Pu%);k$O-`;A`F5)%SY69`fxq*U?VsDW
znO^$)?<073zfmK5p`~>V4Go_=Qm=zBSj+^&l9HX9n2=4-L{?gGUQjt4o-Z-c{(}P@
z5;{6M#;8Rxg4*I^F-yQU@u-WKpc2}gZW@q|O0UJGFBQnH*j=JzGZXI(@g+uw*a23Q
zoitb0)>dv?lC{l>LM2v(WotR=ux0ohCa;s!*e7j*J=xck8a)rSEe>u97P{?(O`7dj
z6C|+;_j+q_sS@72rR3RG1=51th}IyNG(C|M!igF3^6g+*vSbNk)b3L~&B@VGBd))E
zNIB6lvDNVj?NEp(O4KmAkyH=nVFZI4F|C0H$DV(n{L7+z6X^2Nu#Gc3Q?n+SO@lyxW0enV^aE{r=aheSG
z=ZYP7|AYB|;s1?{lG>x8H!4sk9!D))P1L6v`v?h79*fj;X|AQSG!hC@fj?|z1J&8!
z+%>SRu?yYdJx=PfpvW7+n9+hFAqVY}L>J^?{)Udur#Z-D#MgE7=ux;oo?$%`mlB*J
zTG~v^J0XK2&3*(#AqET>DGr`Ep#sm-WrzuNVP}^`-_2&;gH8(KRwN&STZtGd;o(#O
zrkr@UUPeW9Xgtza%M0X;=w47$h`(~&sd7_D)&IU9(~Y|{-BT;5ovFZ8#;E?Vc5D;2
ztusl=i`o}_^KGx@2NdY6;pRkP*AVSFLX`2CiY(Y-Kg!F9c3QXpx)A>KX}^gMe?vy&
zokA84k(dT8hl;DY=I!}2Fu>A*Per!*ZrQv!21X!5+(^js>pw(6#dE0!&9;~q2x@Zv
zb~TOjD%_k>fhT1D6{Q#!dj?DggDCi!_D4iU
z_M#+2+l-)+3^D0*y3Ox*k;N0qV%o=xx0aD#xR=m9L2StC->W(
ze|6sAUwDXIN0GET76&c^KgS=4V;W8ifC1&Fc?>%0oT18E_UB1Cj*^FR0P)WHA2&}#
z@*-;#HTb8y_w3!<2T5wC{pWl%e5F0eAtbma)F05yP9w%Qz|WflS--!11@Rtk-n{?h
z$&>aD?Tzwo$!d$hZ=imQpoMp3d+H~sB0AJ*v5T8J7%
zGi?zd_BCREwn0#ac+^b^$;iln$s!(fs89Rnj4x!QFr?U6r2FaW--lCWXT0r02^kq>
zj6Vn{X*L6;fba1GA^=QnD_K~Su%)F=Ui$6+ncGxWQKwmN$RvjYhQ(bDy8}_|a&^LQ
zAG#&k=(V6eg@5~G@!y?=9OD1gJ^26SN4f)i1JE>Q(^I|U|LqWkSa1rV>Bddx;J3yk
zAi=&hYI*3lH~w_^2pIeXQupU+_9Y9fviQ`uOp(EOfFH?R
z(hv_6vF^wI`_dfmj)wmJC68V43uF&+ixY2+|9|z78xQ|+bE6}zSpWDwf~Ly_5BJ(A
zpP9jQBL@y5#H8mTA=;-i7QcXxN|7i5&{2tL(gcv3neo?xuicW@V4))ZQMmX%cXaFm
zzyyvhg<6Qcpp1eeLE5PK#2^{KMK}7PIdf_NJ(&=4v!ID3fsDg&Esp4~052W7FXgHS
zKXXjMG1&)L?iB&s$%Y%pr-=gv#7-V4mw5Dn9f|x)`oW;6TEnSLv|>nK2YZwj1eH^V
z$+Mk1*8@G>R-8wA)#}v>0K)K_FF~Q@4AGX+JFqfhQ)?60U_1<$q_)Xti{(Zdtw>)U
z!l?p2vv^?L)GJ`zeo#|hQTIoW4!kxStpdqSL^A0X;t>@U<;-4iXQsr_!#ztiJi+V!
z`Dgk4`}d7d{5hze9v^JqwS_c%EWH~R+ETb}r;v~uL}~t2af|^#Q_{RLnsw;YQdl^A
zwJ0S6yxa#4Ku(ijZd9C)0hHo!{;orhSCt1F@bd8Rpxe9u(Y#-rdAJg7yGbhoe07STyrZ#)K0Ux7Xc2>Z(M?PabW&NSZJi-N{zj|hvz@pR>cgXLCgAZ~AGY06-$G*y^JVm}#%rk;I6Eck&`6d>BPC0c0UsN&eZZ2a
zLabRteyJ2j^$E&hB%*i^=0P4IdYjggDId&>um~S@JR-i&OwdQzTm@Ad_$N
z_YDpn`Yg@#uMHI|E?x_6?2>LXv$YT$k2wqzc4RujcTY<5`|{xekiw))==$4QxYpeP5#Z|&43
zxr%QODl;Ob=KVEJ3wo2p5!f*er+-ozcB*+9{qrI$A!D~S5rx?{A(kI6+RHfx;4LE
zfN4KfZM{-I28E!90d|Ob*i5lrLT99op_7le)GGpxA9u$tFd~r9`9~%wEj3xV^%9hj
zaVW=S#l;s^v2r~{PZ3ODDu!(po0Ske_yqM87mk#%5AK7{JVQ6#qcK!_!$Cw9&xpqu
zk$sBYcL?2qq&*vtF#((Si$5A~JtobB#G#BXkjp41?vVhqKFl|)zls_e9SvXf*}b~I
z<>VMuV<}8WD}x~!sC#6yH7Mf6o~8WW*!}=>J2q|djWc6D?uTY9a4vgGJG+1V=^dF_
z+lMMxm0HlNj2;Q8;@^%uxSU!Sm#hW}?`Iz1%@2~PolCE?}D*@y_0q@-pAixEzTY>?cXa+1OAZWB*V7F
z)L+E@Rf(Wg%dLwakq{1r(~LhCCxaHvUDMT-=dW0BAtqPyOh(F=at6~|4rZ=r+MkDd
za^CkZuCG#3YLE=jjB8-o)>>-(u_AS&Az~L6`?X`mtf31oQYV6^Vddo1{=$w-1bH{|
zuk7iQm}qax$Y7@l<*cZMPOc{VD4q`zrp
z@A+)jYeZuMh9t}ec0V+AO5lH_#SO*3SdbA-q}ONR;kuqSmSw4p=IXVLkRf`omC(
z@`Ga*NTn7nwV^1U#zBbRlRxC8(8=rX?H!3*5+5$oSzlXT4qRg<-p%|?q}K(N$PX12
zQpgnfO@hZAR0hdnf;ku3{YHOZC}R+~O6hb(>`Gr(SIIp#%NI=P#qj!jbmR}MFM%NX
z#Mq{kXAWHx(weaDi@5#htRasNhC=8cm~UBIn-(5|j!x%LD79*lAr<6~Ou)&=q!
zxRVAVZlso>p`plZ@!Ngho;agwOF=`ix4*wJyOphNJ#4E{D6=$w$)>$Q9r!t1%fuG-
z;g%jsE*0+WTskxmGe7nzyhD0=1M5;ZU^%$&NCOn4`pw-ZPMk1=6VD$z86}zEi-VgM
zjN=RCLvM|7U;;>!R*DstXd0LbHSud2PRSs>sM$9@p9!-fSntJAU6PqdyAygJgdo|I
z@*ruF_((fN=PCH{5+aBc)FryZaNfRU%a*xPj&5EJPUER%-|2nkbj(Qb+79-dKw6
zy9qdci7um-s_zD)pI+crxIT}!JJ!AozMR&-ghCb@);{;$H`|4Uh0(9jj|`;KBeuv8Q%@~ery4NFJ_!fr1}pPip^JuA^hiC9BeJ0_Id
zvaKTbl^TA+p6X%8!q36yGVe$ZswgchtA$=0gJzC6^vsYvz{;&g*19nUoH&nw3c2>}
zi#|x%xL_bp#WCy9#=5JB>9NRcO%s?n%99Bn5)PFegoTrg3w$H1f~;=bs3vwDAd3J^
zw|*QdQ-i-6uHGlnptNiD>oXbanQS6Loe!T4T}3ODwNwEO-gmL&=pRJ+aZjaufFOkI!@y`vA=MQJ3aW
zQ@j9}$#X2m7J+>~8hMGm7y_KJhH53i0`x#)PeYU?E#ndmXC#QENdgkg%p04XlXhcL
zNwErI+Xi@PpwUyWTTswPGY2#5W3(rumzEC6!Lq>->z$_io`sp2`2gSpc09Lo9;pH1
zY!e}P?upQfFwZJ@{Nzb(2m+KYKZD&8h@kZl9y^DJc_d~a89vUPgk+_F6ye!Tt>vAm
zl{x8mea?t9<$`u;_xaO`uz92L9HcIc0=o92v5JI5Ti*D1+m0!+5lP1s@syGNrUdPR
zwgCruV03V0Z2p&rH$d6MB$ySvK$!ok6O;woWBj6>o~to{y9RQZD^<)oVw@OR_-^}3U}
zkQT}D1W8c*E
zRn<8AqP(`|9@G>_l|%t!;q>jzvdHWx?7#cPW)G6pP~#}E(9#OV9FuOy)Rr$oM2B1s!B=@*wPnt
zzcF1-!HD_z`bJ|I`cVBP4hkX%z-G9)P!7Ww8CRRADGa*Z3+GHNqD3dd8(j?cU{}op
z{X}`YZdx{Jo1L9){N>%F`^C?^ywae-T}{$NfWx#s7CDKT*ZmfsHZU-d24CaK6~1rk
zFdKXR-OosxD}!9nIO3X%qa{*aWn}R1X)%IjCHl}78apIkZ$R7Ig1%0iH5CeZk1YAf
z{7MZKoV6f>S9+!IKre-SyJ~LR5m+72CK_AEZ?@VNJNo0nyanpPtyk31WW?4%lBrS)
z(4A+M2V=j#pj8wq(N}HnM=wT+1wD>bLeh3-EamoqQSE?`Q1*v01kALUA+)2HyKl;<
zxN2$nij>Q*_KS^`^RiCVqVPa9d$?k3rP5NqO@BO#fZTotC)XU{Yr_djq?D`6sZ*DF=IyQ5YBoi1&)vIcW0UwZ*2E~}WGv3JSan%hS=k9?
zOuaY@-OYSRPrw2N!a;S!9yXn8u5Y)J^SMYeC#Qj+1PhEVi5`Q4mNsd9<4?5A@f2*l`cHs{
z6?$5?*|mnj#H>=MEJgZ*4#0gtT;oFko5wWO>jY>>u0>+v{7r5;G!zhHq0Q^X!Of;*
z$c59x;A=W4W3ph2zh11!p>)WV6k&nKYJuDN(FuZmQaX@}h9pV5Hv4E&jlk~rp3VRK
z`7`X8mpr>ASN3msUs6JtAvt+!RnRIH76J8HK%T?tT?m8RL*ib9F`#6&pO-gP!{%}>
z%G)Ky#ZirdASQ?DtbX*tEqPk7;r!d34%kVIQK%51>0Wrv<|C(2qV@_33M$H@?sJLk=XCaYNpRoM#e&V@dTd@Ew?GpI|->RJ&k+8t6gSM%wH3;hgf9L*b4Mc
z`yhxPGSG_Y4lMvhDcw-T$`lJ?)|Db(t#kvuH9?9msy`%^8G9l+uMSk?;jOOAjbV4a-A0Hbp>fJH0cX?_XP6Gx6mL0jcnbQc^Nn>ennVv1^QX+;J9
zmVPiezy24iEcCBkQ)yiYdy_TXq;}KbswAD;!o#`pUjv~ziefjQz-^T^w2_TXZNPr*
zS`SM)&%t|MFJ7n^61U2prQBRA-iD3>=%^PODXVbk-Yjp`j6|dyxiumm{ba~
z33nprUw>V-{ejq~H+0?$@rG-N**hA3s{cDcFG@>FYLN2wXsU0qczbV!Qi4nBJvvCr
zF7=jI+)Mp~gUtj!G@%!9G;N^}&7}}tXpCo4!tkRb`q9pAbVQS8J0$Rw?_zuIp9XMA
zc*W>(gMnPzkZbSWsCWIPIi0%>ii40QC{cw(!;kRW4Qaq@LXJd3N2*?yF+pKKihFfw
zN8^3aHVXeq;k9xv?o%FcC>S1+Ey+GDyKuo!_=PY8*sq7kP~ecc!wsUyWsgPM;Y(e^
zDVD1-l}XA_))?B2Y%&rO2PTEKZiVPj_aQDX^lD~yw%WflG2()X&G=-CIcY^lL0R3i
zCDw;@L$17^X{@UXSPn^ZzE0T{V9i5%?VeV9xV1$Yc3?}$)OpvbHpV*Y=kb%aJ1Q;N3LR`f`{s2BB
zIYTb=&qAJ6iq+O+hsFJCCL~%6?m>CCCr?&m%&FCxH08|;Wywh8jzZ%|%^mv=AFll3
zV2ym%IOe_O4_Ll!9$CRBb};N&oZ?m`kJPR{X?pLL`_BUcsE8mJy>QY8A$`<3ZeyGD
z=Jbs{Zns;I)qJyx@vw0I7#-{C>WV^i^rjwPF#lJw*f^CAC6L2llHf>ja20i#9@TR-
z@%09t1Hq6T2n^T}&$@oSJe^zN5bntR_|27SBC$hoCFWYtHEN&F!EyoB}+iF?qdy&nHIWXDcCY3II~VIEOC@}N3Ed3`={z?3UG>(gu-`LJ$(4^
z5DZFRX1d^X++=Ttmf0dL2jmy(q+HEohlKNj*woC{a#o5N2ICj7G=oSXkZ3i)T(zbd
zjJ<2ut|c5*X)spkADpKEb#)JvD&i_rN-#CsdO3FTKHY5fF(AG4@&*9H`fH>`7NCfd
z<*-ne_Q^4NYfO$?DQakLYU(B2^)nE;%MkKx^A1BexDA#QOezw!Z_cAFd5cuiYk23QlSMPp
zqg=(RHEXbt;b116dM-aYIE8M03oXfllPv7=tuGX9rwPT_nf)c$U>k8X
ze4t4lIycX&3*d!WcO;-J;f%fH?=xGyH}I8IRB+4ZfuA7e_(n&n#UchV!*WI1FE-7J
z%)$6Ua$~|rb;VUHSAGN;r~`W?61P&4wr9&Nw$)1^U3%xZ^WUUU{Mm(tgnW8uAZ1}|
zAm0$&QOa1DC-#S6dG=!+$98$|jZcU9n?Mni6%TK)yKH**cMY6H>*|NLw)_ucq%YvS=x#J=LXpTg-@qEFnyOzE%wwW5c=G48j9*gL
z7KIE01uxQj3QZ-lQIJ$!pvYy-#Ie8!H4bH<6nvgI$$%8%o!1hZcSq+6(jnXnw&)uW
z5CHOG4aapqwgqcL>H|mwe(b(_k?+azYdC%C2!}AFAh=R3LHX5O7VN^;$N0tKZQpOR
z`|##X*gJc`86sgm4zijvz6SVzzfuOa6v8GPOu>rC=`g_BK8>z|04Hpslr#(GICAy{
z>T`!J=dt>uaF$HwhcU7*Ai8KoalnMek5(1*WIpfB_OixyYWyTEp%c{~BwX2&+J&3bE_0!PKO|kGu;l)#TL3jh1W0#l=&p-I}>KYrgoCy#~oB
z97-zHmHnJClyXa0t_XU4r^?WGG251>|r!Mh+KI5qByp&-R3STwE-NC#7W%4|P;-E5DdV=+@zvOfd%@ShalZ9uc$
z36~8G45Bw_JJ#IHg;d({f5>~&xSaDg{`<7A*^A2BBuxsXGLj`?C=uF|ElWz-itG_t
zvn5NIsfhNnoTU;HvW3z@lqHg~@4NebxUQMY%>V!BessTHwR;zT^m|KNusp(?Opb@QJd`nJ6`Y(K*qn}O}=L#Y__Q2Rr5>mNEL@xB-@?T5}4
zcFxcL5zfJU!Q;-KKR@Zwf`tn`*}WQs6YTf8HCT#(q|A3=TD`65>106%w*Rx5a~*CRcw(_S-swBqtB%vn6@CJ
zbJ_X}?=G(=$)u8pOS}YOIg)O3;!{;uSv0@RCr?&7P8>InwVtcCXc5V$+L4ySqW7W_
zojRxs-A}$KxPWkCX=vz<9jR^D)B6a*u@K|}v{o`X(4I$26$w2O^ZY9UdvP`PP$$VP
z%`m{;p?&-It5Sw7rk#9?Bge@uiOw#7f<&%2`>z`~*a0+>Zm51nrg`wxsZ$Nzf-k#(
zRjM{`9svs`Qmq0MmZ7_MyLyz#BNgo-SgmlWAC^+FA}z{immCFTlc<2f-<z8J0J$Y{E>X@`zus9j(_%BvY~7DM6L&+Yi}<2{`L7X
z0lBTyQ|zli3IWc#+k2k;X(gPp`;U~V3uoHRl5wtM65urUd&M1URUT5$ur^eIOP*Dq|9XRNIrN;Gta_^q>_M!rua^ewyp$V0p#NVOLEkaudvpSJfNwo!gDM*KG
zXcHm(i7>arsomdqSS5}{N-|N>iCczsp@Vaz^ogfeo!8IPtGf}Gka9Wc)@UpMdF^Ue
z{G|mbkc9@i$>y*keq6WJr}S~450ZZ>*PNtw%)Wi)O5*KI>2JO~3AQUTd#Hhu)6doeqE5Fc%D
z;#B)BUE_AOL>5Z{N;rbZYG|jo96WfC&WqxIn(5B%JVCm$&`<4YtqM^M`6&PJ3aomy
zA@b8OlK#;Sz2P@KVQTD4H){WiC`po~g%3uyF8Y0He2GqbcKr2~Sf8IS;M?H?9US7G
z@w^K#KCrCgMnA8;WO=U(*TWTmnYo_f4RyVAC?
zmwK+NdL)7pgq${V4h+?(X=ipPclH0ljZH_|;MbR7G`+AF}!9tH`M(5|4nj
z|%EbuT?Ftiq
z`u_ngL1=qUiF60Rx`5Kv>HqTo2W)M{wZRtrvEvUOoMxZ;bN}P1E@HW1`RvSxckhOC
zJ}F~ee`+`s%FRpcdj-T&C77`^H8(YF`l=02&2>B|%Yy!zdjtM8me2nn~U6N#{Sf8H-#|4vS{7c4|td~;b
z=O$O7uxs!y)nP!xe~UV8@BKp$!mHG4`wtP2LNQ>~ztxP>(n-bP2z_baj}Jn~L(8K-5o9oi8($1(+P`wL
zOK3Za=Ony-LnL@jya~&t+?0lVhwG@e?K$U?Q6vreFtHw+~%N
z=x^WIi0e>@akmoioM}~=JhlAmU}rgEqT9g+C$PyJ2~zk=QqSho2=v@KdFIR{untdZ
z7IA|?q)6yH+OeVhi1s)7`pg5q6;D6epIiCLy9b+N4$d^d40YWkoH~51O+G=F=E86<4nV8+9}ki`DU|c;N&q?mSHF=ZFLB^1N#f
zMvoXOY%JuwL`z8ph{~a1?oUhH>X-igPsX|`6koo}p|E$a!7y|s28AYdUURqV#V3`l
zc;9-;S<`Uc9XKDV2?70+?k+<5*5Sn`$mxPk-lRQD+&{XR?=<6rg{FlhblNynlamVM5JlZQcnMN=J-m
zBGW+|5)creHGjdtvy43G`K@e1k8M?9D;5-wZ}i%K*(isVg%b`T$jSYquPaZkm`Z4D
zMKyf85OjO{FB`X2@-$9+Mw^O=YL5AYQvtF66D``gqQQr5E=ikv5aNz&HGv}J0yil&
zW;%uin5A
zpU3(%HyO@Oe0k(~5?;f!~f1g-B~yhB?*mgLr@bT1|Hm|U-2TlRN{9;)J}+kn3+5~
ze2AQJzr0*(U+|Eze9Eo+`3n?w!>F1}d~AwIEJn&U7%9iZTSK;8{AH7QBtQt(MMdXG*2K@46@p{MUwc@8BzR<~$Vy=Gu=t!Tx9{vEot&lHt6ne%~r=*7A
zq?ICSeO%J?3V|YS?DXi$d9(Lo8xaWru%e?7!w?HZ#AhG-?f3tn+HyVX0;%Ie64Ri_
zqa0i^)6+ye)Z;!{#!r3!5lxoP{Cv>L(iHXEJo|K=x$j)t`t
zJsB;!6n9Y#V^v?gc(EBMpL0^mYS7b{>Xw~xGZym};f(Z#iNj5@aNvV>UOu8!+EX1%
zH1DdQAWxib!cB{g8cVGmhopU8KfNvB*%W@VB8TNu(HD|Bw%!xN>aMn_B+egyTBuvJ
zxO)w|c#3sdvhb(^2TKHCxz#?^dk2~dcr6q2HKaYIvrg0b
zKrEu7C)b5}SvY6X%814-Et@do&|W9GfWC(CE=sDV~zQ
zGIZ-7i(g%sQ!FlODw5zF>l
z`nyidcB!O%*7wdREq$l2RHKHAE$+cz7Ev}
z`gja)kp6JH>&4Ozu{$&!2fZdA>bGtFJj25$%HkS%o!Fu9{L|%gX*hiVDRqGW>;1l>
zR1rc$LXU8W#&X5lmyQcQ0$wGfNVq~CCW_Uv&9W6)R23(`yjoAk=IztM(}rGm&MVGJ38a>*PlOs{;=W|^G^~N53zTUK6_`pnZqq@
ztaL!m=iG&X#hMW50xYMKQzy%$pj(lPiUAd?xb@s&x)67T?mH6pXzF<9CIeo2dJmMy
zFZmdgOAM}phr73@HoLNU2@U%e&kcrNi9ZU!*Tv>_#
zZ6HUhTXhRWbAlKbv-L>P)N$_ciG}s>WyuQKDJl%}zJ0q!{dsW?#9~M}E)=J>ruy|i
ztJbGf2g3!aGI-XsaFfzu&@a(vFV0b{bM(sgOirZQkQoh6M*3s2lzd8*e0ET5+80nW
zQ~N-gg-{75Qs@8kxr9GqjyL#L
z@16ik;H)wN>kf5#?%cmBJ#F8n>KYAj4i+*uQ;e_OQBdR*eLO)_tUrpy1cGeG4*WM3j0
zA{6$Vtu%Q&N6}JCttwSO(LR$i=gvV#_UYp~_R?*U6ZLYdA7h#^yK@f;oAOAtDfTbY
z{Fm>3+WU|kar3m|gIZv8aouV>>+Y$6k7SDqf6DXu5gS2w0mfB@-k4b&yOv5?=KlR{
zLaqxFBNyn)i&KO+0N)-YoV)nTHk3<_5h6jUYdAHfBo*v`U4l$4uu
zHz?voP(#Ub^tTyXF~p&qJ?qcK-UEl#6|2(Fk!U{T8Hrnsf>AiG{a*Ld@y&7L$!G(4
zzpC?H<_Ta&{JnbDkpr&XR`v?I7aNC!@V`b1(Mryr@5p!W`eZ5X3xfl=(8R$r*3GRD
z2Zm<{2R;99jhsH2bYOzYt0$u4VOZSc5!8Otb-%5$=TXK&%>y6CzId-ow}I-(f$dM<
z{_x(tQJvl&Dq7_1%6%cj1gYNzJACr=lG=5;ol(SQa!jzJ94*{xhOST6uo74Cg~d)!
zu|L=mA_IhU0LO%?*&T>#$-IG5l?Nx`*tk;u#CbP;SGM`dZ=d48uli=bEAi4n_yp;*
zmGXDCrm&3L+c?CHIk<6#w|B60x7KSmY|tMwKDr&{L;*l;_sln25iyG0qLPrwUW?p#
z8W3iSm1KzUNKSrzw_BJ|_-Olq#ecn?@W@uR8}nv=VNHh2d)r@}8jMH2o?i%3+kFFUaEsEUaIED3==@j>Ygpt*cl09Vh$dMz7`m&_*
zGu%kgcOftD(3!K-^vW~iAEQJ%3M3O^u-4n#+YegZ1D|J9!vv{>;Mrv6*M6_{9~!qa
zJ9>IyT|LI4Zqx12!d@E_Rb7NaT6Lw1wssr97l!EQ1!mbk86QdUDUeecu9T3MSSYur
zCxvO#Q4n74;OWW0&C%LY;4*;;vtzRV+rHMSD~%dGc=+&FZEZ_QR~PL0iBgybqHrV_
zXIQ?-1R=e5ehvE@mNgla<*>9@V#7DIAzu7QpkAX`b?9#OvE9)#xG{*qb}x*HjO~CI
zo<8NMw;?#UcrV$Yw)i~cW@g`rS&nbK1p}2#gW>TzetPuVlqt_%C%=k!newcJ+Hiff
z{eC(BC?Tc6VjmPTrH*sr0*?mZO_Lknck^O>sdnVRnIR
zXDKY$`gG!=k(6PB@)u-9&wuW7)^Y4Y$s9s`8dDM0=;N@_=$
z#?44c$LHt)Ebu)Pr{{2Pk#h*QZWR~acIs!?s7^dH)`ityWEI4=4A47cMmkoAVe}6{
zU6BYf1oE2vSj?<|znK(QW#7;E5rlO0M6sE527Y*8k+a{We$$dj@;
zF~zly!w@F}5qB+AeJEEXY8yTpOYPw2XBi{eAv$x>uS(#W419payiQQen}RVTLwg4>
z$|C}>M+^o(P+jqXT-&~b%G93w3e~ugfCY5Z$kGJOy!d{S!8Je*_^$ILoyKo(!=9!8
z)!5|9lq?ZNtx8N~Z$tjA%lzHqymElBNwA4*@nP_z*SVsG_sf0<<}58Sr?%KZ`P3yD
z^OIO*lOJE&rp+pqF*n$OXmg6W(Y+2IcIPwT9BCwaPPsck2W8IPu;LR-*$FN)oDz)6
zE`{mA!S%-(C8x1K6115ADa3($wkCedoH8t8@2zEsNQq7ew1;BON24+<$Mc01bUeI4VuBdPDW+)V7k!g~5rj&9$ZvGL=L|J&4Cb_AhB7g0)hC3kVI2B~3GBvsX;Yk+|U%D<&6V
zx#qa)+4H(y*9p#prcON|wGXG0$Tnb+_@+jEin)+!s;|TUiV_2yO;vMvdLEU!P^jfr
zD9%qw|9XmGg
z&$&LVSAfEFwz2Om@6=tHS7_OCd>p&z1(+bK5#sWo24|?O(nzL
zCgqd04EA-{u$Gk07
zO$W@1+bMkvb7p<*YugXmsnv)P4nPw2sgq9gIq(qby;?NidvxZvLfb%@Nr+S+@uffa
z0>I1!DPed&iJ5@C`hlJD`N_cJD*irTR&ZH=!3uD;g>|LI>
zKaA0$)}}qvzuy1JU$@tQx0?YqQ6xz7Wovuk?AgJ^&{~ci
zW#G6I1LJhGB;vgjJ^=(v#x_tjNoKRm3kM5uWz9fTyW6LB9aa^m_uwxrz|_{Wqu1Eq
zWqHKfIC*p2oJ$ynzV+Lr>?SfEj^oR)pf%p*s;fs^v}l3-WKOw~EMQSiY_U+v%xAnW
zJAjHMlN!+GF@>IfVc)JL!Eq6g&n|Y4@YP0nt@p+GAAdgFWqEb>#5%W>F0azw()$Nf
zONtx$GLRV7&!QO}tInx*w!Y-@e2R^7$>({JgonA%xIrx5pC%{1EgF;cF8=HSY!Kov
z4e?$b=XxskKyN&3#24EYdbf(9s>Ad)MCq1R;8H)%*#hwZK=1@3Zmc+&<%UAbPt|Wq
zJ8%@?UkV*G#gl*GCB27L<2V5mQLBpg;^u1!_@cSDImZyOLq>1KWZS}0230D
zLM_a_jv2b?)Nltdj`|-VvME
zqM#M
z_T1_4aq9MvUD`cEb`8JyH7zl7`}@9g%U;jgIr+ZdGxM3Fnu})C#x}R8NKK{=l>
zB>~RXNO8=sS`F6f9vpaaTE%Ev+m2Osop)RcE=zGF3tn5hHVf7J)$LEuucAtCv-m5G
znncQl&893GcPoL@(5;n2$c4#6N^6ZNyOcs0QU~xYOO#I65a%b9b4f)W>i1_I?$vY9
z4=ZRBJt{UlJ@&iO}{gz0ig$Iy$8{KH~0_Tf0@mRabiXTF;!~;mI*N)XM5KZt3AE
z2Z%tbte3`hZTe|psMRVq+62x@m6Up=2rxaKl$T$>J1;`zRvEgH~(TLqr)jFL!6x21U2oj5e$=jv-Z?P(ed$?zP`rnBt!7!sow54#-^J!9j3i?YC=r)
zm!kZ8mE5~wvjY36sFRUqvTXaMhYL-PH>+I(mr3hYTTsQSbm`J1v)b3sPc^vv_!Bc;
zCWj@Cm|)|#_WFDSr16>QJ!>a<9Oe2s?A5rG!IYsZ
zI@WK)y>q3UE3J8O^-ROkW}u^Qo|S6Sch(L6>Sr0G-g}NVPM@24sbiFf53k6`ShV^>
z`b3^G5Ka%odToE3HNdz(9B3o^$r^M(iU7dAuo*UMEo-va!C7iXt9&p{*^x1}TMl(m
zAk}-N`8a#~RkAsZ-yoA|Lcum@`GNZm}KD8dRU~?uHz8b^jdc?fBJTz}`K3uAN-A
zEU>sYqtA=>L@jH%@_&nZO=HJVqn6*kJ(j#y(?!)o!&AIQxj7m1>7xnMk)U;air(m$
zR*;ToP=L+CLII$p)ul^4HX*(287eQs38xer+V@RAYgTjD{W}T$+$N?L<$oO#Ve9d2
z?6`3osd3yzT3e)fa&>+kkf=O6VSh4Ylv`j+Ya{zpvER&S
z^y{wLQkC+1g8TXRKn{(;hSya{B<}l<{NRy>*`7hnN*y?5N@r2cz4oW+jb6TP-@ae)
z!9=|OG{67+spr>~)Cwvbe&*7pMq~xmv(pP5Wnk#K4+xO}$t-BQ$zBeQj$Tox*RNZL
zCyyF8@+Cf=JPK+a()?#_3p`CoesK3SP$Yv}E=Ex7kN#byY~42feM_AT7%_SGdq7Nd
z^eM2ZqZNcYhK
zPIRH?o<8%N03KCdKiCvk)V+#|sYNcneXFNG+rBrUcYMXOXB#;Myw35gEv>8?@)%a2
zG(U|u#o6sHC(5WXWA>?Enr!r@y7=l{KdMd3^4M6teO^YZPM;o#)zRY|BD)uiMf6Pg
zWRU<~@9DSIYpA`wDrJGOJNp(uJ~OFZhYsB}@GMl3OT#HL|Jm)l4O5JEjrypNwGY$Ia2vtn9{s
zk)uX6r{3!l)^lEUA?SvphgrOXi_1D}>S%UV6rRgM0wyYp@aDEfOQM>qexy#NNElOE
z)VUV1RPhVQ=YM~BcCQ-PkKl>?Y{;%(`}4$`^eW=IaU(=ZJ)o!=sEa#cN*k`&0d2@
z<(>_emp2w~;M*nx3s2SqZ5fi8WaHQU7Gz{x-9LM_R#lzum(Z?6;HUCzp?0j9ZmsCL++X(lEnQ=dl*+LMr$(I-&d
z%JG_wPDs|dbLT2eY^L+9h?(71FnJdDstUY(z(W2Qb*pjyW7PsN!&6+)XIkMTT>SnuIWP9!pNjvT7*Y0u^
zKa}QI&3Z`Yngt_&^|)4V=HC4L(ejr7R6JkRy9iRqHN^y{mR!sV&oTLOvZ23je8SY(
z;p^sI!NDqkM^j&z@wkh^GO$7KE*!Dzr_<7990w0PUGp|=JPu#!$v}n3Ls7NN3_5k~
z+K7_hC0HMBr#aJ9-jz=GT~+A&bnCwEZWg_`ruP;1G>NfWN5E`-TKfEjCkwe3l7aJv
z?>4nbdNx9mRMWQ`mhL6?#`vmZwbQ~rW9QaUlKK$)KFw39!7a_*uBlx+fJrZTPC6m#
zzDt4%FLy_iYHT{SQ7Pu3fm&NCmyB0!SciLU7OPT@&U6ja`PP%Q#!PSu_JxWc!T^H{
zOWpU6`1tQIrKadZ5)dGSHl{CvC`#???}eA9PIR&ccr
z;gB!-{LE)X$GfOO27uf9)%{#$#~HqAGdS^vDqY8A?ZOwhYM44soEYI@J&qMF!(x>V
zb61FJgA%M@0}kZ3&lW~;8FTdJ?MY4TMeq(0U>rU`dSQp`^h~0E8QYE{@7;m$a0Q@l
zWEvou`q3blgdsH9yor*^BHH}48;7SfqVTNf+d0&V7{YzZI^DNK8O9G0U%+gpzsr=J
z{nfNjzW_2C_rhhpuSkdnhf9;)GfsKmJp7}wrP~c#{FD~m?@IB^+!BqmIXUIf`lxJ_
zzQ3w$z4VZC`z*NV9j!$L2M-)rcaEmr$UhjK*-IJA^?r_Yv@m#HYX227JU!MvdHda&xjp6TJPDrSH+XXGwF#{Atu9gl{B|P5CrjCKS4%c|R5x2SxXXM$6uIjt
zzH)Em<&8=T*x3z@aO>cNgapG$XX*zQ6K0x&S*{0=mXoWiroFZ6EUH3)u$bI
zXxF6UlkD1gui}dqHGPoaZ2eU+=6f%T#@>D1#*7IB^jTYh-RA|y2a^obziwy#>FLZV
zfQoE60P=l0!
z)wq=3-#!nHs8PlhC?Bs!kL1DgbgJu1Gr?z1Ne_2v*Q-}AIe%KLJ(C}`Q}xFZjNgvq
z_^=5P`^MEZl}`=CVz{q3<8!_7r;dHSjE?Xn=+Q)IFEww&TP>K4w3%yOBm({IQq+ug
zH3ncYbM3i1kf&SOE3EO?rU&7brf0Us8Fr)Ft@$ngx0#jm{jN41u3G=t(ywOJb;j-p
zHPIU7XnVP?a7j3a8FexD+bTDIxC(3l^3QdYyWqzg(CsQQj=+Xf^!XsTN3
zck8AW{1lEvWEMLA^ix?>ERh^l9&r;hA+jJU#UUHmF^%
zo;#OGr2E@!Q;sU`>%8g`uH4AvNISj4hNFRhCh&IaH*9zs3zKi6`^7~!%UHQy-koL)
z-Pufmk1LS1Oe1q~Ta=#%LbL?a=(+_<9SREyD!+WY^~cfYC)=s((~w>Fe1q&*$9YV%
z+Tt-Yeov5wN!MT90Bi=GjHz{s9GexiM8`>p8d99M{l=|Zzc33mf67rk5c$YM5w2N@
zC?}1d6~?v3+EhBV7zg0rzYLNRYi*2~7aP;zntK1G=97(f4J+~)M(;zlIv6ulW!d@3
zMvXO$>l2VdR?m?-T?<65#3K$ruzqL9`ntNhJ@jnnKf2N`?-&nJ?Q807!k=`;V%x99
zsJ6_&&FEahl3l(|oAS1>=@Bqi^|s-dzH)%SE$0HY_w-O16L+%jOm1FHmCy7c;}zj=
z|xP(08rDH}$}m;%P6Mn%yXa34_Cr~RVq
z_J|+C7^*Wgbnow7AN&1N@yS{6r@0^S4P>J|->sbbwZopqUn7iu`K1<_KzQuT1Aw_r
z;?z7dq-CMV@;Jvo;i7+|My%!RrH~CMrL<
zetg_eRk=;(0|(gp`6Op4rYmW73d(TRyc99m_??8cVC2pQKQ
zrOUk93l;f{ykmzxZ{EFoJ!NVHFtp3nP`Le3_V!Wgw{Xj6+CXZ?R^G`)?bCIJPW=@X
zyD}4dO#0d*0z!CPF+DmxU1V>!_um#GQwX$tPL
z8j3gMY)}sbA2ar{8PKQqxbK6Ke+3NHf~q`3xiYnQAP7Lyc}wp6&f|9?cF5xyfAb6+
ztBu1yDu+E6FIpkO^f(vPZ&AAIxN+@;Ce6KZ{d!BdnHdgC3}Rwq8v^+_9hy`}23$9Y
zMIh9XQ=9>*rx9GZ`q?I2fg86hr#cNR*45K%;pg!B&6^oU52lQme{$sVk`d;}R2J^-
zT7QL3<snS(6wvVgpDKE+zX4t5D7IL40Th7u}=k`7tf7Z)>QNJdvgveE+tf5
zN7LICHZ|0Eb%Jb1)nF!>CcO>gTW#CE{q(I{niM)NIrQO{Sr$>FV~UmEQL!c@j(_`o
zueFucW{Qnn!;3tea_!z9?i<@dr(jxKnHi_q2eM>PPpEnphTa9Of-n+ECW>Yk!9Gr8Kq&CShl+Lg8;<{f&bGG4K*xqmY@
z-T>=~z-NE|NKKO8F_N8DHM{{;f4&
z0w5Kd{Gc+|7FOCV1b}GiaQVW8EP!*3#Y+Ybta;*Ng(m7N77yJ&AqWn52|c}Hf2m&+
zz-w^M+&ObN>+9;aYAB73OIEjV(>)94I~vk%>`n4IUz*XMx4QOl9nRPrO0U)r<}3)g
zb%1(s%+bBoG@^bdx&a}}kqDA9n}LLrK!_vD{ce4&smxu5hnTi%IV!+j3zv+!ed(P(
z8kaY{<#4V0x-$*q2QI!XJ9n-zbUzo)Wolqg%fPCW1NHThIe-JC8eEIBp5U&3$Nl`UgwM#%oZjV}snidsmS*wh2rTR9!jlV~;m39EYdL
z_jti^?^&VUqQM^vt9~8xE%b8Yv()-vwKWQ!SucUqUHJZ@WX^c?Io$b?Lk!Tj-&42tE#>?5MvYfLov;YGE$dRk~SAK*WwS
z09_U_YJ`;#n}st>Mo9&62uzSOy%S4v+2>n#Y#khe5E)o#ZK2RFho*L7F!q72?z3j~
zK;@(LDeLFEq~rj|FjssyHXns&pLLiBTjM9L1&z>)xeKaLWGlhcbZke%2T%cxJ?`He
z%&BOl1Nx}-m@h(}noe#)4<~plc#N*`Ji`^+0j_<8EH@gW=$AWAK$ZQ-&t+Jg9;yIfRIZjuhXczkry
zw?vsi-77kXEEajNfHH%*0e#AjH-I#=sdt>VQ7(~L^XJc;lfbqSND;(n=DaE@B=MAai)R6`M~xo60%v4vuCbI^4-1-04>fOtU0h-G
zNAsP=e`x_?Vl3D<`+`pZ49dnGs@;M{D7&OY19GRu+S*kAk+b~WwXLE?zizq4B_W9Q
zOE1Ds#PxBq;%lyc<@06V`w+~$dTP>K9HVOodZ<8Up9L*g0RaXf(FhZ$4zadnPpVV*
zgLnX4@87?FO$FAMLvtQl#k(jQa?Q?~GiSx7O-)-htiz*>X`pa%b#=deY&I8wnf%>J
z{a2D^-GMK4pt6@M^-{BD%>r0Ao%;4|hhi}cEzwFskxKMUSQ2ad^{6_%sbC&&{=Tw!
zTsACxr}{OaDg|>vwinKt8}M}5gl;ZJ>u>7&&MEGjjqXo?MgAQK_?P5Mx-*gY7;DS;
zHMb#)`z((=VswnL>+N4dt!!Ewg{QQ1Msfv(_T8b>q>>X={t$n8@N2pWH2cq+S#0G)
zPDI=U08+#DmY+Ae%olIotlYb|9hB}Cu1O+l_ob(6@Q%Unen-8VRU>KpEWBA9|8?p6
z6yH)++^0;5(plr-?jFeVldh7B!@KA$c<>9VlKZb7UJ6STehii~teTBD5zA4V9uo${
zJ-g`W<`xW+a1Z|nlbObDq<5~rLV*E|lW9;v$EXwZLUK_mJX2pT#$b329b=#;6N%-5
zU_MmS@RSR{oIyv9^lZPdvQyxoSJmVPK+s@eX}QuP
zBd@B;^9%fA*t20pMMZonLIlp@A#a8&^SyHS-0rTz(uv?tF@P$JNsYVBA5n7+hJUKt
zr0p7Tchl-mh$4Pl`sFuCZfVeq<_bJVjo-74vEvDz0sm~eY}xmzZgD42U*}%&SG+(m
zXYzTG#ggqOPEOuKu_#N6@4xxdLlA2&RVkLgKh~;W6Vj&k?W(XfM0>dN
zFfdT(M%|<3$r|c9kes~1oxNtZm)w!pCM;0s<3XIZC;*#LhLWo@v%2zG$hKGfLtr9zBx3he@uD(d7CqpZruJ
zJiW%oeOWwy*}YY*$6dR5Haar0CRL!WzP=@V83j3OSa8shBd=Fgj?m-$r0{sfFxF!#0Ut*F&obX@7Nz_shY{Bnqm8cao<4JM}lXl&N9|v4&1-E%N
zfn`~9x$(Dzg{`}PUm_MG0Y!pOoH*pSA#+DXk;`d2+Ht|1c?{H_wqwTBf-EAq?Rw>V
z+cAKc*4I@Xyi#ukY@LXZwuaPy-Y-n}adw(7n1d$a7@&jlqUqrRtae2zjM+WptNzn-
zFBphg@fxeHZtATgsi`xs+w`NQSkGEF%OeYTenk9@MEl?DTkl6xyzZDb>Tt-
zzDvT?jFC%}z8?9kP&S+Qq3T^BVCZ>n5)j?2RjY#dYHKQ7{AQE)s_^{paf@C%Idbl^
z<-LohU*#IQM%1zO4vWdN8^<*d_`a%;D#E~}-v8i_=FLZ0(`OGvA(7FfDHs|mc}?)pyb43%li0!|H!u+0wF|
z65*{-k`_|AHt;Ka!XQd5tt}n(^couNx@@#+%=2Pl=L_M%k!@vY8lkNCs9fkm?kVD!|Tyi^uC
zw{osmZHob{eyZ$IZf+{nqnxppLxvc|4>1UusBF>=o|XHSw6WDu*3*xmQy?&|Q`l#&
zv6o~5p5nO=wExFz9E5zvka@Z&ADm^yXodKbMyT&rbIUk+mYj!%Q!lpxewr#Whcz2F
z$~D0C&xarpN1)TZYV?73VagFOoXXCw#SO$MJIl}StayGRsz^)!`&WGhg)#@dfm>bZ
zsDO+<>(#Bet&+FCTl;=TM716555bU`Po-^%4YdUOjaUayLaCH2b28?ChD
z<@g1rC#ax0AzN(|SgZ5lpP7&Uejv|4)}{0j{3y98Nw>d*{{2+`{+G)eCyJZvU%!m{
z{Bu+N=Z~L%!h!d%KgoZhdJF#h>;3&N|Ns2LA+G5P&kQ8-TXyYQi-ZY3Nd#;z)vWLm
zi0uvB+W+Ha%YP`f8pmxZW1G?D3gRMV6U9IEhS~UA9MH!JRP!eeZXy`v>Cn+jhyyu?
zCIt{N7{|cmDN{OeD8osCL&`lR;2By?>m3|M1~QJ@i4h89G%ZIJ(d*+U&@}L$@H%?`
zc$JawIQjA~QF|T2YB?6YL{jni6N}YJE7w2q;eI2)c2wHeFRP9o#O+Ow)l@h1;*jx=
zkSz{W9px&e3Fify)|2LuEy}0LqL?QHAExDyud`(?$+m%LzFpHB|6>VNb=<$bTYyU{
zXB)6iONqy77>h68oovhB7QNGlY1)i=SJfwN;ak0DneWGeTO90s=|Bc~#C)Ne43MtG
zEX}6%J>f%bX)sn@IfTx2ru`6`
zc}U6als2a`m4v=qw|~B(QE@p}JPX2U%LKs#M|a5qN0Bc4Gy?0M!lr{vcx9T>1XJao
zA%JFP(ZXS=7;Ww$We7V2Qvi+r61no4aXHBvd;2ZXK)#l|ZEr43$c_WlLFNs&1
zh}Dm(tG0rZmpLWsR6ug>97oi$C;s!Vf)R&~Xqb4&gz7*~xxXnAEiIJPs+Pb1-s@NqTiRC1BM
z;Y6`yHL|zdJ`b*1UJStH_1cAIvFNE9cj(s^kH#Rus{jxnBP?RT8~;GPAgSxvPxkpz?N-F`l>*Q_Zq3@RLY
z5V^N$n;xG5B=3_BRNOeFWl~`*of`>(NIuy&bV`;o#5AU
zhcMZG#;ugx{3z$!$m3^Y2fmA{mGkKb>LkyaY}U<}yfn95d^m@U1XZ+4<`m_D%&=XP
zQj~y}j-~eUa?h*DeN>gh1l{Nua^c^{wQ|huy4!r2$`^WN0s6Dyg!g&%Lj{J^K`+$m
z(E>F8J*O`zFo&p!oK4=ql!g&%NY`r6vkNVbCb1oHq7+Kw_mwukmBG^;4qQsxCk-f*
zq8qNY%&+WCLkz1i(AKuuff0LSZ?##dH5)WHq1$W}757`ufC5AM)pEGyb?Tm+Nud(vMR@iiV!+s;8G1BbZ0S?l
zupBV_NR#h)G0WK#y{7z(O3Lc=*60(rWjl>%^^+5|6_7D1f4nCU@h-aA$fvd8hm+~k
zQF4|%@Y_0DY;DRZUi5kL!Y9m}xrvyNV`*uu*^>N+N6pBZIyoB>P{4Wg?8$&0{=p*a
z;}!f6Eqs%foanZdeEHq6WB+jqX+@f@-&;7;8rC#WoLw5`McmypO4C(Av$Low0ucOji>gff8
zgg~ltwnc4e&=%*WafMoV!4|h;vudEGq0yA)l%oJspHG1c9GN8&>!TIDyi4{M=5GG`
zUjFef(Z5tRo2Vq6;`pbkI8l1?Ba3;9QapkNh)QZOYmMPBpO=9wD1BoR2KxE=1?KF}
zZ}M{c^i+`;;=jvlo9$V!1^Z-uMYuzp@B+hk_Nc+_brwufW!xpsS=`~*!meJqQt_zK
z&ufx@FInJ}5Fg)|ZP}JyH7
zxaQ@~CMK$oU%%5}>kQu+AUVL(>cwzz6JXfq17Q>j>ipK~>T8kJG7?GkiMB)2Y)@}u
zc8_|rZPn^bf=kSxOJt{^WB#&W|A7OMbZvDL9+B)d_ZxR<-G3+@dGQ5hM=xnJ@glVu
zUlV!5m)C!OKf~dxdAsTwg{%P3H2zIlO$q=ynxJ!tuoDYct&2A3gPzUfvcZ6ueb|qD
zUO3s?hh|k{^>Ti+cu>S||N7}R?sp3+#I?3bP!{eAe`c+{HJgT-+y8OY4$HZitg=9G
zd^Wa-X&}6Fbab+>`qPVoZi4mxc(Hjh1pLwQKmY7NGt_uoQp9uD;8pk6R4)7QapccS
zxZs2`!+XFofMw>|cHqtWlYOdU_8vdbIRG%Q{@7iM@G-r-HmGT*9zCuVM5QzYUb=xX
z)Z-Zu8!9KhQKJOU(_~d;L#sB9jQ}Qt6Z&n*Cu4Cka3%uC)dDI9QxP*{u`}qfSUC1R9mgYDE?a?YEQM&&to?85w;b
z`5NyUKJ~>yiZ8QA=mvumx14Vy+QS8}v_(;k#-3o!a`6G^wvvyI(2VqtRrH~voY68#
zdGo0MKQ@N*xQs<5^6Y9VBwR_zWJG1p=VpLdu&dlZzIqten-30BU(V7nU67Dq!6?YG
z(npF3q@~?Rn>#Hqr#}QkX0|7)(^I^G*-!#tpL>%=>4k!?k;AEm7jPwuu%?30(3iyT
z-p8}J9D_>P+ghhE`W=Fu${govXQ!>fc~dSxD?dV)u>Nx^8Y}J!T@MS9ObjJ#P#e1)
zX@z%c9;M60r!@1GxzZ&V_wijKjZeW`07%tR2nt8^#!Ie-)4*H$CM|dE93x$tw%o(~mY+w`x{ffN0qW>=GK#(#q=UfnhX7Il@5w-hM&t
z(J>!)KXa*i{pUZ9IQ=9y?^>}yL0CZ*k~V!8Rb+#t$6nz3E_0E)8Vy@wS51{Iehq-$
zjTA*|+S{QmDxbORvnE?~HJf1)CN-&^MNmZ8`ry~;IJ)&+QyPPVo`sWH1Jiw^z@_!&
zngJvmOvIK;;!xYkM(PxntEhBnP6xgDMBsyH2pA@K$l24#LC%i&k&a5HHcgPDoywj~
zvW~N-k#KF8iC5NXq&9)VNLa{SdODuHMQL&5YQOa)7pS+hs}(>jNkVTxRSKJ42hAgO
zsH>aOgr7seDHQ1_TN}O5Ndy7zH1*aXA7u@c%tZE3WkF0!#a%dDmT(g=*Y@?9zOxl?bPT-G1Q_a^w1u!NUMCjXMeI3U*HyX
zr_6B4LBE^K-6bMDNf|L4$ETEaL^E@2c6nVXFQJq?z16)NCfxf!mn`yZzha@8Al=_I
ziMxq3tYgOkU1x3HvSm3~$Iks%inM$5XvXQ?JQv|(LRK;!{L-W2l9Di$H^U5@STtxB
zIV$X#A&nddwi`?_B!+2KxHnKN%JhW1
zW;+mxAX5xv+e{!x;R9nJj5~Rc@ih4J`_I>n<>UoVJvFa|hj*m*TU$Zj7xv_x{rImY
ziZvPJVAhgq~I
z!t2_1?Jix)$sfpHFoRnj?9bRg7Z;f*KV7n7(rs_TwKvC-Uo1mZRNAkHG6ik`0+xa`
zp&Wz)U1$20x#W1Xz2{6m;$>_(
zb*c_gX~XhTPA1?m |