Skip to content

Commit

Permalink
Add a macro kfill!
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison committed Oct 16, 2024
1 parent 811870a commit 87597d2
Show file tree
Hide file tree
Showing 41 changed files with 182 additions and 169 deletions.
6 changes: 3 additions & 3 deletions src/bicgstab.jl
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ kwargs_bicgstab = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose,
r₀ .= b
end

x .= zero(FC) # x₀
s .= zero(FC) # s₀
v .= zero(FC) # v₀
@kfill!(x, zero(FC) # x₀
@kfill!(s, zero(FC) # s₀
@kfill!(v, zero(FC) # v₀
MisI || mulorldiv!(r, M, r₀, ldiv) # r₀
p .= r # p₁

Expand Down
8 changes: 4 additions & 4 deletions src/bilq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time
end

# Initial solution x₀ and residual norm ‖r₀‖.
x .= zero(FC)
@kfill!(x, zero(FC))
bNorm = @knrm2(n, r₀) # ‖r₀‖ = ‖b₀ - Ax₀‖

history && push!(rNorms, bNorm)
Expand Down Expand Up @@ -191,13 +191,13 @@ kwargs_bilq = (:c, :transfer_to_bicg, :M, :N, :ldiv, :atol, :rtol, :itmax, :time

βₖ = (abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀)
γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀)
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
@kfill!(vₖ₋₁, zero(FC)) # v₀ = 0
@kfill!(uₖ₋₁, zero(FC)) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
@kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
Expand Down
14 changes: 7 additions & 7 deletions src/bilqr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,11 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi
end

# Initial solution x₀ and residual norm ‖r₀‖ = ‖b - Ax₀‖.
x .= zero(FC) # x₀
@kfill!(x, zero(FC)) # x₀
bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖

# Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᴴy₀‖.
t .= zero(FC) # t₀
@kfill!(t, zero(FC)) # t₀
cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖

iter = 0
Expand Down Expand Up @@ -175,21 +175,21 @@ kwargs_bilqr = (:transfer_to_bicg, :atol, :rtol, :itmax, :timemax, :verbose, :hi
# Set up workspace.
βₖ = (abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
@kfill!(vₖ₋₁, zero(FC)) # v₀ = 0
@kfill!(uₖ₋₁, zero(FC)) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᴴy₀) / γ̄₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
@kfill!(d̅, zero(FC)) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁
norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ
wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ
@kfill!(wₖ₋₃, zero(FC)) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ
@kfill!(wₖ₋₂, zero(FC)) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ
τₖ = zero(T) # τₖ is used for the dual residual norm estimate

# Stopping criterion.
Expand Down
2 changes: 1 addition & 1 deletion src/block_krylov_solvers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [
SM = typeof(solver.X)
(n == n2 && p == p2) || error("X0 should have size ($n, $p)")
allocate_if(true, solver, :ΔX, SM, n, p)
copyto!(solver.ΔX, X0)
@kcopyto!(solver.ΔX, X0)
solver.warm_start = true
return solver
end
Expand Down
10 changes: 5 additions & 5 deletions src/block_krylov_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ end
function gs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, v::AbstractVector{FC}) where FC <: FloatOrComplex
n, k = size(Q)
aⱼ = v
R .= zero(FC)
@kfill!(R, zero(FC))
for j = 1:k
qⱼ = view(Q,:,j)
aⱼ .= qⱼ
Expand Down Expand Up @@ -54,7 +54,7 @@ end

function mgs!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}) where FC <: FloatOrComplex
n, k = size(Q)
R .= zero(FC)
@kfill!(R, zero(FC))
for i = 1:k
qᵢ = view(Q,:,i)
R[i,i] = @knrm2(n, qᵢ) # rᵢᵢ = ‖qᵢ‖
Expand Down Expand Up @@ -90,7 +90,7 @@ end

function givens!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, C::AbstractVector{T}, S::AbstractVector{FC}) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
n, k = size(Q)
R .= zero(FC)
@kfill!(R, zero(FC))
pos = 0
for j = 1:k
for i = n-1:-1:j
Expand All @@ -106,7 +106,7 @@ function givens!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, C::AbstractVector
R[i,j] = Q[i,j]
end
end
Q .= zero(FC)
@kfill!(Q, zero(FC))
for i = 1:k
Q[i,i] = one(FC)
end
Expand Down Expand Up @@ -194,7 +194,7 @@ end

function householder!(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, τ::AbstractVector{FC}; compact::Bool=false) where FC <: FloatOrComplex
n, k = size(Q)
R .= zero(FC)
@kfill!(R, zero(FC))
@kgeqrf!(Q, τ)
copy_triangle(Q, R, k)
!compact && @korgqr!(Q, τ)
Expand Down
2 changes: 1 addition & 1 deletion src/car.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ kwargs_car = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :history, :ca
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)

x .= zero(FC)
@kfill!(x, zero(FC))
if warm_start
mul!(r, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r)
Expand Down
2 changes: 1 addition & 1 deletion src/cg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ kwargs_cg = (:M, :ldiv, :radius, :linesearch, :atol, :rtol, :itmax, :timemax, :v
reset!(stats)
z = MisI ? r : solver.z

x .= zero(FC)
@kfill!(x, zero(FC))
if warm_start
mul!(r, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r)
Expand Down
2 changes: 1 addition & 1 deletion src/cg_lanczos.jl
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ kwargs_cg_lanczos = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax
v = MisI ? Mv : solver.v

# Initial state.
x .= zero(FC)
@kfill!(x, zero(FC))
if warm_start
mul!(Mv, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), Mv)
Expand Down
12 changes: 6 additions & 6 deletions src/cg_lanczos_shift.jl
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t
# Initial state.
## Distribute x similarly to shifts.
for i = 1 : nshifts
x[i] .= zero(FC) # x₀
@kfill!(x[i], zero(FC)) # x₀
end
Mv .= b # Mv₁ ← b
MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁
Expand All @@ -142,7 +142,7 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t
end

# Keep track of shifted systems with negative curvature if required.
indefinite .= false
@kfill!(indefinite, false)

if β == 0
stats.niter = 0
Expand All @@ -165,10 +165,10 @@ kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :t

# Initialize some constants used in recursions below.
ρ = one(T)
σ .= β
δhat .= zero(T)
ω .= zero(T)
γ .= one(T)
@kfill!(σ, β)
@kfill!(δhat, zero(T))
@kfill!(ω, zero(T))
@kfill!(γ, one(T))

# Define stopping tolerance.
ε = atol + rtol * β
Expand Down
2 changes: 1 addition & 1 deletion src/cgls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ kwargs_cgls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose
Mr = MisI ? r : solver.Mr
Mq = MisI ? q : solver.Mr

x .= zero(FC)
@kfill!(x, zero(FC))
r .= b
bNorm = @knrm2(m, r) # Marginally faster than norm(b)
if bNorm == 0
Expand Down
10 changes: 5 additions & 5 deletions src/cgls_lanczos_shift.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,11 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose
# Initial state.
## Distribute x similarly to shifts.
for i = 1 : nshifts
x[i] .= zero(FC) # x₀
@kfill!(x[i], zero(FC)) # x₀
end

u .= b
u_prev .= zero(T)
@kfill!(u_prev, zero(T))
mul!(v, Aᴴ, u) # v₁ ← Aᴴ * b
β = sqrt(@kdotr(n, v, v)) # β₁ = v₁ᵀ M v₁
rNorms .= β
Expand Down Expand Up @@ -169,9 +169,9 @@ kwargs_cgls_lanczos_shift = (:M, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose
# Initialize some constants used in recursions below.
ρ = one(T)
σ .= β
δhat .= zero(T)
ω .= zero(T)
γ .= one(T)
@kfill!(δhat, zero(T))
@kfill!(ω, zero(T))
@kfill!(γ, one(T))

# Define stopping tolerance.
ε = atol + rtol * β
Expand Down
10 changes: 5 additions & 5 deletions src/cgne.jl
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor
reset!(stats)
z = NisI ? r : solver.z

x .= zero(FC)
@kfill!(x, zero(FC))
r .= b
NisI || mulorldiv!(z, N, r, ldiv)
rNorm = @knrm2(m, r) # Marginally faster than norm(r)
Expand All @@ -167,10 +167,10 @@ kwargs_cgne = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor
mul!(p, Aᴴ, z)

# Use ‖p‖ to detect inconsistent system.
# An inconsistent system will necessarily have AA' singular.
# Because CGNE is equivalent to CG applied to AA'y = b, there will be a
# conjugate direction u such that u'AA'u = 0, i.e., A'u = 0. In this
# implementation, p is a substitute for A'u.
# An inconsistent system will necessarily have AAᴴ singular.
# Because CGNE is equivalent to CG applied to AAᴴy = b, there will be a
# conjugate direction u such that uᴴAAᴴu = 0, i.e., Aᴴu = 0. In this
# implementation, p is a substitute for Aᴴu.
pNorm = @knrm2(n, p)

γ = @kdotr(m, r, z) # Faster than γ = dot(r, z)
Expand Down
8 changes: 4 additions & 4 deletions src/cgs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist
r₀ .= b
end

x .= zero(FC) # x₀
@kfill!(x, zero(FC)) # x₀
MisI || mulorldiv!(r, M, r₀, ldiv) # r₀

# Compute residual norm ‖r₀‖₂.
Expand Down Expand Up @@ -189,9 +189,9 @@ kwargs_cgs = (:c, :M, :N, :ldiv, :atol, :rtol, :itmax, :timemax, :verbose, :hist
(verbose > 0) && @printf(iostream, "%5s %7s %5s\n", "k", "‖rₖ‖", "timer")
kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %.2fs\n", iter, rNorm, ktimer(start_time))

u .= r # u₀
p .= r # p₀
q .= zero(FC) # q₋₁
u .= r # u₀
p .= r # p₀
@kfill!(q, zero(FC)) # q₋₁

# Stopping criterion.
solved = rNorm ε
Expand Down
2 changes: 1 addition & 1 deletion src/cr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ kwargs_cr = (:M, :ldiv, :radius, :linesearch, :γ, :atol, :rtol, :itmax, :timema
Mq = MisI ? q : solver.Mq

# Initial state.
x .= zero(FC)
@kfill!(x, zero(FC))
if warm_start
mul!(p, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), p)
Expand Down
10 changes: 5 additions & 5 deletions src/craig.jl
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at
u = MisI ? Mu : solver.u
v = NisI ? Nv : solver.v

x .= zero(FC)
y .= zero(FC)
@kfill!(x, zero(FC))
@kfill!(y, zero(FC))

Mu .= b
MisI || mulorldiv!(u, M, Mu, ldiv)
Expand All @@ -226,10 +226,10 @@ kwargs_craig = (:M, :N, :ldiv, :transfer_to_lsqr, :sqd, :λ, :btol, :conlim, :at
@kscal!(m, one(FC) / β₁, u)
MisI || @kscal!(m, one(FC) / β₁, Mu)

Nv .= zero(FC)
w .= zero(FC) # Used to update y.
@kfill!(Nv, zero(FC))
@kfill!(w, zero(FC)) # Used to update y.

λ > 0 && (w2 .= zero(FC))
λ > 0 && @kfill!(w2, zero(FC))

Anorm² = zero(T) # Estimate of ‖A‖²_F.
Anorm = zero(T)
Expand Down
8 changes: 4 additions & 4 deletions src/craigmr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver
v = NisI ? Nv : solver.v

# Compute y such that AAᴴy = b. Then recover x = Aᴴy.
x .= zero(FC)
y .= zero(FC)
@kfill!(x, zero(FC))
@kfill!(y, zero(FC))
Mu .= b
MisI || mulorldiv!(u, M, Mu, ldiv)
β = sqrt(@kdotr(m, u, Mu))
Expand Down Expand Up @@ -259,8 +259,8 @@ kwargs_craigmr = (:M, :N, :ldiv, :sqd, :λ, :atol, :rtol, :itmax, :timemax, :ver

wbar .= u
@kscal!(m, one(FC)/αhat, wbar)
w .= zero(FC)
d .= zero(FC)
@kfill!(w, zero(FC))
@kfill!(d, zero(FC))

status = "unknown"
solved = rNorm ɛ_c
Expand Down
2 changes: 1 addition & 1 deletion src/crls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ kwargs_crls = (:M, :ldiv, :radius, :λ, :atol, :rtol, :itmax, :timemax, :verbose
Mr = MisI ? r : solver.Ms
MAp = MisI ? Ap : solver.Ms

x .= zero(FC)
@kfill!(x, zero(FC))
r .= b
bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
Expand Down
2 changes: 1 addition & 1 deletion src/crmr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ kwargs_crmr = (:N, :ldiv, :λ, :atol, :rtol, :itmax, :timemax, :verbose, :histor
reset!(stats)
Nq = NisI ? q : solver.Nq

x .= zero(FC) # initial estimation x = 0
@kfill!(x, zero(FC)) # initial estimation x = 0
mulorldiv!(r, N, b, ldiv) # initial residual r = N * (b - Ax) = N * b
bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
Expand Down
10 changes: 5 additions & 5 deletions src/diom.jl
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timem
r₀ = MisI ? t : solver.w

# Initial solution x₀ and residual r₀.
x .= zero(FC) # x₀
@kfill!(x, zero(FC)) # x₀
if warm_start
mul!(t, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), t)
Expand Down Expand Up @@ -169,17 +169,17 @@ kwargs_diom = (:M, :N, :ldiv, :reorthogonalization, :atol, :rtol, :itmax, :timem

mem = length(V) # Memory
for i = 1 : mem
V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
@kfill!(V[i], zero(FC)) # Orthogonal basis of Kₖ(MAN, Mr₀).
end
for i = 1 : mem-1
P[i] .= zero(FC) # Directions Pₖ = NVₖ(Uₖ)⁻¹.
@kfill!(P[i], zero(FC)) # Directions Pₖ = NVₖ(Uₖ)⁻¹.
end
H .= zero(FC) # Last column of the band hessenberg matrix Hₖ = LₖUₖ.
@kfill!(H, zero(FC)) # Last column of the band hessenberg matrix Hₖ = LₖUₖ.
# Each column has at most mem + 1 nonzero elements.
# hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H.
# k-i+1 represents the indice of the diagonal where hᵢ.ₖ is located.
# In addition of that, the last column of Uₖ is stored in H.
L .= zero(FC) # Last mem-1 pivots of Lₖ.
@kfill!(L, zero(FC)) # Last mem-1 pivots of Lₖ.

# Initial ξ₁ and V₁.
ξ = rNorm
Expand Down
Loading

0 comments on commit 87597d2

Please sign in to comment.