diff --git a/docs/make.jl b/docs/make.jl index 7dd919b50..ca402b1a9 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -46,6 +46,7 @@ makedocs( "CRAIG" => "examples/craig.md", "CRAIGMR" => "examples/craigmr.md", "CGLS" => "examples/cgls.md", + "CGLS-LANCZOS-SHIFT" => "examples/cgls_lanczos_shift.md", "CRLS" => "examples/crls.md", "LSQR" => "examples/lsqr.md", "LSMR" => "examples/lsmr.md"], diff --git a/docs/src/api.md b/docs/src/api.md index a256d4b0f..b3050c16e 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -41,6 +41,7 @@ BilqSolver QmrSolver BilqrSolver CglsSolver +CglsLanczosShiftSolver CrlsSolver CgneSolver CrmrSolver diff --git a/docs/src/examples/cgls_lanczos_shift.md b/docs/src/examples/cgls_lanczos_shift.md new file mode 100644 index 000000000..18f9482e5 --- /dev/null +++ b/docs/src/examples/cgls_lanczos_shift.md @@ -0,0 +1,35 @@ +# TODO + +```@example cgls_lanczos_shift +using MatrixMarket, SuiteSparseMatrixCollection +using Krylov, LinearOperators +using LinearAlgebra, Printf + +function residuals(A, b, shifts, x) + nshifts = length(shifts) + r = [ A' * (A * x - b) + shifts[i] * x[i] for i = 1 : nshifts ] + return r +end +ssmc = ssmc_db(verbose=false) +matrix = ssmc_matrices(ssmc, "HB", "well1033") +path = fetch_ssmc(matrix, format="MM") + +A = MatrixMarket.mmread(joinpath(path[1], "$(matrix.name[1]).mtx")) +b = MatrixMarket.mmread(joinpath(path[1], "$(matrix.name[1])_b.mtx"))[:] +(m, n) = size(A) +@printf("System size: %d rows and %d columns\n", m, n) + +# Define regularization parameters. +shifts = [1.0, 2.0, 3.0, 4.0] + +(x, stats) = cgls_lanczos_shift(A, b, shifts) +show(stats) +r = residuals(A, b, shifts, x) + +resids = map(norm, r) / norm(b) +@printf("CGLS: Relative residuals with shifts:\n") +for resid in resids + @printf(" %8.1e", resid) +end +@printf("\n") +``` diff --git a/docs/src/inplace.md b/docs/src/inplace.md index c74d5fd99..b2d723c34 100644 --- a/docs/src/inplace.md +++ b/docs/src/inplace.md @@ -15,7 +15,7 @@ Given an operator `A` and a right-hand side `b`, you can create a `KrylovSolver` For example, use `S = Vector{Float64}` if you want to solve linear systems in double precision on the CPU and `S = CuVector{Float32}` if you want to solve linear systems in single precision on an Nvidia GPU. !!! note - `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `BlockGmresSolver`, `FgmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`). + `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `BlockGmresSolver`, `FgmresSolver`, `GpmrSolver`, `CgLanczosShiftSolver` and `CglsLanczosShiftSolver` require an additional argument (`memory` or `nshifts`). The workspace is always the first argument of the in-place methods: diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md index 22e24ce90..42567df75 100644 --- a/docs/src/preconditioners.md +++ b/docs/src/preconditioners.md @@ -60,7 +60,7 @@ However, there is no need to specify $L$ and one may specify $P_c = LL^H$ or its ### Linear least-squares problems -Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr). +Methods concerned: [`CGLS`](@ref cgls), [`CGLS-LANCZOS-SHIFT`](@ref cgls_lanczos_shift), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr). | Formulation | Without preconditioning | With preconditioning | |:---------------------:|:------------------------------------:|:-------------------------------------------:| diff --git a/docs/src/processes.md b/docs/src/processes.md index 7452b0577..4583f13e5 100644 --- a/docs/src/processes.md +++ b/docs/src/processes.md @@ -91,7 +91,7 @@ Note that depending on how we normalize the vectors that compose $V_k$, $T_{k+1, The function [`hermitian_lanczos`](@ref hermitian_lanczos(::Any, ::AbstractVector{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat)) returns $V_{k+1}$, $\beta_1$ and $T_{k+1,k}$. -Related methods: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CR`](@ref cr), [`CAR`](@ref car), [`MINRES`](@ref minres), [`MINRES-QLP`](@ref minres_qlp), [`MINARES`](@ref minares), [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`CG-LANCZOS`](@ref cg_lanczos) and [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift). +Related methods: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CR`](@ref cr), [`CAR`](@ref car), [`MINRES`](@ref minres), [`MINRES-QLP`](@ref minres_qlp), [`MINARES`](@ref minares), [`CGLS`](@ref cgls), [`CGLS-LANCZOS-SHIFT`](@ref cgls_lanczos_shift), [`CRLS`](@ref crls), [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`CG-LANCZOS`](@ref cg_lanczos) and [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift). ```@docs hermitian_lanczos(::Any, ::AbstractVector{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) diff --git a/docs/src/solvers/ls.md b/docs/src/solvers/ls.md index fecfbc417..487f7274e 100644 --- a/docs/src/solvers/ls.md +++ b/docs/src/solvers/ls.md @@ -7,6 +7,8 @@ ```@docs cgls cgls! +cgls_lanczos_shift +cgls_lanczos_shift! ``` ## CRLS diff --git a/docs/src/storage.md b/docs/src/storage.md index fcd74abbb..ba235a3d7 100644 --- a/docs/src/storage.md +++ b/docs/src/storage.md @@ -39,7 +39,7 @@ This section provides the storage requirements of all Krylov methods available i We denote by $m$ and $n$ the number of rows and columns of the linear problem. The memory parameter of DIOM, FOM, DQGMRES, GMRES, FGMRES and GPMR is $k$. -The numbers of shifts of CG-LANCZOS-SHIFT is $p$. +The numbers of shifts of CG-LANCZOS-SHIFT and CGLS-LANCZOS-SHIFT is $p$. ## Theoretical storage requirements @@ -81,9 +81,9 @@ Each table summarizes the storage requirements of Krylov methods recommended to #### Least-squares problems -| Methods | [`USYMQR`](@ref usymqr) | [`CGLS`](@ref cgls) | [`CRLS`](@ref crls) | [`LSLQ`](@ref lslq) | [`LSQR`](@ref lsqr) | [`LSMR`](@ref lsmr) | +| Methods | [`USYMQR`](@ref usymqr) | [`CGLS`](@ref cgls) | [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift) | [`CRLS`](@ref crls) | [`LSLQ`](@ref lslq) | [`LSQR`](@ref lsqr) | [`LSMR`](@ref lsmr) | |:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:| -| Storage | $6n + 3m$ | $3n + 2m$ | $4n + 3m$ | $4n + 2m$ | $4n + 2m$ | $5n + 2m$ | +| Storage | $6n + 3m$ | $3n + 2m$ | $3n + 2m + Xp$ | $4n + 3m$ | $4n + 2m$ | $4n + 2m$ | $5n + 2m$ | #### Adjoint systems diff --git a/src/Krylov.jl b/src/Krylov.jl index 760ad6074..282e63f19 100644 --- a/src/Krylov.jl +++ b/src/Krylov.jl @@ -47,6 +47,7 @@ include("qmr.jl") include("bilqr.jl") include("cgls.jl") +include("cgls_lanczos_shift.jl") include("crls.jl") include("cgne.jl") diff --git a/src/cgls_lanczos_shift.jl b/src/cgls_lanczos_shift.jl new file mode 100644 index 000000000..9400cef69 --- /dev/null +++ b/src/cgls_lanczos_shift.jl @@ -0,0 +1,292 @@ +# An implementation of the Lanczos version of the conjugate gradient method +# for a family of shifted systems of the form (AᵀA + λI) x = b. +# +# The implementation follows +# A. Frommer and P. Maass, Fast CG-Based Methods for Tikhonov-Phillips Regularization, +# SIAM Journal on Scientific Computing, 20(5), pp. 1831--1850, 1999. +# +# Tangi Migot, +# Montreal, July 2022. + +export cgls_lanczos_shift, cgls_lanczos_shift! + + +""" + (x, stats) = cgls_lanczos_shift(A, b::AbstractVector{FC}; + M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), + radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false, + callback=solver->false) + +`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. +`FC` is `T` or `Complex{T}`. + +Solve the regularized linear least-squares problem + + minimize ‖b - Ax‖₂² + λ‖x‖₂² + +using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization +parameter. This method is equivalent to applying CG to the normal equations + + (AᵀA + λI) x = Aᵀb + +but is more stable. + +CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂. +It is formally equivalent to LSQR, though can be slightly less accurate, +but simpler to implement. + +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n; +* `shifts`: a vector of length p. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not, unless `linesearch` is also `true`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `timemax`: the time limit in seconds; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a vector of p dense vectors, each one of length n; +* `stats`: statistics collected on the run in a [`LanczosShiftStats`](@ref) structure. + +#### References +* M. R. Hestenes and E. Stiefel. [*Methods of conjugate gradients for solving linear systems*](https://doi.org/10.6028/jres.049.044), Journal of Research of the National Bureau of Standards, 49(6), pp. 409--436, 1952. +* A. Björck, T. Elfving and Z. Strakos, [*Stability of Conjugate Gradient and Lanczos Methods for Linear Least Squares Problems*](https://doi.org/10.1137/S089547989631202X), SIAM Journal on Matrix Analysis and Applications, 19(3), pp. 720--736, 1998. +""" +function cgls_lanczos_shift end + +""" + solver = cgls_lanczos_shift!(solver::CglsLanczosShiftSolver, A, b; kwargs...) +where `kwargs` are keyword arguments of [`cgls_lanczos_shift`](@ref). +See [`CglsLanczosShiftSolver`](@ref) for more details about the `solver`. +""" +function cgls_lanczos_shift! end + +def_args_cg_lanczos_shift = (:(A ), + :(b::AbstractVector{FC} ), + :(shifts::AbstractVector{T})) + +def_kwargs_cg_lanczos_shift = (:(; M = I ), + :(; ldiv::Bool = false ), + :(; check_curvature::Bool = false), + :(; atol::T = √eps(T) ), + :(; rtol::T = √eps(T) ), + :(; itmax::Int = 0 ), + :(; timemax::Float64 = Inf ), + :(; verbose::Int = 0 ), + :(; history::Bool = false ), + :(; callback = solver -> false ), + :(; iostream::IO = kstdout )) + +def_kwargs_cg_lanczos_shift = mapreduce(extract_parameters, vcat, def_kwargs_cg_lanczos_shift) + +args_cg_lanczos_shift = (:A, :b, :shifts) +kwargs_cg_lanczos_shift = (:M, :ldiv, :check_curvature, :atol, :rtol, :itmax, :timemax, :verbose, :history, :callback, :iostream) + +solver :: CglsLanczosShiftSolver{T,FC,S}, A, b :: AbstractVector{FC}, shifts :: AbstractVector{T}; + M=I, atol :: T=√eps(T), rtol :: T=√eps(T), + itmax :: Int=0, verbose :: Int=0, history :: Bool=false, + callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + +@eval begin + function cgls_lanczos_shift($(def_args_cg_lanczos_shift...); $(def_kwargs_cg_lanczos_shift...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + start_time = time_ns() + nshifts = length(shifts) + solver = CglsLanczosShiftSolver(A, b, nshifts) + elapsed_time = ktimer(start_time) + timemax -= elapsed_time + cgls_lanczos_shift!(solver, $(args_cg_lanczos_shift...); $(kwargs_cg_lanczos_shift...)) + solver.stats.timer += elapsed_time + return (solver.x, solver.stats) + end + + function cgls_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, $(def_args_cgls_lanczos_shift...); $(def_kwargs_cgls_lanczos_shift...)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: AbstractVector{FC}} + + # Timer + start_time = time_ns() + timemax_ns = 1e9 * timemax + + m, n = size(A) + length(b) == m || error("Inconsistent problem size") + + nshifts = length(shifts) + nshifts == solver.nshifts || error("solver.nshifts = $(solver.nshifts) is inconsistent with length(shifts) = $nshifts") + (verbose > 0) && @printf(iostream, "CGLS-LANCZOS-SHIFT: system of %d equations in %d variables with %d shifts\n", m, n, nshifts) + + # Tests M = Iₙ + MisI = (M === I) + if !MisI + @warn "Preconditioner not implemented" + end + + # Check type consistency + eltype(A) == FC || @warn "eltype(A) ≠ $FC. This could lead to errors or additional allocations in operator-vector products." + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Compute the adjoint of A + Aᵀ = A' + + # Set up workspace. + allocate_if(!MisI, solver, :v, S, n) + u_prev, utilde = solver.Mv_prev, solver.Mv_next + u = solver.u + x, p, σ, δhat = solver.x, solver.p, solver.σ, solver.δhat + ω, γ, rNorms, converged = solver.ω, solver.γ, solver.rNorms, solver.converged + not_cv, stats = solver.not_cv, solver.stats + rNorms_history, indefinite, status = stats.residuals, stats.indefinite, stats.status + reset!(stats) + v = solver.v # v = MisI ? Mv : solver.v + + # Initial state. + ## Distribute x similarly to shifts. + for i = 1 : nshifts + x[i] .= zero(FC) # x₀ + end + + u .= b + u_prev .= zero(T) + mul!(v, A', u) # v₁ ← A' * b + β = sqrt(@kdotr(n, v, v)) # β₁ = v₁ᵀ M v₁ + rNorms .= β + if history + for i = 1 : nshifts + push!(rNorms_history[i], rNorms[i]) + end + end + + # Keep track of shifted systems with negative curvature if required. + indefinite .= false + + if β == 0 + stats.niter = 0 + stats.solved = true + stats.timer = ktimer(start_time) + status .= "x = 0 is a zero-residual solution" + return solver + end + + # Initialize each p to v. + for i = 1 : nshifts + p[i] .= v + end + + # Initialize Lanczos process. + # β₁v₁ = b + @kscal!(n, one(FC) / β, v) # v₁ ← v₁ / β₁ + # MisI || @kscal!(n, one(FC) / β, Mv) # Mv₁ ← Mv₁ / β₁ + # Mv_prev .= Mv + @kscal!(m, one(FC) / β, u) + + # Initialize some constants used in recursions below. + ρ = one(T) + σ .= β + δhat .= zero(T) + ω .= zero(T) + γ .= one(T) + + # Define stopping tolerance. + ε = atol + rtol * β + + # Keep track of shifted systems that have converged. + for i = 1 : nshifts + converged[i] = rNorms[i] ≤ ε + not_cv[i] = !converged[i] + end + iter = 0 + itmax == 0 && (itmax = 2 * max(m, n)) + + # Build format strings for printing. + (verbose > 0) && (fmt = Printf.Format("%5d" * repeat(" %8.1e", nshifts) * " %.2fs\n")) + kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms..., ktimer(start_time)) + + solved = !reduce(|, not_cv) # ArNorm ≤ ε + tired = iter ≥ itmax + status .= "unknown" + user_requested_exit = false + overtimed = false + + # Main loop. + while ! (solved || tired || user_requested_exit || overtimed) + + # Form next Lanczos vector. + mul!(utilde, A, v) # utildeₖ ← Avₖ + δ = @kdotr(m, utilde, utilde) # δₖ = vₖᵀAᵀAvₖ + @kaxpy!(m, -δ, u, utilde) # uₖ₊₁ = utildeₖ - δₖuₖ - βₖuₖ₋₁ + @kaxpy!(m, -β, u_prev, utilde) + mul!(v, A', utilde) # vₖ₊₁ = Aᵀuₖ₊₁ + β = sqrt(@kdotr(n, v, v)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁ + @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ + @kscal!(m, one(FC) / β, utilde) # uₖ₊₁ = uₖ₊₁ / βₖ₊₁ + u_prev .= u + u .= utilde + + MisI || (ρ = @kdotr(n, v, v)) + for i = 1 : nshifts + δhat[i] = δ + ρ * shifts[i] + γ[i] = 1 / (δhat[i] - ω[i] / γ[i]) + end + + # Compute next CG iterate for each shifted system that has not yet converged. + for i = 1 : nshifts + not_cv[i] = !converged[i] + if not_cv[i] + @kaxpy!(n, γ[i], p[i], x[i]) + ω[i] = β * γ[i] + σ[i] *= -ω[i] + ω[i] *= ω[i] + @kaxpby!(n, σ[i], v, ω[i], p[i]) + + # Update list of systems that have not converged. + rNorms[i] = abs(σ[i]) + converged[i] = rNorms[i] ≤ ε + end + end + + if length(not_cv) > 0 && history + for i = 1 : nshifts + not_cv[i] && push!(rNorms_history[i], rNorms[i]) + end + end + + # Is there a better way than to update this array twice per iteration? + for i = 1 : nshifts + not_cv[i] = !converged[i] + end + iter = iter + 1 + kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms..., ktimer(start_time)) + + user_requested_exit = callback(solver) :: Bool + solved = !reduce(|, not_cv) + tired = iter ≥ itmax + timer = time_ns() - start_time + overtimed = timer > timemax_ns + end + (verbose > 0) && @printf(iostream, "\n") + + # Termination status + overtimed && (status = "time limit exceeded") + for i = 1 : nshifts + tired && (stats.status[i] = "maximum number of iterations exceeded") + converged[i] && (stats.status[i] = "solution good enough given atol and rtol") + end + user_requested_exit && (status .= "user-requested exit") + + # Update stats + stats.niter = iter + stats.solved = solved + stats.timer = ktimer(start_time) + stats.inconsistent .= false + return solver + end +end diff --git a/src/krylov_solve.jl b/src/krylov_solve.jl index 2940bc7e8..ad2bef559 100644 --- a/src/krylov_solve.jl +++ b/src/krylov_solve.jl @@ -32,6 +32,7 @@ for (KS, fun, args, def_args, optargs, def_optargs, kwargs, def_kwargs) in [ (:CrmrSolver , :crmr! , args_crmr , def_args_crmr , () , () , kwargs_crmr , def_kwargs_crmr ) (:CgSolver , :cg! , args_cg , def_args_cg , optargs_cg , def_optargs_cg , kwargs_cg , def_kwargs_cg ) (:CgLanczosShiftSolver, :cg_lanczos_shift!, args_cg_lanczos_shift, def_args_cg_lanczos_shift, () , () , kwargs_cg_lanczos_shift, def_kwargs_cg_lanczos_shift) + (:CglsLanczosShiftSolver, :cgls_lanczos_shift!, args_cgls_lanczos_shift, def_args_cgls_lanczos_shift, () , () , kwargs_cgls_lanczos_shift, def_kwargs_cgls_lanczos_shift) (:CglsSolver , :cgls! , args_cgls , def_args_cgls , () , () , kwargs_cgls , def_kwargs_cgls ) (:CgLanczosSolver , :cg_lanczos! , args_cg_lanczos , def_args_cg_lanczos , optargs_cg_lanczos, def_optargs_cg_lanczos, kwargs_cg_lanczos , def_kwargs_cg_lanczos ) (:BilqSolver , :bilq! , args_bilq , def_args_bilq , optargs_bilq , def_optargs_bilq , kwargs_bilq , def_kwargs_bilq ) diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl index 13c98f823..08b21f72c 100644 --- a/src/krylov_solvers.jl +++ b/src/krylov_solvers.jl @@ -1,8 +1,8 @@ export KrylovSolver, MinresSolver, CgSolver, CrSolver, SymmlqSolver, CgLanczosSolver, CgLanczosShiftSolver, MinresQlpSolver, DqgmresSolver, DiomSolver, UsymlqSolver, UsymqrSolver, TricgSolver, TrimrSolver, TrilqrSolver, CgsSolver, BicgstabSolver, -BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CrlsSolver, CgneSolver, CrmrSolver, -LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver, +BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CglsLanczosShiftSolver, CrlsSolver, CgneSolver, +CrmrSolver, LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver, GmresSolver, FomSolver, GpmrSolver, FgmresSolver, CarSolver, MinaresSolver export solve!, solution, nsolution, statistics, issolved, issolved_primal, issolved_dual, @@ -37,6 +37,7 @@ const KRYLOV_SOLVERS = Dict( :qmr => :QmrSolver , :bilqr => :BilqrSolver , :cgls => :CglsSolver , + :cgls_lanczos_shift => :CglsLanczosShiftSolver, :crls => :CrlsSolver , :cgne => :CgneSolver , :crmr => :CrmrSolver , @@ -1179,6 +1180,63 @@ function CglsSolver(A, b) CglsSolver(m, n, S) end +""" +Type for storing the vectors required by the in-place version of CGLS-LANCZOS-SHIFT. + +The outer constructors + + solver = CglsLanczosShiftSolver(m, n, nshifts, S) + solver = CglsLanczosShiftSolver(A, b, nshifts) + +may be used in order to create these vectors. +""" +mutable struct CglsLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S} + Mv :: S + Mv_prev :: S + Mv_next :: S + u :: S + v :: S + x :: Vector{S} + p :: Vector{S} + σ :: Vector{T} + δhat :: Vector{T} + ω :: Vector{T} + γ :: Vector{T} + rNorms :: Vector{T} + converged :: BitVector + not_cv :: BitVector + stats :: SimpleShiftsStats{T} +end + +function CglsLanczosShiftSolver(m, n, nshifts, S) + FC = eltype(S) + T = real(FC) + Mv = S(undef, n) + Mv_prev = S(undef, m) + Mv_next = S(undef, m) + u = S(undef, m) + v = S(undef, n) + x = [S(undef, n) for i = 1 : nshifts] + p = [S(undef, n) for i = 1 : nshifts] + σ = Vector{T}(undef, nshifts) + δhat = Vector{T}(undef, nshifts) + ω = Vector{T}(undef, nshifts) + γ = Vector{T}(undef, nshifts) + rNorms = Vector{T}(undef, nshifts) + indefinite = BitVector(undef, nshifts) + converged = BitVector(undef, nshifts) + not_cv = BitVector(undef, nshifts) + stats = SimpleShiftsStats(0, false, zeros(Bool, nshifts), indefinite, [T[] for i = 1 : nshifts], [T[] for i = 1 : nshifts], zeros(T, nshifts), ["unknown" for i = 1 : nshifts]) + solver = new{T,FC,S}(Mv, Mv_prev, Mv_next, u, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats) + return solver +end + +function CglsLanczosShiftSolver(A, b, nshifts) + m, n = size(A) + S = ktypeof(b) + CglsLanczosShiftSolver(m, n, nshifts, S) +end + """ Type for storing the vectors required by the in-place version of CRLS. @@ -1916,6 +1974,7 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [ (:CgSolver , :cg! , 1, 1, 0, true ) (:CgLanczosShiftSolver, :cg_lanczos_shift!, 1, 1, 0, false) (:CglsSolver , :cgls! , 1, 1, 1, false) + (:CglsLanczosShiftSolver, :cgls_lanczos_shift!, 1, 1, 0, false) (:CgLanczosSolver , :cg_lanczos! , 1, 1, 0, true ) (:BilqSolver , :bilq! , 1, 1, 1, true ) (:MinresQlpSolver , :minres_qlp! , 1, 1, 0, true ) diff --git a/test/test_allocations.jl b/test/test_allocations.jl index e4620d391..517232854 100644 --- a/test/test_allocations.jl +++ b/test/test_allocations.jl @@ -394,6 +394,30 @@ @test inplace_cgls_bytes == 0 end + @testset "CGLS-LANCZOS-SHIFT" begin + # CGLS-LANCZOS-SHIFT needs: + # - 3 n-vectors: x, p, s + # - 2 m-vectors: r, q + + # - 2 n-vectors: Mv, v + # - 3 m-vectors: Mv_prev, Mv_next, u + # - 2 (n*nshifts)-matrices: x, p + # - 5 nshifts-vectors: σ, δhat, ω, γ, rNorms + # - 2 nshifts-bitVector: converged, not_cv + + storage_cgls_lanczos_shift_bytes(m, n) = nbits_FC * (3 * n + 2 * m) + + expected_cgls_lanczos_shift_bytes = storage_cgls_lanczos_shift_bytes(m, k) + (x, stats) = cgls_lanczos_shift(Ao, b) # warmup + actual_cgls_lanczos_shift_bytes = @allocated cgls_lanczos_shift(Ao, b) + @test expected_cgls_lanczos_shift_bytes ≤ actual_cgls_lanczos_shift_bytes ≤ 1.02 * expected_cgls_lanczos_shift_bytes + + solver = CglsSolver(Ao, b) + cgls_lanczos_shift!(solver, Ao, b) # warmup + inplace_cgls_lanczos_shift_bytes = @allocated cgls_lanczos_shift!(solver, Ao, b) + @test inplace_cgls_lanczos_shift_bytes == 0 + end + @testset "LSLQ" begin # LSLQ needs: # - 4 n-vectors: x_lq, v, Aᴴu, w̄ (= x_cg) diff --git a/test/test_cgls_lanczos_shift.jl b/test/test_cgls_lanczos_shift.jl new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_mp.jl b/test/test_mp.jl index 3e329f1b1..90af3664d 100644 --- a/test/test_mp.jl +++ b/test/test_mp.jl @@ -3,7 +3,7 @@ for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr, :lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres, :bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom, - :car, :minares, :fgmres, :cg_lanczos_shift) + :car, :minares, :fgmres, :cg_lanczos_shift, :cgls_lanczos_shift) @testset "$fn" begin for T in (Float16, Float32, Float64, BigFloat) for FC in (T, Complex{T}) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index 993ed8bda..e89287871 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -36,6 +36,7 @@ function test_solvers(FC) $solvers[:craig] = $(KRYLOV_SOLVERS[:craig])($m, $n, $S) $solvers[:lslq] = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S) $solvers[:cgls] = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S) + $solvers[:cgls_lanczos_shift] = $(KRYLOV_SOLVERS[:cgls_lanczos_shift])($n, $m, $nshifts, $S) $solvers[:lsqr] = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S) $solvers[:crls] = $(KRYLOV_SOLVERS[:crls])($n, $m, $S) $solvers[:lsmr] = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S) @@ -69,6 +70,8 @@ function test_solvers(FC) method == :cg_lanczos_shift && @test_throws ErrorException("solver.nshifts = $(solver.nshifts) is inconsistent with length(shifts) = $(length(shifts2))") solve!(solver, A, b, shifts2) method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($m2, $n2)") solve!(solver, Au2, c2) method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2) + hod == :cg_lanczos_shift && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, Ao2, b2, shifts2) + method == :cg_lanczos_shift && @test_throws ErrorException("solver.nshifts = $(solver.nshifts) is inconsistent with length(shifts) = $(length(shifts2))") solve!(solver, A, b, shifts2) method ∈ (:bilqr, :trilqr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $n2)") solve!(solver, A2, b2, b2) method == :gpmr && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, Au2, b2, c2) method ∈ (:tricg, :trimr) && @test_throws ErrorException("(solver.m, solver.n) = ($(solver.m), $(solver.n)) is inconsistent with size(A) = ($n2, $m2)") solve!(solver, Ao2, b2, c2) diff --git a/test/test_verbose.jl b/test/test_verbose.jl index d688def16..06856d22a 100644 --- a/test/test_verbose.jl +++ b/test/test_verbose.jl @@ -15,7 +15,7 @@ function test_verbose(FC) for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr, :lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres, :bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom, - :car, :minares, :fgmres, :cg_lanczos_shift) + :car, :minares, :fgmres, :cg_lanczos_shift, :cgls_lanczos_shift) @testset "$fn" begin io = IOBuffer() @@ -33,7 +33,7 @@ function test_verbose(FC) @eval $fn($Ao, $b, $c, verbose=1, iostream=$io) elseif fn == :gpmr @eval $fn($Ao, $Au, $b, $c, verbose=1, iostream=$io) - elseif fn == :cg_lanczos_shift + elseif fn in (:cg_lanczos_shift, :cgls_lanczos_shift) @eval $fn($A, $b, $shifts, verbose=1, iostream=$io) else @eval $fn($A, $b, verbose=1, iostream=$io)