diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 000000000..d2cbb0258 --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,67 @@ +steps: + - label: "Nvidia GPUs -- CUDA.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.8 + agents: + queue: "juliagpu" + cuda: "*" + command: | + julia --color=yes --project -e ' + using Pkg + Pkg.add("CUDA") + Pkg.add("LinearOperators") + Pkg.instantiate() + include("test/gpu/nvidia.jl")' + timeout_in_minutes: 30 + + - label: "AMD GPUs -- AMDGPU.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.9-nightly + agents: + queue: "juliagpu" + rocm: "*" + rocmgpu: "*" + env: + JULIA_AMDGPU_CORE_MUST_LOAD: "1" + JULIA_AMDGPU_HIP_MUST_LOAD: "1" + command: | + julia --color=yes --project -e ' + using Pkg + # Pkg.add("AMDGPU") + Pkg.add(url="https://github.com/JuliaGPU/AMDGPU.jl", rev="master") + Pkg.instantiate() + include("test/gpu/amd.jl")' + timeout_in_minutes: 30 + + - label: "Intel GPUs -- oneAPI.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.8 + agents: + queue: "juliagpu" + intel: "*" + command: | + julia --color=yes --project -e ' + using Pkg + Pkg.add("oneAPI") + Pkg.instantiate() + include("test/gpu/intel.jl")' + timeout_in_minutes: 30 + + - label: "Apple M1 GPUs -- Metal.jl" + plugins: + - JuliaCI/julia#v1: + version: 1.8 + agents: + queue: "juliaecosystem" + os: "macos" + arch: "aarch64" + command: | + julia --color=yes --project -e ' + using Pkg + Pkg.add("Metal") + Pkg.instantiate() + include("test/gpu/metal.jl")' + timeout_in_minutes: 30 diff --git a/.cirrus.yml b/.cirrus.yml index d559cf609..f51d815a3 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,15 +1,41 @@ -freebsd_instance: - image: freebsd-13-0-release-amd64 task: - name: FreeBSD - env: - matrix: - - JULIA_VERSION: 1.6 - - JULIA_VERSION: 1 - - JULIA_VERSION: nightly - allow_failures: $JULIA_VERSION == 'nightly' - install_script: - - sh -c "$(fetch https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh -o -)" + matrix: + - name: FreeBSD + freebsd_instance: + image_family: freebsd-13-1 + env: + matrix: + - JULIA_VERSION: 1.6 + - JULIA_VERSION: 1 + - name: Linux ARMv8 + arm_container: + image: ubuntu:latest + env: + - JULIA_VERSION: 1 + - name: musl Linux + container: + image: alpine:3.14 + env: + - JULIA_VERSION: 1 + - name: MacOS M1 + macos_instance: + image: ghcr.io/cirruslabs/macos-monterey-base:latest + env: + - JULIA_VERSION: 1 + install_script: | + URL="https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh" + set -x + if [ "$(uname -s)" = "Linux" ] && command -v apt; then + apt update + apt install -y curl + fi + if command -v curl; then + sh -c "$(curl ${URL})" + elif command -v wget; then + sh -c "$(wget ${URL} -q -O-)" + elif command -v fetch; then + sh -c "$(fetch ${URL} -o -)" + fi build_script: - cirrusjl build test_script: diff --git a/.github/workflows/Aqua.yml b/.github/workflows/Aqua.yml new file mode 100644 index 000000000..da872e225 --- /dev/null +++ b/.github/workflows/Aqua.yml @@ -0,0 +1,17 @@ +name: Aqua +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened] +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: julia-actions/setup-julia@latest + with: + version: '1' + - name: Aqua.jl + run: julia --color=yes -e 'using Pkg; Pkg.add("Aqua"); Pkg.develop(path="."); using Aqua, Krylov; Aqua.test_all(Krylov)' diff --git a/.github/workflows/Breakage.yml b/.github/workflows/Breakage.yml index 266eed3cc..8fd92afdd 100644 --- 
a/.github/workflows/Breakage.yml +++ b/.github/workflows/Breakage.yml @@ -24,14 +24,14 @@ jobs: pkgversion: [latest, stable] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Install Julia - uses: julia-actions/setup-julia@v1 with: version: '1' arch: x64 - - uses: actions/cache@v1 + - uses: actions/cache@v3 env: cache-name: cache-artifacts with: @@ -85,7 +85,7 @@ jobs: end; end' - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: pr path: pr/ @@ -94,9 +94,9 @@ jobs: needs: break runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: pr path: pr/ @@ -127,7 +127,7 @@ jobs: fi done >> MSG - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: pr path: pr/ diff --git a/.github/workflows/CI_M1.yml b/.github/workflows/CI_M1.yml deleted file mode 100644 index 6f9aa720b..000000000 --- a/.github/workflows/CI_M1.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: CI_M1 -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - name: Julia ${{ matrix.version }} - macOS - ${{ matrix.arch }} - ${{ github.event_name }} - runs-on: self-hosted - strategy: - fail-fast: false - matrix: - version: - - '1' - arch: - - aarch64 - steps: - - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@v1 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - name: Version Info - shell: julia --color=yes {0} - run: | - using InteractiveUtils - versioninfo() - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 diff --git a/.github/workflows/CommentPR.yml b/.github/workflows/CommentPR.yml index 14f6dcd47..043113f74 100644 --- a/.github/workflows/CommentPR.yml +++ b/.github/workflows/CommentPR.yml @@ -39,16 +39,36 @@ jobs: - run: unzip pr.zip - name: 'Comment on PR' - uses: actions/github-script@v3 + uses: actions/github-script@v6 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | - var fs = require('fs'); - var issue_number = Number(fs.readFileSync('./NR')); - var msg = fs.readFileSync('./MSG', 'utf8'); - await github.issues.createComment({ + var fs = require('fs') + var issue_number = Number(fs.readFileSync('./NR')) + var msg = fs.readFileSync('./MSG', 'utf8') + + // Get the existing comments. + const {data: comments} = await github.rest.issues.listComments({ owner: context.repo.owner, repo: context.repo.repo, - issue_number: issue_number, - body: msg - }); + issue_number: issue_number + }) + + // Find any comment already made by the bot. 
+ const botComment = comments.find(comment => comment.user.id === 41898282) + + if (botComment) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body: msg + }) + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue_number, + body: msg + }) + } diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index b546a8082..7a9c79fd4 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -1,19 +1,44 @@ name: CompatHelper - on: schedule: - - cron: '00 00 * * *' - + - cron: 0 0 * * * + workflow_dispatch: +permissions: + contents: write + pull-requests: write jobs: CompatHelper: runs-on: ubuntu-latest steps: - - uses: julia-actions/setup-julia@latest + - name: Check if Julia is already available in the PATH + id: julia_in_path + run: which julia + continue-on-error: true + - name: Install Julia, but only if it is not already available in the PATH + uses: julia-actions/setup-julia@v1 with: version: '1' - - name: CompatHelper - run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - - name: CompatHelper.main() + arch: ${{ runner.arch }} + if: steps.julia_in_path.outcome != 'success' + - name: "Add the General registry via Git" + run: | + import Pkg + ENV["JULIA_PKG_SERVER"] = "" + Pkg.Registry.add("General") + shell: julia --color=yes {0} + - name: "Install CompatHelper" + run: | + import Pkg + name = "CompatHelper" + uuid = "aa819f21-2bde-4658-8897-bab36330d9b7" + version = "3" + Pkg.add(; name, uuid, version) + shell: julia --color=yes {0} + - name: "Run CompatHelper" + run: | + import CompatHelper + CompatHelper.main() + shell: julia --color=yes {0} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: julia -e 'using CompatHelper; CompatHelper.main()' + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index be0b86584..406f15e0d 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -10,12 +10,12 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@latest with: version: '1' - name: Install dependencies - run: julia --project=docs -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + run: julia --project=docs --color=yes -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - name: Build and deploy env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/Invalidations.yml b/.github/workflows/Invalidations.yml new file mode 100644 index 000000000..b0c37e05f --- /dev/null +++ b/.github/workflows/Invalidations.yml @@ -0,0 +1,43 @@ +name: Invalidations +# Uses SnoopCompile to evaluate number of invalidations caused by `using` the package +# using https://github.com/julia-actions/julia-invalidations +# Based on https://github.com/julia-actions/julia-invalidations + +on: + pull_request: + +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: always. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + evaluate: + # Only run on PRs to the default branch. 
+ # In the PR trigger above branches can be specified only explicitly whereas this check should work for master, main, or any other default branch + if: github.base_ref == github.event.repository.default_branch + runs-on: ubuntu-latest + steps: + - uses: julia-actions/setup-julia@v1 + with: + version: '1' + - uses: actions/checkout@v3 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_pr + + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.repository.default_branch }} + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-invalidations@v1 + id: invs_default + + - name: Report invalidation counts + run: | + echo "Invalidations on default branch: ${{ steps.invs_default.outputs.total }} (${{ steps.invs_default.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + echo "This branch: ${{ steps.invs_pr.outputs.total }} (${{ steps.invs_pr.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY + - name: Check if the PR does increase number of invalidations + if: steps.invs_pr.outputs.total > steps.invs_default.outputs.total + run: exit 1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 409e0d146..9e1791f48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,12 +31,12 @@ jobs: arch: x64 allow_failure: true steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: actions/cache@v1 + - uses: actions/cache@v3 env: cache-name: cache-artifacts with: @@ -49,6 +49,6 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v1 + - uses: codecov/codecov-action@v3 with: - file: lcov.info + files: lcov.info diff --git a/Project.toml b/Project.toml index a91e07b8a..6249e13f4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Krylov" uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" -version = "0.8.3" +version = "0.9.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/README.md b/README.md index a4664e187..55476e684 100644 --- a/README.md +++ b/README.md @@ -71,22 +71,22 @@ Overdetermined sytems are less common but also occur. 4. Adjoint systems

- Ax = b   and   Aᵀy = c + Ax = b   and   Aᴴy = c

where **_A_** can have any shape. -5. Saddle-point and symmetric quasi-definite (SQD) systems +5. Saddle-point and Hermitian quasi-definite systems

[M     A]  [x] = [b]
- [Aᵀ   -N]  [y]    [c] + [Aᴴ   -N]  [y]    [c]

where **_A_** can have any shape. -6. Generalized saddle-point and unsymmetric partitioned systems +6. Generalized saddle-point and non-Hermitian partitioned systems

[M   A]  [x] = [b] @@ -94,7 +94,7 @@ where **_A_** can have any shape. [B   N]  [y]    [c]

-where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**. +where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**. **_A_**, **_B_**, **_b_** and **_c_** must be all nonzero. Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because: @@ -121,3 +121,10 @@ julia> ] pkg> add Krylov pkg> test Krylov ``` + +## Bug reports and discussions + +If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues). +Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please. + +If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome. diff --git a/docs/make.jl b/docs/make.jl index 57ad87cd2..441ddb3ee 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -6,23 +6,26 @@ makedocs( linkcheck = true, strict = true, format = Documenter.HTML(assets = ["assets/style.css"], - ansicolor=true, + ansicolor = true, prettyurls = get(ENV, "CI", nothing) == "true", collapselevel = 1), sitename = "Krylov.jl", pages = ["Home" => "index.md", "API" => "api.md", - "Krylov methods" => ["Symmetric positive definite linear systems" => "solvers/spd.md", - "Symmetric indefinite linear systems" => "solvers/sid.md", - "Unsymmetric linear systems" => "solvers/unsymmetric.md", + "Krylov processes" => "processes.md", + "Krylov methods" => ["Hermitian positive definite linear systems" => "solvers/spd.md", + "Hermitian indefinite linear systems" => "solvers/sid.md", + "Non-Hermitian square linear systems" => "solvers/unsymmetric.md", "Least-norm problems" => "solvers/ln.md", "Least-squares problems" => "solvers/ls.md", "Adjoint systems" => "solvers/as.md", - "Saddle-point and symmetric quasi-definite systems" => "solvers/sp_sqd.md", - "Generalized saddle-point and unsymmetric partitioned systems" => "solvers/gsp.md"], + "Saddle-point and Hermitian quasi-definite systems" => "solvers/sp_sqd.md", + "Generalized saddle-point and non-Hermitian partitioned systems" => "solvers/gsp.md"], "In-place methods" => "inplace.md", + "Preconditioners" => "preconditioners.md", + "Storage requirements" => "storage.md", "GPU support" => "gpu.md", - "Warm start" => "warm_start.md", + "Warm-start" => "warm-start.md", "Factorization-free operators" => "factorization-free.md", "Callbacks" => "callbacks.md", "Performance tips" => "tips.md", diff --git a/docs/src/api.md b/docs/src/api.md index 7f2f4dff7..238c86f1a 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -48,6 +48,7 @@ LnlqSolver CraigSolver CraigmrSolver GpmrSolver +FgmresSolver ``` ## Utilities @@ -60,4 +61,6 @@ Krylov.vec2str Krylov.ktypeof Krylov.kzeros Krylov.kones +Krylov.vector_to_matrix +Krylov.matrix_to_vector ``` diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md index f44018687..91e0b521c 100644 --- a/docs/src/callbacks.md +++ b/docs/src/callbacks.md @@ -1,43 +1,80 @@ -## Callbacks +# [Callbacks](@id callbacks) -Each Krylov method is able to call a callback function as `callback(solver)` at each iteration. The callback should return `true` if the main loop should terminate, and `false` otherwise. 
+Each Krylov method is able to call a callback function as `callback(solver)` at each iteration. +The callback should return `true` if the main loop should terminate, and `false` otherwise. If the method terminated because of the callback, the output status will be `"user-requested exit"`. -For example, if the user defines `my_callback(solver::MinresSolver)`, it can be passed to the solver using +For example, if the user defines `minres_callback(solver::MinresSolver)`, it can be passed to the solver using ```julia -(x, stats) = minres(A, b, callback = my_callback) +(x, stats) = minres(A, b, callback = minres_callback) ``` -If you need to write a callback that uses variables that are not in the `MinresSolver`, use a closure: +If you need to write a callback that uses variables that are not in a `KrylovSolver`, use a closure: ```julia -function my_callback2(solver::MinresSolver, A, b, storage_vec, tol::Float64) - mul!(storage_vec, A, solver.x) - storage_vec .-= b - return norm(storage_vec) ≤ tol # tolerance based on the 2-norm of the residual +function custom_stopping_condition(solver::KrylovSolver, A, b, r, tol) + mul!(r, A, solver.x) + r .-= b # r := b - Ax + bool = norm(r) ≤ tol # tolerance based on the 2-norm of the residual + return bool end -storage_vec = similar(b) -(x, stats) = minres(A, b, callback = solver -> my_callback2(solver, A, b, storage_vec, 0.1)) +cg_callback(solver) = custom_stopping_condition(solver, A, b, r, tol) +(x, stats) = cg(A, b, callback = cg_callback) ``` Alternatively, use a structure and make it callable: ```julia -mutable struct MyCallback3{S, M} - A::M - b::S - storage_vec::S - tol::Float64 +mutable struct CallbackWorkspace{T} + A::Matrix{T} + b::Vector{T} + r::Vector{T} + tol::T end -MyCallback3(A, b; tol = 0.1) = MyCallback3(A, b, similar(b), tol) -function (my_cb::MyCallback3)(solver) - mul!(my_cb.storage_vec, my_cb.A, solver.x) - my_cb.storage_vec .-= my_cb.b - return norm(my_cb.storage_vec) ≤ my_cb.tol # tolerance based on the 2-norm of the residual +function (workspace::CallbackWorkspace)(solver::KrylovSolver) + mul!(workspace.r, workspace.A, solver.x) + workspace.r .-= workspace.b + bool = norm(workspace.r) ≤ workspace.tol + return bool end -my_cb = MyCallback3(A, b; tol = 0.1) -(x, stats) = minres(A, b, callback = my_cb) +bicgstab_callback = CallbackWorkspace(A, b, r, tol) +(x, stats) = bicgstab(A, b, callback = bicgstab_callback) +``` + +Although the main goal of a callback is to add new stopping conditions, it can also retrieve information from the workspace of a Krylov method along the iterations. +We now illustrate how to store all iterates $x_k$ of the GMRES method. 
+ +```julia +S = Krylov.ktypeof(b) +global X = S[] # Storage for GMRES iterates + +function gmres_callback(solver) + z = solver.z + k = solver.inner_iter + nr = sum(1:k) + V = solver.V + R = solver.R + y = copy(z) + + # Solve Rk * yk = zk + for i = k : -1 : 1 + pos = nr + i - k + for j = k : -1 : i+1 + y[i] = y[i] - R[pos] * y[j] + pos = pos - j + 1 + end + y[i] = y[i] / R[pos] + end + + # xk = Vk * yk + xk = sum(V[i] * y[i] for i = 1:k) + push!(X, xk) + + return false # We don't want to add new stopping conditions +end + +(x, stats) = gmres(A, b, callback = gmres_callback) ``` diff --git a/docs/src/examples/tricg.md b/docs/src/examples/tricg.md index e981c2f7e..61750de5f 100644 --- a/docs/src/examples/tricg.md +++ b/docs/src/examples/tricg.md @@ -14,7 +14,7 @@ N = diagm(0 => [5.0 * i for i = 1:n]) c = -b # [I A] [x] = [b] -# [Aᵀ -I] [y] [c] +# [Aᴴ -I] [y] [c] (x, y, stats) = tricg(A, b, c) K = [eye(m) A; A' -eye(n)] B = [b; c] @@ -23,7 +23,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [-I A] [x] = [b] -# [ Aᵀ I] [y] [c] +# [ Aᴴ I] [y] [c] (x, y, stats) = tricg(A, b, c, flip=true) K = [-eye(m) A; A' eye(n)] B = [b; c] @@ -32,7 +32,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [I A] [x] = [b] -# [Aᵀ I] [y] [c] +# [Aᴴ I] [y] [c] (x, y, stats) = tricg(A, b, c, spd=true) K = [eye(m) A; A' eye(n)] B = [b; c] @@ -41,7 +41,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [-I A] [x] = [b] -# [ Aᵀ -I] [y] [c] +# [ Aᴴ -I] [y] [c] (x, y, stats) = tricg(A, b, c, snd=true) K = [-eye(m) A; A' -eye(n)] B = [b; c] @@ -50,7 +50,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [τI A] [x] = [b] -# [ Aᵀ νI] [y] [c] +# [ Aᴴ νI] [y] [c] (τ, ν) = (1e-4, 1e2) (x, y, stats) = tricg(A, b, c, τ=τ, ν=ν) K = [τ*eye(m) A; A' ν*eye(n)] @@ -60,7 +60,7 @@ resid = norm(r) @printf("TriCG: Relative residual: %8.1e\n", resid) # [M⁻¹ A ] [x] = [b] -# [Aᵀ -N⁻¹] [y] [c] +# [Aᴴ -N⁻¹] [y] [c] (x, y, stats) = tricg(A, b, c, M=M, N=N, verbose=1) K = [inv(M) A; A' -inv(N)] H = BlockDiagonalOperator(M, N) diff --git a/docs/src/examples/trimr.md b/docs/src/examples/trimr.md index 2aa48be1e..adc4e82e5 100644 --- a/docs/src/examples/trimr.md +++ b/docs/src/examples/trimr.md @@ -14,7 +14,7 @@ m, n = size(A) c = -b # [D A] [x] = [b] -# [Aᵀ 0] [y] [c] +# [Aᴴ 0] [y] [c] llt_D = cholesky(D) opD⁻¹ = LinearOperator(Float64, 5, 5, true, true, (y, v) -> ldiv!(y, llt_D, v)) opH⁻¹ = BlockDiagonalOperator(opD⁻¹, eye(n)) @@ -34,7 +34,7 @@ N = diagm(0 => [5.0 * i for i = 1:n]) c = -b # [I A] [x] = [b] -# [Aᵀ -I] [y] [c] +# [Aᴴ -I] [y] [c] (x, y, stats) = trimr(A, b, c) K = [eye(m) A; A' -eye(n)] B = [b; c] @@ -43,7 +43,7 @@ resid = norm(r) @printf("TriMR: Relative residual: %8.1e\n", resid) # [M A] [x] = [b] -# [Aᵀ -N] [y] [c] +# [Aᴴ -N] [y] [c] ldlt_M = ldl(M) ldlt_N = ldl(N) opM⁻¹ = LinearOperator(Float64, size(M,1), size(M,2), true, true, (y, v) -> ldiv!(y, ldlt_M, v)) diff --git a/docs/src/factorization-free.md b/docs/src/factorization-free.md index aa0f51f07..b97108b99 100644 --- a/docs/src/factorization-free.md +++ b/docs/src/factorization-free.md @@ -1,3 +1,32 @@ +```@raw html + +``` + ## [Factorization-free operators](@id factorization-free) All methods are factorization-free, which means that you only need to provide operator-vector products. 
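+As a minimal sketch (assuming a hypothetical diagonal operator; `LinearOperator` is used the same way elsewhere in these docs), such products can be supplied with LinearOperators.jl:
+
+```julia
+using Krylov, LinearOperators
+
+n = 100
+d = collect(1.0:n)  # diagonal of a hypothetical operator, never stored as a matrix
+symmetric = hermitian = true
+
+# Only the product v ↦ A * v is provided
+opA = LinearOperator(Float64, n, n, symmetric, hermitian, (y, v) -> y .= d .* v)
+
+b = ones(n)
+x, stats = cg(opA, b)  # CG only needs A * v products
+```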
@@ -10,8 +39,11 @@ Some methods only require `A * v` products, whereas other ones also require `A' |:--------------------------------------:|:----------------------------------------:| | CG, CR | CGLS, CRLS, CGNE, CRMR | | SYMMLQ, CG-LANCZOS, MINRES, MINRES-QLP | LSLQ, LSQR, LSMR, LNLQ, CRAIG, CRAIGMR | -| DIOM, FOM, DQGMRES, GMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR | -| CGS, BICGSTAB | TriCG, TriMR, USYMLQR | +| DIOM, FOM, DQGMRES, GMRES, FGMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR | +| CGS, BICGSTAB | TriCG, TriMR | + +!!! info + GPMR is the only method that requires `A * v` and `B * w` products. Preconditioners `M`, `N`, `C`, `D`, `E` or `F` can be also linear operators and must implement `mul!` or `ldiv!`. diff --git a/docs/src/gpu.md b/docs/src/gpu.md index 4c9887f24..378f4f5d3 100644 --- a/docs/src/gpu.md +++ b/docs/src/gpu.md @@ -1,6 +1,15 @@ -## GPU support +# [GPU support](@id gpu) -All solvers in Krylov.jl can be used with `CuArrays` and allow computations with Nvidia GPU. Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to GPU format (`CuMatrix` and `CuVector`). +Krylov methods are well suited for GPU computations because they only require matrix-vector products ($u \leftarrow Av$, $u \leftarrow A^{H}w$) and vector operations ($\|v\|$, $u^H v$, $v \leftarrow \alpha u + \beta v$), which are highly parallelizable. + +The implementations in Krylov.jl are generic so as to take advantage of the multiple dispatch and broadcast features of Julia. +Those allow the implementations to be specialized automatically by the compiler for both CPU and GPU. +Thus, Krylov.jl works with GPU backends that build on [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl), such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) or [Metal.jl](https://github.com/JuliaGPU/Metal.jl). + +## Nvidia GPUs + +All solvers in Krylov.jl can be used with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) and allow computations on Nvidia GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`CuMatrix` and `CuVector`). 
```julia using CUDA, Krylov # CPU Arrays A_cpu = rand(20, 20) b_cpu = rand(20) # GPU Arrays A_gpu = CuMatrix(A_cpu) b_gpu = CuVector(b_cpu) -# Solve a square and dense system on GPU +# Solve a square and dense system on an Nvidia GPU x, stats = bilq(A_gpu, b_gpu) ``` -Sparse matrices have a specific storage on GPU (`CuSparseMatrixCSC` or `CuSparseMatrixCSR`): +Sparse matrices have a specific storage on Nvidia GPUs (`CuSparseMatrixCSC`, `CuSparseMatrixCSR` or `CuSparseMatrixCOO`): ```julia using CUDA, Krylov # CPU Arrays A_cpu = sprand(200, 100, 0.3) b_cpu = rand(200) # GPU Arrays A_gpu = CuSparseMatrixCSC(A_cpu) b_gpu = CuVector(b_cpu) -# Solve a rectangular and sparse system on GPU +# Solve a rectangular and sparse system on an Nvidia GPU x, stats = lsmr(A_gpu, b_gpu) ``` @@ -47,14 +56,14 @@ using SparseArrays, Krylov, LinearOperators using CUDA, CUDA.CUSPARSE # Transfer the linear system from the CPU to the GPU -A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu) +A_gpu = CuSparseMatrixCSC(A_cpu) # A_gpu = CuSparseMatrixCSR(A_cpu) b_gpu = CuVector(b_cpu) -# LLᵀ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices +# LLᴴ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices P = ic02(A_gpu, 'O') # Solve Py = x -function ldiv!(y, P, x) +function ldiv_ic0!(y, P, x) copyto!(y, x) # Variant for CuSparseMatrixCSR sv2!('T', 'U', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'N', 1.0, P, y, 'O') sv2!('N', 'U', 'N', 1.0, P, y, 'O') # sv2!('T', 'L', 'N', 1.0, P, y, 'O') @@ -65,12 +74,15 @@ end n = length(b_gpu) T = eltype(b_gpu) symmetric = hermitian = true -opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x)) +opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x)) # Solve a symmetric positive definite system with an incomplete Cholesky preconditioner on GPU -(x, stats) = cg(A_gpu, b_gpu, M=opM) +x, stats = cg(A_gpu, b_gpu, M=opM) ``` +!!! note + You need to replace `'T'` by `'C'` in `ldiv_ic0!` if `A_gpu` is a complex matrix. + ### Example with a general square system ```julia using SparseArrays, Krylov, LinearOperators using CUDA, CUDA.CUSPARSE # Optional -- Compute a permutation vector p such that A[:,p] has no zero diagonal A_cpu = A_cpu[p,:] b_cpu = b_cpu[p] # Transfer the linear system from the CPU to the GPU -A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu) +A_gpu = CuSparseMatrixCSC(A_cpu) # A_gpu = CuSparseMatrixCSR(A_cpu) b_gpu = CuVector(b_cpu) # LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices P = ilu02(A_gpu, 'O') # Solve Py = x -function ldiv!(y, P, x) +function ldiv_ilu0!(y, P, x) copyto!(y, x) # Variant for CuSparseMatrixCSR sv2!('N', 'L', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'U', 1.0, P, y, 'O') sv2!('N', 'U', 'U', 1.0, P, y, 'O') # sv2!('N', 'U', 'N', 1.0, P, y, 'O') @@ -102,8 +114,85 @@ end n = length(b_gpu) T = eltype(b_gpu) symmetric = hermitian = false -opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x)) +opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x)) # Solve an unsymmetric system with an incomplete LU preconditioner on GPU -(x, stats) = bicgstab(A_gpu, b_gpu, M=opM) +x, stats = bicgstab(A_gpu, b_gpu, M=opM) +``` + +## AMD GPUs + +All solvers in Krylov.jl can be used with [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) and allow computations on AMD GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`ROCMatrix` and `ROCVector`). 
+ +```julia +using Krylov, AMDGPU + +# CPU Arrays +A_cpu = rand(ComplexF64, 20, 20) +A_cpu = A_cpu + A_cpu' +b_cpu = rand(ComplexF64, 20) + +A_gpu = ROCMatrix(A_cpu) +b_gpu = ROCVector(b_cpu) + +# Solve a dense Hermitian system on an AMD GPU +x, stats = minres(A_gpu, b_gpu) +``` + +!!! info + The library `rocSPARSE` is not interfaced yet in AMDGPU.jl and only dense linear systems are supported. + +## Intel GPUs + +All solvers in Krylov.jl can be used with [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) and allow computations on Intel GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`oneMatrix` and `oneVector`). + +```julia +using Krylov, oneAPI + +T = Float32 # oneAPI.jl also works with ComplexF32 +m = 20 +n = 10 + +# CPU Arrays +A_cpu = rand(T, m, n) +b_cpu = rand(T, m) + +# GPU Arrays +A_gpu = oneMatrix(A_cpu) +b_gpu = oneVector(b_cpu) + +# Solve a dense least-squares problem on an Intel GPU +x, stats = lsqr(A_gpu, b_gpu) ``` + +!!! warning + The library `oneMKL` is not interfaced yet in oneAPI.jl and all BLAS routines (dot, norm, mul!, etc.) dispatch to generic fallbacks. + +## Apple M1 GPUs + +All solvers in Krylov.jl can be used with [Metal.jl](https://github.com/JuliaGPU/Metal.jl) and allow computations on Apple M1 GPUs. +Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`MtlMatrix` and `MtlVector`). + +```julia +using Krylov, Metal + +T = Float32 # Metal.jl also works with ComplexF32 +n = 10 +m = 20 + +# CPU Arrays +A_cpu = rand(T, n, m) +b_cpu = rand(T, n) + +# GPU Arrays +A_gpu = MtlMatrix(A_cpu) +b_gpu = MtlVector(b_cpu) + +# Solve a dense least-norm problem on an Apple M1 GPU +x, stats = craig(A_gpu, b_gpu) +``` + +!!! warning + Metal.jl is under heavy development and is considered experimental for now. diff --git a/docs/src/graphics/arnoldi.png b/docs/src/graphics/arnoldi.png new file mode 100644 index 000000000..9ef8bd3a3 Binary files /dev/null and b/docs/src/graphics/arnoldi.png differ diff --git a/docs/src/graphics/golub_kahan.png b/docs/src/graphics/golub_kahan.png new file mode 100644 index 000000000..32fc3d7b8 Binary files /dev/null and b/docs/src/graphics/golub_kahan.png differ diff --git a/docs/src/graphics/hermitian_lanczos.png b/docs/src/graphics/hermitian_lanczos.png new file mode 100644 index 000000000..c70082e72 Binary files /dev/null and b/docs/src/graphics/hermitian_lanczos.png differ diff --git a/docs/src/graphics/montoison_orban.png b/docs/src/graphics/montoison_orban.png new file mode 100644 index 000000000..5a14eda04 Binary files /dev/null and b/docs/src/graphics/montoison_orban.png differ diff --git a/docs/src/graphics/nonhermitian_lanczos.png b/docs/src/graphics/nonhermitian_lanczos.png new file mode 100644 index 000000000..b8d83961c Binary files /dev/null and b/docs/src/graphics/nonhermitian_lanczos.png differ diff --git a/docs/src/graphics/saunders_simon_yip.png b/docs/src/graphics/saunders_simon_yip.png new file mode 100644 index 000000000..c3acfd181 Binary files /dev/null and b/docs/src/graphics/saunders_simon_yip.png differ diff --git a/docs/src/index.md b/docs/src/index.md index ce657436d..1a18e2315 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -46,26 +46,26 @@ Overdetermined sytems are less common but also occur. 4 - Adjoint systems ```math - Ax = b \quad \text{and} \quad A^T y = c + Ax = b \quad \text{and} \quad A^H y = c ``` where **_A_** can have any shape. 
-5 - Saddle-point and symmetric quasi-definite (SQD) systems +5 - Saddle-point and Hermitian quasi-definite systems ```math - \begin{bmatrix} M & \phantom{-}A \\ A^T & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right) + \begin{bmatrix} M & \phantom{-}A \\ A^H & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right) ``` where **_A_** can have any shape. -6 - Generalized saddle-point and unsymmetric partitioned systems +6 - Generalized saddle-point and non-Hermitian partitioned systems ```math \begin{bmatrix} M & A \\ B & N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix} ``` -where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**. +where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**. **_A_**, **_B_**, **_b_** and **_c_** must be all nonzero. Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because: @@ -92,3 +92,10 @@ julia> ] pkg> add Krylov pkg> test Krylov ``` + +# Bug reports and discussions + +If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues). +Focused suggestions and requests can also be opened as issues. Before opening a pull request, start an issue or a discussion on the topic, please. + +If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome. diff --git a/docs/src/inplace.md b/docs/src/inplace.md index 71a4e25de..9950575fe 100644 --- a/docs/src/inplace.md +++ b/docs/src/inplace.md @@ -15,7 +15,7 @@ Given an operator `A` and a right-hand side `b`, you can create a `KrylovSolver` For example, use `S = Vector{Float64}` if you want to solve linear systems in double precision on the CPU and `S = CuVector{Float32}` if you want to solve linear systems in single precision on an Nvidia GPU. !!! note - `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`). + `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `FgmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`). The workspace is always the first argument of the in-place methods: diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md new file mode 100644 index 000000000..fd203dddb --- /dev/null +++ b/docs/src/preconditioners.md @@ -0,0 +1,237 @@ +# [Preconditioners](@id preconditioners) + +The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear system $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic. +Preconditioning can be used to reduce the condition number of the problem or cluster its eigenvalues or singular values for instance. 
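+As a minimal sketch of the effect (a contrived diagonal system; it assumes `stats.niter` reports the iteration count), a simple diagonal preconditioner can cut the number of CG iterations drastically:
+
+```julia
+using Krylov, SparseArrays
+
+n = 1000
+A = spdiagm(0 => 1.0:n)         # SPD matrix with condition number n
+b = ones(n)
+
+x₁, stats₁ = cg(A, b)           # no preconditioning
+
+M = spdiagm(0 => 1 ./ (1.0:n))  # Jacobi preconditioner, here exactly A⁻¹
+x₂, stats₂ = cg(A, b, M=M)
+
+stats₂.niter < stats₁.niter     # the preconditioned solve needs far fewer iterations
+```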
+ +The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application-dependent information and structure into account. +Specialized preconditioners generally outperform generic preconditioners such as incomplete factorizations. + +The construction of a preconditioner necessitates trade-offs because we need to apply it at least once per iteration within a Krylov method. +Hence, a preconditioner must be constructed such that it is cheap to apply, while also capturing the characteristics of the original system in some sense. + +There exist three variants of preconditioning: + +| Left preconditioning | Two-sided preconditioning | Right preconditioning | +|:----------------------------------:|:----------------------------------------------------------------------:|:--------------------------------------------:| +| $P_{\ell}^{-1}Ax = P_{\ell}^{-1}b$ | $P_{\ell}^{-1}AP_r^{-1}y = P_{\ell}^{-1}b~~\text{with}~~x = P_r^{-1}y$ | $AP_r^{-1}y = b~~\text{with}~~x = P_r^{-1}y$ | + +where $P_{\ell}$ and $P_r$ are square and nonsingular. + +In Krylov.jl, we call $P_{\ell}^{-1}$ and $P_r^{-1}$ the preconditioners and we assume that we can apply them with the operation $y \leftarrow P^{-1} * x$. +It is also common to call $P_{\ell}$ and $P_r$ the preconditioners if the equivalent operation $y \leftarrow P~\backslash~x$ is available. +Krylov.jl supports both approaches thanks to the argument `ldiv` of the Krylov solvers. + +## How to use preconditioners in Krylov.jl? + +!!! info + - A preconditioner only needs to support the operation `mul!(y, P⁻¹, x)` when `ldiv=false` or `ldiv!(y, P, x)` when `ldiv=true` to be used in Krylov.jl. + - The default value of a preconditioner in Krylov.jl is the identity operator `I`. + +### Square non-Hermitian linear systems + +Methods concerned: [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`FGMRES`](@ref fgmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom). + +A Krylov method dedicated to non-Hermitian linear systems allows the three variants of preconditioning. + +| Preconditioners | $P_{\ell}^{-1}$ | $P_{\ell}$ | $P_r^{-1}$ | $P_r$ | +|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:| +| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` | + +### Hermitian linear systems + +Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp). + +When $A$ is Hermitian, we can only use centered preconditioning $L^{-1}AL^{-H}y = L^{-1}b$ with $x = L^{-H}y$. +Centered preconditioning is a special case of two-sided preconditioning with $P_{\ell} = L = P_r^H$ that maintains hermiticity. +However, there is no need to specify $L$ and one may specify $P_c = LL^H$ or its inverse directly. + +| Preconditioners | $P_c^{-1}$ | $P_c$ | +|:---------------:|:-------------------------:|:--------------------:| +| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | + +!!! warning + The preconditioner `M` must be hermitian and positive definite. + +### Linear least-squares problems + +Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr). 
+ +| Formulation | Without preconditioning | With preconditioning | +|:---------------------:|:------------------------------------:|:-------------------------------------------:| +| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}}$ | +| Normal equation | $A^HAx = A^Hb$ | $A^HE^{-1}Ax = A^HE^{-1}b$ | +| Augmented system | $\begin{bmatrix} I & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | + +[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) also handle regularized least-squares problems. + +| Formulation | Without preconditioning | With preconditioning | +|:---------------------:|:-------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:| +| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2 + \tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ | +| Normal equation | $(A^HA + \lambda^2 I)x = A^Hb$ | $(A^HE^{-1}A + \lambda^2 F)x = A^HE^{-1}b$ | +| Augmented system | $\begin{bmatrix} I & A \\ A^H & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | + +| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ | +|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:| +| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` | + +!!! warning + The preconditioners `M` and `N` must be hermitian and positive definite. + +### Linear least-norm problems + +Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr). + +| Formulation | Without preconditioning | With preconditioning | +|:--------------------:|:----------------------------------------------------:|:----------------------------------------------------:| +| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ | +| Normal equation | $AA^Hy = b~~\text{with}~~x = A^Hy$ | $AF^{-1}A^Hy = b~~\text{with}~~x = F^{-1}A^Hy$ | +| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | + +[`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr) also handle penalized minimum-norm problems. 
+ +| Formulation | Without preconditioning | With preconditioning | +|:--------------------:|:---------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------:| +| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2 + \tfrac{1}{2} \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F + \tfrac{1}{2} \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ | +| Normal equation | $(AA^H + \lambda^2 I)y = b~~\text{with}~~x = A^Hy$ | $(AF^{-1}A^H + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Hy$ | +| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | + +| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ | +|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:| +| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` | + +!!! warning + The preconditioners `M` and `N` must be hermitian and positive definite. + +### Saddle-point and Hermitian quasi-definite systems + +[`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of the structure of Hermitian systems $Kz = d$ with the 2x2 block structure +```math + \begin{bmatrix} \tau E & \phantom{-}A \\ A^H & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix}, +``` +| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ | +|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:| +| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` | + +!!! warning + The preconditioners `M` and `N` must be hermitian and positive definite. + +### Generalized saddle-point and non-Hermitian partitioned systems + +[`GPMR`](@ref gpmr) can take advantage of the structure of general square systems $Kz = d$ with the 2x2 block structure +```math + \begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix}, +``` +| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ | +|:---------------:|:-----------------------------:|:----------------------------:|:-----------------------------:|:----------------------------:| +| Arguments | `C` and `E` with `ldiv=false` | `C` and `E` with `ldiv=true` | `D` and `F` with `ldiv=false` | `D` and `F` with `ldiv=true` | + +!!! note + Our implementations of [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr) and [`TriLQR`](@ref trilqr) don't support preconditioning. + +## Packages that provide preconditioners + +- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking and Crout versions of ILU decompositions. +- [ILUZero.jl](https://github.com/mcovalt/ILUZero.jl) is a Julia implementation of incomplete LU factorization with zero level of fill-in. +- [LimitedLDLFactorizations.jl](https://github.com/JuliaSmoothOptimizers/LimitedLDLFactorizations.jl) for limited-memory LDLᵀ factorization of symmetric matrices. 
+- [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) provides two algebraic multigrid (AMG) preconditioners. +- [RandomizedPreconditioners.jl](https://github.com/tjdiamandis/RandomizedPreconditioners.jl) uses randomized numerical linear algebra to construct approximate inverses of matrices. +- [BasicLU.jl](https://github.com/JuliaSmoothOptimizers/BasicLU.jl) uses a sparse LU factorization to compute a maximum volume basis that can be used as a preconditioner for least-norm and least-squares problems. + +## Examples + +```julia +using Krylov +n, m = size(A) +d = [A[i,i] ≠ 0 ? 1 / abs(A[i,i]) : 1 for i=1:n] # Jacobi preconditioner +P⁻¹ = diagm(d) +x, stats = symmlq(A, b, M=P⁻¹) +``` + +```julia +using Krylov +n, m = size(A) +d = [1 / norm(A[:,i]) for i=1:m] # diagonal preconditioner +P⁻¹ = diagm(d) +x, stats = minres(A, b, M=P⁻¹) +``` + +```julia +using IncompleteLU, Krylov +Pℓ = ilu(A) +x, stats = gmres(A, b, M=Pℓ, ldiv=true) # left preconditioning +``` + +```julia +using LimitedLDLFactorizations, Krylov +P = lldl(A) +P.D .= abs.(P.D) +x, stats = cg(A, b, M=P, ldiv=true) # centered preconditioning +``` + +```julia +using ILUZero, Krylov +Pᵣ = ilu0(A) +x, stats = bicgstab(A, b, N=Pᵣ, ldiv=true) # right preconditioning +``` + +```julia +using LDLFactorizations, Krylov + +M = ldl(E) +N = ldl(F) + +# [E A] [x] = [b] +# [Aᴴ -F] [y] [c] +x, y, stats = tricg(A, b, c, M=M, N=N, ldiv=true) +``` + +```julia +using SuiteSparse, Krylov +import LinearAlgebra.ldiv! + +M = cholesky(E) + +# ldiv! is not implemented for the sparse Cholesky factorization (SuiteSparse.CHOLMOD) +ldiv!(y::Vector{T}, F::SuiteSparse.CHOLMOD.Factor{T}, x::Vector{T}) where T = (y .= F \ x) + +# [E A] [x] = [b] +# [Aᴴ 0] [y] [c] +x, y, stats = trimr(A, b, c, M=M, sp=true, ldiv=true) +``` + +```julia +using Krylov + +C = lu(M) + +# [M A] [x] = [b] +# [B 0] [y] [c] +x, y, stats = gpmr(A, B, b, c, C=C, gsp=true, ldiv=true) +``` + +```julia +import BasicLU +using LinearOperators, Krylov + +# Least-squares problem +m, n = size(A) +Aᴴ = sparse(A') +basis, B = BasicLU.maxvolbasis(Aᴴ) +opA = LinearOperator(A) +B⁻ᴴ = LinearOperator(Float64, n, n, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N'))) + +d, stats = lsmr(opA * B⁻ᴴ, b) # min ‖AB⁻ᴴd - b‖₂ +x = B⁻ᴴ * d # recover the solution of min ‖Ax - b‖₂ + +# Least-norm problem +m, n = size(A) +basis, B = BasicLU.maxvolbasis(A) +opA = LinearOperator(A) +B⁻¹ = LinearOperator(Float64, m, m, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')), + (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T'))) + +x, y, stats = craigmr(B⁻¹ * opA, B⁻¹ * b) # min ‖x‖₂ s.t. B⁻¹Ax = B⁻¹b +``` diff --git a/docs/src/processes.md b/docs/src/processes.md new file mode 100644 index 000000000..e9d4066d2 --- /dev/null +++ b/docs/src/processes.md @@ -0,0 +1,334 @@ +```@raw html + +``` + +# [Krylov processes](@id krylov-processes) + +Krylov processes are the foundation of Krylov methods: they generate bases of Krylov subspaces. +Depending on the Krylov subspaces generated, Krylov processes are more or less specialized for a subset of linear problems. +The following table summarizes the most relevant processes for each linear problem. 
+ +| Linear problems | Processes | +|:--------------------------------------------------------------:|:---------------------------------:| +| Hermitian linear systems | Hermitian Lanczos | +| Square non-Hermitian linear systems | Non-Hermitian Lanczos -- Arnoldi | +| Least-squares problems | Golub-Kahan -- Saunders-Simon-Yip | +| Least-norm problems | Golub-Kahan -- Saunders-Simon-Yip | +| Saddle-point and Hermitian quasi-definite systems | Golub-Kahan -- Saunders-Simon-Yip | +| Generalized saddle-point and non-Hermitian partitioned systems | Montoison-Orban | + +### Notation + +For a matrix $A$, $A^H$ denotes the conjugate transpose of $A$. +It coincides with $A^T$, the transpose of $A$, for real matrices. +Define $V_k := \begin{bmatrix} v_1 & \ldots & v_k \end{bmatrix} \enspace$ and $\enspace U_k := \begin{bmatrix} u_1 & \ldots & u_k \end{bmatrix}$. + +For a matrix $C \in \mathbb{C}^{n \times n}$ and a vector $t \in \mathbb{C}^{n}$, the $k$-th Krylov subspace generated by $C$ and $t$ is +```math +\mathcal{K}_k(C, t) := +\left\{\sum_{i=0}^{k-1} \omega_i C^i t \, \middle \vert \, \omega_i \in \mathbb{C},~0 \le i \le k-1 \right\}. +``` + +For matrices $C \in \mathbb{C}^{n \times n} \enspace$ and $\enspace T \in \mathbb{C}^{n \times p}$, the $k$-th block Krylov subspace generated by $C$ and $T$ is +```math +\mathcal{K}_k^{\square}(C, T) := +\left\{\sum_{i=0}^{k-1} C^i T \, \Omega_i \, \middle \vert \, \Omega_i \in \mathbb{C}^{p \times p},~0 \le i \le k-1 \right\}. +``` + +## Hermitian Lanczos + +![hermitian_lanczos](./graphics/hermitian_lanczos.png) + +After $k$ iterations of the Hermitian Lanczos process, the situation may be summarized as +```math +\begin{align*} + A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\ + V_k^H V_k &= I_k, +\end{align*} +``` +where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$, +```math +T_k = +\begin{bmatrix} + \alpha_1 & \beta_2 & & \\ + \beta_2 & \alpha_2 & \ddots & \\ + & \ddots & \ddots & \beta_k \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +T_{k+1,k} = +\begin{bmatrix} + T_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix}. +``` +Note that $T_{k+1,k}$ is a real tridiagonal matrix even if $A$ is a complex matrix. + +The function [`hermitian_lanczos`](@ref hermitian_lanczos) returns $V_{k+1}$ and $T_{k+1,k}$. + +Related methods: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CR`](@ref cr), [`MINRES`](@ref minres), [`MINRES-QLP`](@ref minres_qlp), [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`CG-LANCZOS`](@ref cg_lanczos) and [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift). 
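+A quick sketch of the process in action (assuming the calling sequence `hermitian_lanczos(A, b, k)` and the return values described above):
+
+```julia
+using Krylov, LinearAlgebra
+
+n, k = 100, 20
+A = rand(ComplexF64, n, n); A = A + A'  # Hermitian matrix
+b = rand(ComplexF64, n)
+
+V, T = hermitian_lanczos(A, b, k)
+
+# A * Vk = V_{k+1} * T_{k+1,k} up to rounding errors
+norm(A * V[:, 1:k] - V * T)
+```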
+ +```@docs +hermitian_lanczos +``` + +## Non-Hermitian Lanczos + +![nonhermitian_lanczos](./graphics/nonhermitian_lanczos.png) + +After $k$ iterations of the non-Hermitian Lanczos process (also named the Lanczos biorthogonalization process), the situation may be summarized as +```math +\begin{align*} + A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\ + A^H U_k &= U_k T_k^H + \bar{\gamma}_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\ + V_k^H U_k &= U_k^H V_k = I_k, +\end{align*} +``` +where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A,b)$ and $\mathcal{K}_k (A^H,c)$, respectively, +```math +T_k = +\begin{bmatrix} + \alpha_1 & \gamma_2 & & \\ + \beta_2 & \alpha_2 & \ddots & \\ + & \ddots & \ddots & \gamma_k \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +T_{k+1,k} = +\begin{bmatrix} + T_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix} +, \qquad +T_{k,k+1} = +\begin{bmatrix} + T_{k} & \gamma_{k+1} e_k +\end{bmatrix}. +``` + +The function [`nonhermitian_lanczos`](@ref nonhermitian_lanczos) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$. + +Related methods: [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`CGS`](@ref cgs) and [`BICGSTAB`](@ref bicgstab). + +!!! note + The scaling factors used in our implementation are $\beta_k = |u_k^H v_k|^{\tfrac{1}{2}}$ and $\gamma_k = (u_k^H v_k) / \beta_k$. + With these scaling factors, the non-Hermitian Lanczos process coincides with the Hermitian Lanczos process when $A = A^H$ and $b = c$. + +```@docs +nonhermitian_lanczos +``` + +## Arnoldi + +![arnoldi](./graphics/arnoldi.png) + +After $k$ iterations of the Arnoldi process, the situation may be summarized as +```math +\begin{align*} + A V_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\ + V_k^H V_k &= I_k, +\end{align*} +``` +where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$, +```math +H_k = +\begin{bmatrix} + h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\ + h_{2,1}~ & \ddots~ & \ddots & \vdots \\ + & \ddots~ & \ddots & h_{k-1,k} \\ + & & h_{k,k-1} & h_{k,k} +\end{bmatrix} +, \qquad +H_{k+1,k} = +\begin{bmatrix} + H_{k} \\ + h_{k+1,k} e_{k}^T +\end{bmatrix}. +``` + +The function [`arnoldi`](@ref arnoldi) returns $V_{k+1}$ and $H_{k+1,k}$. + +Related methods: [`DIOM`](@ref diom), [`FOM`](@ref fom), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres) and [`FGMRES`](@ref fgmres). + +!!! note + The Arnoldi process coincides with the Hermitian Lanczos process when $A$ is Hermitian. + +```@docs +arnoldi +``` + +## Golub-Kahan + +![golub_kahan](./graphics/golub_kahan.png) + +After $k$ iterations of the Golub-Kahan bidiagonalization process, the situation may be summarized as +```math +\begin{align*} + A V_k &= U_{k+1} B_k, \\ + A^H U_{k+1} &= V_k B_k^H + \alpha_{k+1} v_{k+1} e_{k+1}^T = V_{k+1} L_{k+1}^H, \\ + V_k^H V_k &= U_k^H U_k = I_k, +\end{align*} +``` +where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A^HA,A^Hb)$ and $\mathcal{K}_k (AA^H,b)$, respectively, +```math +L_k = +\begin{bmatrix} + \alpha_1 & & & \\ + \beta_2 & \alpha_2 & & \\ + & \ddots & \ddots & \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +B_k = +\begin{bmatrix} + \alpha_1 & & & \\ + \beta_2 & \alpha_2 & & \\ + & \ddots & \ddots & \\ + & & \beta_k & \alpha_k \\ + & & & \beta_{k+1} \\ +\end{bmatrix} += +\begin{bmatrix} + L_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix}. +``` +Note that $L_k$ is a real bidiagonal matrix even if $A$ is a complex matrix. 
+ +The function [`golub_kahan`](@ref golub_kahan) returns $V_{k+1}$, $U_{k+1}$ and $L_{k+1}$. + +Related methods: [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig), [`CRAIGMR`](@ref craigmr), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr). + +!!! note + The Golub-Kahan process coincides with the Hermitian Lanczos process applied to the normal equations $A^HA x = A^Hb$ and $AA^H x = b$. + It is also related to the Hermitian Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial vector $\begin{bmatrix} b \\ 0 \end{bmatrix}$. + +```@docs +golub_kahan +``` + +## Saunders-Simon-Yip + +![saunders_simon_yip](./graphics/saunders_simon_yip.png) + +After $k$ iterations of the Saunders-Simon-Yip process (also named the orthogonal tridiagonalization process), the situation may be summarized as +```math +\begin{align*} + A U_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\ + A^H V_k &= U_k T_k^H + \gamma_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\ + V_k^H V_k &= U_k^H U_k = I_k, +\end{align*} +``` +where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$, +```math +T_k = +\begin{bmatrix} + \alpha_1 & \gamma_2 & & \\ + \beta_2 & \alpha_2 & \ddots & \\ + & \ddots & \ddots & \gamma_k \\ + & & \beta_k & \alpha_k +\end{bmatrix} +, \qquad +T_{k+1,k} = +\begin{bmatrix} + T_{k} \\ + \beta_{k+1} e_{k}^T +\end{bmatrix} +, \qquad +T_{k,k+1} = +\begin{bmatrix} + T_{k} & \gamma_{k+1} e_{k} +\end{bmatrix}. +``` + +The function [`saunders_simon_yip`](@ref saunders_simon_yip) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$. + +Related methods: [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr), [`TriLQR`](@ref trilqr), [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr). + +```@docs +saunders_simon_yip +``` + +!!! note + The Saunders-Simon-Yip is equivalent to the block-Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$. + +## Montoison-Orban + +![montoison_orban](./graphics/montoison_orban.png) + +After $k$ iterations of the Montoison-Orban process (also named the orthogonal Hessenberg reduction process), the situation may be summarized as +```math +\begin{align*} + A U_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\ + B V_k &= U_k F_k + f_{k+1,k} u_{k+1} e_k^T = U_{k+1} F_{k+1,k}, \\ + V_k^H V_k &= U_k^H U_k = I_k, +\end{align*} +``` +where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$, +```math +H_k = +\begin{bmatrix} + h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\ + h_{2,1}~ & \ddots~ & \ddots & \vdots \\ + & \ddots~ & \ddots & h_{k-1,k} \\ + & & h_{k,k-1} & h_{k,k} +\end{bmatrix} +, \qquad +F_k = +\begin{bmatrix} + f_{1,1}~ & f_{1,2}~ & \ldots & f_{1,k} \\ + f_{2,1}~ & \ddots~ & \ddots & \vdots \\ + & \ddots~ & \ddots & f_{k-1,k} \\ + & & f_{k,k-1} & f_{k,k} +\end{bmatrix}, +``` +```math +H_{k+1,k} = +\begin{bmatrix} + H_{k} \\ + h_{k+1,k} e_{k}^T +\end{bmatrix} +, \qquad +F_{k+1,k} = +\begin{bmatrix} + F_{k} \\ + f_{k+1,k} e_{k}^T +\end{bmatrix}. +``` + +The function [`montoison_orban`](@ref montoison_orban) returns $V_{k+1}$, $H_{k+1,k}$, $U_{k+1}$ and $F_{k+1,k}$. 
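+A quick sketch of the process in action (assuming the calling sequence `montoison_orban(A, B, b, c, k)` and the return values described above):
+
+```julia
+using Krylov, LinearAlgebra
+
+n, k = 50, 10
+A = rand(n, n); B = rand(n, n)
+b = rand(n); c = rand(n)
+
+V, H, U, F = montoison_orban(A, B, b, c, k)
+
+# A * Uk = V_{k+1} * H_{k+1,k} and B * Vk = U_{k+1} * F_{k+1,k} up to rounding errors
+norm(A * U[:, 1:k] - V * H), norm(B * V[:, 1:k] - U * F)
+```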
+
+Related methods: [`GPMR`](@ref gpmr).
+
+!!! note
+    The Montoison-Orban process is equivalent to the block Arnoldi process applied to $\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$.
+    It also coincides with the Saunders-Simon-Yip process when $B = A^H$.
+
+```@docs
+montoison_orban
+```
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 0896e1639..f73e10043 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -6,6 +6,7 @@
 ```
 
 ```@docs
+Krylov.kstdout
 Krylov.FloatOrComplex
 Krylov.niterations
 Krylov.Aprod
diff --git a/docs/src/solvers/gsp.md b/docs/src/solvers/gsp.md
index 10aaccbe0..33c580b8a 100644
--- a/docs/src/solvers/gsp.md
+++ b/docs/src/solvers/gsp.md
@@ -1,5 +1,5 @@
 ```@meta
-# Generalized saddle-point and unsymmetric partitioned systems
+# Generalized saddle-point and non-Hermitian partitioned systems
 ```
 
 ## GPMR
diff --git a/docs/src/solvers/ln.md b/docs/src/solvers/ln.md
index c5396ffdd..b638b8247 100644
--- a/docs/src/solvers/ln.md
+++ b/docs/src/solvers/ln.md
@@ -36,3 +36,10 @@ craig!
 craigmr
 craigmr!
 ```
+
+## USYMLQ
+
+```@docs
+usymlq
+usymlq!
+```
diff --git a/docs/src/solvers/ls.md b/docs/src/solvers/ls.md
index f77057d94..fecfbc417 100644
--- a/docs/src/solvers/ls.md
+++ b/docs/src/solvers/ls.md
@@ -36,3 +36,10 @@ lsqr!
 lsmr
 lsmr!
 ```
+
+## USYMQR
+
+```@docs
+usymqr
+usymqr!
+```
diff --git a/docs/src/solvers/sid.md b/docs/src/solvers/sid.md
index 1bd459cd2..e911681be 100644
--- a/docs/src/solvers/sid.md
+++ b/docs/src/solvers/sid.md
@@ -1,5 +1,5 @@
 ```@meta
-# Symmetric indefinite linear systems
+# Hermitian indefinite linear systems
 ```
 
 ## SYMMLQ
diff --git a/docs/src/solvers/sp_sqd.md b/docs/src/solvers/sp_sqd.md
index 518684b5b..4ee4ab09b 100644
--- a/docs/src/solvers/sp_sqd.md
+++ b/docs/src/solvers/sp_sqd.md
@@ -1,5 +1,5 @@
 ```@meta
-# Saddle-point and symmetric quasi-definite systems
+# Saddle-point and Hermitian quasi-definite systems
 ```
 
 ## TriCG
diff --git a/docs/src/solvers/spd.md b/docs/src/solvers/spd.md
index 79bb6e9e8..aebda285b 100644
--- a/docs/src/solvers/spd.md
+++ b/docs/src/solvers/spd.md
@@ -1,5 +1,5 @@
 ```@meta
-# Symmetric positive definite linear systems
+# Hermitian positive definite linear systems
 ```
 
 ## CG
diff --git a/docs/src/solvers/unsymmetric.md b/docs/src/solvers/unsymmetric.md
index 280908ea5..c9e77f787 100644
--- a/docs/src/solvers/unsymmetric.md
+++ b/docs/src/solvers/unsymmetric.md
@@ -1,5 +1,5 @@
 ```@meta
-# Unsymmetric linear systems
+# Non-Hermitian square linear systems
 ```
 
 ## BiLQ
@@ -16,20 +16,6 @@ qmr
 qmr!
 ```
 
-## USYMLQ
-
-```@docs
-usymlq
-usymlq!
-```
-
-## USYMQR
-
-```@docs
-usymqr
-usymqr!
-```
-
 ## CGS
 
 ```@docs
@@ -71,3 +57,10 @@ dqgmres!
 gmres
 gmres!
 ```
+
+## FGMRES
+
+```@docs
+fgmres
+fgmres!
+```
diff --git a/docs/src/storage.md b/docs/src/storage.md
new file mode 100644
index 000000000..903cc0558
--- /dev/null
+++ b/docs/src/storage.md
@@ -0,0 +1,152 @@
+```@meta
+# Thanks Morten Piibeleht for the hack with the tables!
+```
+
+```@raw html

+```
+
+# [Storage requirements](@id storage-requirements)
+
+This section provides the storage requirements of all Krylov methods available in Krylov.jl.
+
+### Notation
+
+We denote by $m$ and $n$ the number of rows and columns of the linear problem.
+The memory parameter of DIOM, FOM, DQGMRES, GMRES, FGMRES and GPMR is $k$.
+The number of shifts of CG-LANCZOS-SHIFT is $p$.
+
+## Theoretical storage requirements
+
+The following tables provide the number of coefficients that must be allocated for each Krylov method.
+The coefficients have the same type as those that compose the linear problem we seek to solve.
+Each table summarizes the storage requirements of the Krylov methods recommended for a specific linear problem.
+
+#### Hermitian positive definite linear systems
+
+| Methods | [`CG`](@ref cg) | [`CR`](@ref cr) | [`CG-LANCZOS`](@ref cg_lanczos) | [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift) |
+|:-------:|:---------------:|:---------------:|:-------------------------------:|:-------------------------------------------:|
+| Storage | $4n$ | $5n$ | $5n$ | $3n + 2np + 5p$ |
+
+#### Hermitian indefinite linear systems
+
+| Methods | [`SYMMLQ`](@ref symmlq) | [`MINRES`](@ref minres) | [`MINRES-QLP`](@ref minres_qlp) |
+|:-------:|:-----------------------:|:-----------------------:|:-------------------------------:|
+| Storage | $5n$ | $6n$ | $6n$ |
+
+#### Non-Hermitian square linear systems
+
+| Methods | [`CGS`](@ref cgs) | [`BICGSTAB`](@ref bicgstab) | [`BiLQ`](@ref bilq) | [`QMR`](@ref qmr) |
+|:-------:|:-----------------:|:---------------------------:|:-------------------:|:-----------------:|
+| Storage | $6n$ | $6n$ | $8n$ | $9n$ |
+
+| Methods | [`DIOM`](@ref diom) | [`DQGMRES`](@ref dqgmres) |
+|:-------:|:-------------------:|:-------------------------:|
+| Storage | $n(2k+1) + 2k - 1$ | $n(2k+2) + 3k + 1$ |
+
+| Methods | [`FOM`](@ref fom) | [`GMRES`](@ref gmres) | [`FGMRES`](@ref fgmres) |
+|:-------:|:--------------------------------------------------:|:---------------------------------------:|:----------------------------------------:|
+| Storage$\dfrac{}{}$ | $\!n(2+k) + 2k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+k) + 3k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+2k) + 3k + \dfrac{k(k + 1)}{2}\!$ |
+
+#### Least-norm problems
+
+| Methods | [`USYMLQ`](@ref usymlq) | [`CGNE`](@ref cgne) | [`CRMR`](@ref crmr) | [`LNLQ`](@ref lnlq) | [`CRAIG`](@ref craig) | [`CRAIGMR`](@ref craigmr) |
+|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:---------------------:|:-------------------------:|
+| Storage | $5n + 3m$ | $3n + 2m$ | $3n + 2m$ | $3n + 4m$ | $3n + 4m$ | $4n + 5m$ |
+
+#### Least-squares problems
+
+| Methods | [`USYMQR`](@ref usymqr) | [`CGLS`](@ref cgls) | [`CRLS`](@ref crls) | [`LSLQ`](@ref lslq) | [`LSQR`](@ref lsqr) | [`LSMR`](@ref lsmr) |
+|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|
+| Storage | $6n + 3m$ | $3n + 2m$ | $4n + 3m$ | $4n + 2m$ | $4n + 2m$ | $5n + 2m$ |
+
+#### Adjoint systems
+
+| Methods | [`BiLQR`](@ref bilqr) | [`TriLQR`](@ref trilqr) |
+|:-------:|:---------------------:|:-----------------------:|
+| Storage | $11n$ | $6m + 5n$ |
+
+#### Saddle-point and Hermitian quasi-definite systems
+
+| Methods  | [`TriCG`](@ref tricg) | [`TriMR`](@ref trimr) |
+|:--------:|:---------------------:|:---------------------:|
+| Storage  | $6n + 6m$ | $8n + 8m$ |
+
+#### Generalized saddle-point and non-Hermitian partitioned systems
+
+| Method  | [`GPMR`](@ref gpmr) |
+|:-------:|:-------------------------:|
+| Storage | $(2+k)(n+m) + 2k^2 + 11k$ |
+
+## Practical storage requirements
+
+Each method has its own `KrylovSolver` that contains all the storage needed by the method.
+In the REPL, the size in bytes of each attribute and the total amount of memory allocated by the solver are displayed when we show a `KrylovSolver`.
+
+```@example storage
+using Krylov
+
+m = 5000
+n = 12000
+A = rand(Float64, m, n)
+b = rand(Float64, m)
+solver = LsmrSolver(A, b)
+show(stdout, solver, show_stats=false)
+```
+
+If we want the total number of bytes used by the solver, we can call `nbytes = sizeof(solver)`.
+
+```@example storage
+nbytes = sizeof(solver)
+```
+
+Thereafter, we can use `Base.format_bytes(nbytes)` to recover what is displayed in the REPL.
+
+```@example storage
+Base.format_bytes(nbytes)
+```
+
+To verify that we match the theoretical results, we just need to multiply the storage requirement of a method by the number of bytes associated with the precision of the linear problem.
+For instance, we need 4 bytes for the precision `Float32`, 8 bytes for precisions `Float64` and `ComplexF32`, and 16 bytes for the precision `ComplexF64`.
+
+```@example storage
+FC = Float64                            # precision of the least-squares problem
+ncoefs_lsmr = 5*n + 2*m                 # number of coefficients
+nbytes_lsmr = sizeof(FC) * ncoefs_lsmr  # number of bytes
+```
+
+You can then check that you have enough free memory in RAM to allocate a `KrylovSolver`.
+
+```@example storage
+free_nbytes = Sys.free_memory()
+Base.format_bytes(free_nbytes)  # Total free memory in RAM in bytes.
+```
+
+!!! note
+    - Beyond having faster operations, using low precision, such as single precision, allows us to store more coefficients in RAM and to solve larger linear problems.
    - In the file [test_allocations.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl/blob/main/test/test_allocations.jl), we use the macro `@allocated` to test that we match the expected storage requirement of each method with a tolerance of 2%.
diff --git a/docs/src/tips.md b/docs/src/tips.md
index 604c0633d..ca3d927bd 100644
--- a/docs/src/tips.md
+++ b/docs/src/tips.md
@@ -23,7 +23,7 @@ BLAS.set_num_threads(N)  # 1 ≤ N ≤ NMAX
 BLAS.get_num_threads()
 ```
 
-The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2`.
+The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2` if your CPU supports simultaneous multithreading (SMT).
 
 By default Julia ships with OpenBLAS but it's also possible to use Intel MKL BLAS and LAPACK with [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl).
diff --git a/docs/src/warm_start.md b/docs/src/warm-start.md
similarity index 59%
rename from docs/src/warm_start.md
rename to docs/src/warm-start.md
index 030cad6c0..d926db183 100644
--- a/docs/src/warm_start.md
+++ b/docs/src/warm-start.md
@@ -1,9 +1,10 @@
-## Warm Start
+# [Warm-start](@id warm-start)
 
-Most Krylov methods in this module accept a starting point as argument. The starting point is used as initial approximation to a solution.
+Most Krylov methods in this module accept a starting point as an argument.
+The starting point is used as an initial approximation to a solution.
 
 ```julia
-solver = CgSolver(n, n, S)
+solver = CgSolver(A, b)
 cg!(solver, A, b, itmax=100)
 if !issolved(solver)
   cg!(solver, A, b, solver.x, itmax=100)  # cg! uses the approximate solution `solver.x` as starting point
@@ -28,7 +29,7 @@ If a Krylov method doesn't have the option to warm start, it can still be done e
 We provide an example with `cg_lanczos!`.
 
 ```julia
-solver = CgLanczosSolver(n, n, S)
+solver = CgLanczosSolver(A, b)
 cg_lanczos!(solver, A, b)
 x₀ = solver.x     # Ax₀ ≈ b
 r = b - A * x₀    # r = b - Ax₀
@@ -41,33 +42,34 @@ Explicit restarts cannot be avoided in certain block methods, such as TriMR, due
 
 ```julia
 # [E  A] [x] = [b]
-# [Aᵀ F] [y]   [c]
+# [Aᴴ F] [y]   [c]
 
 M = inv(E)
 N = inv(F)
 x₀, y₀, stats = trimr(A, b, c, M=M, N=N)
 
 # E and F are not available inside TriMR
 b₀ = b - Ex₀ - Ay₀
-c₀ = c - Aᵀx₀ - Fy
+c₀ = c - Aᴴx₀ - Fy₀
 
 Δx, Δy, stats = trimr(A, b₀, c₀, M=M, N=N)
 x = x₀ + Δx
 y = y₀ + Δy
 ```
-
-## Restarted methods
-
-The storage requierements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses.
-For very large problems, the storage costs become prohibitive after only few iterations and restarted variants FOM(k) and GMRES(k) are prefered.
-In this section, we show how to use warm starts to implement GMRES(k) and FOM(k).
-
-```julia
-k = 50
-solver = GmresSolver(A, b, k)  # FomSolver(A, b, k)
-solver.x .= 0                  # solver.x .= x₀
-nrestart = 0
-while !issolved(solver) || nrestart ≤ 10
-  solve!(solver, A, b, solver.x, itmax=k)
-  nrestart += 1
-end
+```@meta
+# ## Restarted methods
+#
+# The storage requirements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses.
+# For very large problems, the storage costs become prohibitive after only a few iterations and the restarted variants FOM(k) and GMRES(k) are preferred.
+# In this section, we show how to use warm starts to implement GMRES(k) and FOM(k).
+#
+# ```julia
+# k = 50
+# solver = GmresSolver(A, b, k)  # FomSolver(A, b, k)
+# solver.x .= 0                  # solver.x .= x₀
+# nrestart = 0
+# while !issolved(solver) && nrestart ≤ 10
+#   solve!(solver, A, b, solver.x, itmax=k)
+#   nrestart += 1
+# end
+# ```
 ```
diff --git a/src/Krylov.jl b/src/Krylov.jl
index b714ccd79..aadde1575 100644
--- a/src/Krylov.jl
+++ b/src/Krylov.jl
@@ -5,6 +5,7 @@ using LinearAlgebra, SparseArrays, Printf
 include("krylov_utils.jl")
 include("krylov_stats.jl")
 include("krylov_solvers.jl")
+include("krylov_processes.jl")
 
 include("cg.jl")
 include("cr.jl")
@@ -19,6 +20,7 @@ include("diom.jl")
 include("fom.jl")
 include("dqgmres.jl")
 include("gmres.jl")
+include("fgmres.jl")
 
 include("gpmr.jl")
 
@@ -49,6 +51,4 @@ include("lnlq.jl")
 include("craig.jl")
 include("craigmr.jl")
 
-include("callback_utils.jl")
-
 end
diff --git a/src/bicgstab.jl b/src/bicgstab.jl
index c3b914599..c4f16595e 100644
--- a/src/bicgstab.jl
+++ b/src/bicgstab.jl
@@ -16,40 +16,59 @@ export bicgstab, bicgstab!
 
 """
-    (x, stats) = bicgstab(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b,
-                          M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
-                          itmax::Int=0, verbose::Int=0, history::Bool=false,
-                          ldiv::Bool=false, callback=solver->false)
+    (x, stats) = bicgstab(A, b::AbstractVector{FC};
+                          c::AbstractVector{FC}=b, M=I, N=I,
+                          ldiv::Bool=false, atol::T=√eps(T),
+                          rtol::T=√eps(T), itmax::Int=0,
+                          verbose::Int=0, history::Bool=false,
+                          callback=solver->false, iostream::IO=kstdout)
 
 `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
 `FC` is `T` or `Complex{T}`.
 
-Solve the square linear system Ax = b using the BICGSTAB method.
+    (x, stats) = bicgstab(A, b, x0::AbstractVector; kwargs...)
+
+BICGSTAB can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the square linear system Ax = b of size n using BICGSTAB.
 BICGSTAB requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. The Biconjugate Gradient Stabilized method is a variant of BiCG, like CGS, -but using different updates for the Aᵀ-sequence in order to obtain smoother +but using different updates for the Aᴴ-sequence in order to obtain smoother convergence than CGS. If BICGSTAB stagnates, we recommend DQGMRES and BiLQ as alternative methods for unsymmetric square systems. BICGSTAB stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖b‖ * rtol`. -`atol` is an absolute tolerance and `rtol` is a relative tolerance. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -This implementation allows a left preconditioner `M` and a right preconditioner `N`. +* `x0`: a vector of length n that represents an initial guess of the solution x. -BICGSTAB can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = bicgstab(A, b, x0; kwargs...) +* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. 
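+
+#### Example
+
+A minimal usage sketch on a random square system; the shift `n * I` is only there to make the example well conditioned:
+
+```julia
+using Krylov, LinearAlgebra
+
+n = 100
+A = rand(n, n) + n * I  # illustrative nonsymmetric system
+b = rand(n)
+x, stats = bicgstab(A, b)
+```
+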
#### References @@ -86,15 +105,17 @@ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}, return solver end -function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} +function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}; + c :: AbstractVector{FC}=b, M=I, N=I, + ldiv :: Bool=false, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - n, m = size(A) + m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("BICGSTAB: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "BICGSTAB: system of size %d\n", n) # Check M = Iₙ and N = Iₙ MisI = (M === I) @@ -102,8 +123,8 @@ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Set up workspace. allocate_if(!MisI, solver, :t , S, n) @@ -150,14 +171,14 @@ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}; itmax == 0 && (itmax = 2*n) ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s %8s %8s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|") - kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω)) + (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω)) next_ρ = @kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩ if next_ρ == 0 stats.niter = 0 stats.solved, stats.inconsistent = false, false - stats.status = "Breakdown bᵀc = 0" + stats.status = "Breakdown bᴴc = 0" solver.warm_start = false return solver end @@ -207,9 +228,9 @@ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}; solved = resid_decrease_lim || resid_decrease_mach tired = iter ≥ itmax breakdown = (α == 0 || isnan(α)) - kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω)) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω)) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") breakdown && (status = "breakdown αₖ == 0") diff --git a/src/bilq.jl b/src/bilq.jl index 39725fbfe..12ee40652 100644 --- a/src/bilq.jl +++ b/src/bilq.jl @@ -13,35 +13,54 @@ export bilq, bilq! 
""" - (x, stats) = bilq(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + (x, stats) = bilq(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, transfer_to_bicg::Bool=true, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the square linear system Ax = b using the BiLQ method. + (x, stats) = bilq(A, b, x0::AbstractVector; kwargs...) +BiLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the square linear system Ax = b of size n using BiLQ. BiLQ is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. -When `A` is symmetric and `b = c`, BiLQ is equivalent to SYMMLQ. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. +When `A` is Hermitian and `b = c`, BiLQ is equivalent to SYMMLQ. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -An option gives the possibility of transferring to the BiCG point, -when it exists. The transfer is based on the residual norm. +* `x0`: a vector of length n that represents an initial guess of the solution x. -BiLQ can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = bilq(A, b, x0; kwargs...) +* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. -#### Reference +#### References * A. Montoison and D. Orban, [*BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*](https://doi.org/10.1137/19M1290991), SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020. +* R. Fletcher, [*Conjugate gradient methods for indefinite systems*](https://doi.org/10.1007/BFb0080116), Numerical Analysis, Springer, pp. 73--89, 1976. 
""" function bilq end @@ -73,23 +92,24 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: A return solver end -function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} +function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; + c :: AbstractVector{FC}=b, transfer_to_bicg :: Bool=true, + atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - n, m = size(A) + m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("BILQ: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "BILQ: system of size %d\n", n) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ @@ -122,29 +142,29 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab itmax == 0 && (itmax = 2*n) ε = atol + rtol * bNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) + (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, bNorm) # Initialize the Lanczos biorthogonalization process. - cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩ - if cᵗb == 0 + cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩ + if cᴴb == 0 stats.niter = 0 stats.solved = false stats.inconsistent = false - stats.status = "Breakdown bᵀc = 0" + stats.status = "Breakdown bᴴc = 0" solver.warm_start = false return solver end - βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀) - γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀) + βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀) + γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀) vₖ₋₁ .= zero(FC) # v₀ = 0 uₖ₋₁ .= zero(FC) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ + d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations @@ -164,10 +184,10 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab # Continue the Lanczos biorthogonalization process. 
# AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ + # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ + mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ @@ -177,9 +197,9 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|) - γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁ + pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ # Update the LQ factorization of Tₖ = L̅ₖQₖ. # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] @@ -234,7 +254,7 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ. + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ. # [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ if iter ≥ 2 @@ -257,13 +277,13 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - if pᵗq ≠ 0 + if pᴴq ≠ 0 @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p end # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ - vₖᵀvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ) + vₖᴴvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ) norm_vₖ₊₁ = @knrm2(n, vₖ) # Compute BiLQ residual norm @@ -273,7 +293,7 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab else μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁ + θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁ rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) end history && push!(rNorms, rNorm_lq) @@ -299,10 +319,10 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab solved_lq = rNorm_lq ≤ ε solved_cg = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) tired = iter ≥ itmax - breakdown = !solved_lq && !solved_cg && (pᵗq == 0) - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq) + breakdown = !solved_lq && !solved_cg && (pᴴq == 0) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm_lq) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") # Compute BICG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ diff --git a/src/bilqr.jl b/src/bilqr.jl index 09fef1f6c..5666f0863 100644 --- a/src/bilqr.jl +++ b/src/bilqr.jl @@ -1,5 +1,5 @@ # An implementation of BILQR for the solution of square -# consistent linear adjoint systems Ax = b and Aᵀy = c. +# consistent linear adjoint systems Ax = b and Aᴴy = c. # # This method is described in # @@ -14,33 +14,54 @@ export bilqr, bilqr! """ (x, y, stats) = bilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + transfer_to_bicg::Bool=true, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. 
+    (x, y, stats) = bilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
+
+BiLQR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above.
+
 Combine BiLQ and QMR to solve adjoint systems.
 
     [0  A] [y] = [b]
-    [Aᵀ 0] [x]   [c]
+    [Aᴴ 0] [x]   [c]
+
+The relation `bᴴc ≠ 0` must be satisfied.
+BiLQ is used for solving the primal system `Ax = b` of size n.
+QMR is used for solving the dual system `Aᴴy = c` of size n.
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n;
+* `c`: a vector of length n.
 
-The relation `bᵀc ≠ 0` must be satisfied.
-BiLQ is used for solving primal system `Ax = b`.
-QMR is used for solving dual system `Aᵀy = c`.
+#### Optional arguments
 
-An option gives the possibility of transferring from the BiLQ point to the
-BiCG point, when it exists. The transfer is based on the residual norm.
+* `x0`: a vector of length n that represents an initial guess of the solution x;
+* `y0`: a vector of length n that represents an initial guess of the solution y.
 
-BiLQR can be warm-started from initial guesses `x0` and `y0` with the method
+#### Keyword arguments
 
-    (x, y, stats) = bilqr(A, b, c, x0, y0; kwargs...)
+* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. The transfer is based on the residual norm;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
 
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
 
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length n;
+* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure.
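+
+#### Example
+
+A minimal usage sketch for the adjoint pair `Ax = b` and `Aᴴy = c` (illustrative only; the shift `n * I` keeps the random system well conditioned):
+
+```julia
+using Krylov, LinearAlgebra
+
+n = 100
+A = rand(n, n) + n * I
+b = rand(n)
+c = rand(n)
+x, y, stats = bilqr(A, b, c)
+```
+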
#### Reference @@ -78,23 +99,24 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: end function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + transfer_to_bicg :: Bool=true, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - n, m = size(A) + m, n = size(A) m == n || error("Systems must be square") length(b) == m || error("Inconsistent problem size") length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("BILQR: systems of size %d\n", n) + (verbose > 0) && @printf(iostream, "BILQR: systems of size %d\n", n) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ @@ -109,7 +131,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: if warm_start mul!(r₀, A, Δx) @kaxpby!(n, one(FC), b, -one(FC), r₀) - mul!(s₀, Aᵀ, Δy) + mul!(s₀, Aᴴ, Δy) @kaxpby!(n, one(FC), c, -one(FC), s₀) end @@ -117,7 +139,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: x .= zero(FC) # x₀ bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖ - # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᵀy₀‖. + # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᴴy₀‖. t .= zero(FC) # t₀ cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ @@ -128,38 +150,38 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: history && push!(sNorms, cNorm) εL = atol + rtol * bNorm εQ = atol + rtol * cNorm - (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm) + (verbose > 0) && @printf(iostream, "%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e\n", iter, bNorm, cNorm) # Initialize the Lanczos biorthogonalization process. - cᵗb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᵀy₀,b - Ax₀⟩ - if cᵗb == 0 + cᴴb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᴴy₀,b - Ax₀⟩ + if cᴴb == 0 stats.niter = 0 stats.solved_primal = false stats.solved_dual = false - stats.status = "Breakdown bᵀc = 0" + stats.status = "Breakdown bᴴc = 0" solver.warm_start = false return solver end # Set up workspace. 
- βₖ = √(abs(cᵗb)) # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀) - γₖ = cᵗb / βₖ # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀) + βₖ = √(abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀) + γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀) vₖ₋₁ .= zero(FC) # v₀ = 0 uₖ₋₁ .= zero(FC) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᵀy₀) / γ̄₁ + uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᴴy₀) / γ̄₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ + d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁ norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ - wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ + wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ τₖ = zero(T) # τₖ is used for the dual residual norm estimate # Stopping criterion. @@ -180,10 +202,10 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: # Continue the Lanczos biorthogonalization process. # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ + # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ + mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ @@ -193,9 +215,9 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|) - γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁ + pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ # Update the LQ factorization of Tₖ = L̅ₖQₖ. # [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ] @@ -251,7 +273,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ. + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ. 
# [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ if iter ≥ 2 @@ -271,7 +293,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: end # Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖ - vₖᵀvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁ + vₖᴴvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁ norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁ # Compute BiLQ residual norm @@ -281,7 +303,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: else μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁ ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁ - θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁ + θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁ rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ)) end history && push!(rNorms, rNorm_lq) @@ -318,7 +340,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: ψbarₖ = sₖ * ψbarₖ₋₁ end - # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ. + # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ. # w₁ = u₁ / δ̄₁ if iter == 2 wₖ₋₁ = wₖ₋₂ @@ -372,7 +394,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - if pᵗq ≠ zero(FC) + if pᴴq ≠ zero(FC) @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p end @@ -392,13 +414,13 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: user_requested_exit = callback(solver) :: Bool tired = iter ≥ itmax - breakdown = !solved_lq && !solved_cg && (pᵗq == 0) + breakdown = !solved_lq && !solved_cg && (pᴴq == 0) - kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm) - kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "") - kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm) + kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e\n", iter, "", sNorm) + kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s\n", iter, rNorm_lq, "") + kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") # Compute BICG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ diff --git a/src/callback_utils.jl b/src/callback_utils.jl deleted file mode 100644 index eac362e5d..000000000 --- a/src/callback_utils.jl +++ /dev/null @@ -1,50 +0,0 @@ -export StorageGetxRestartedGmres - -export get_x_restarted_gmres! - -mutable struct StorageGetxRestartedGmres{S} - x::S - y::S - p::S -end -StorageGetxRestartedGmres(solver::GmresSolver; N = I) = - StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? similar(solver.p) : similar(solver.x)) - -function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A, - stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S} - NisI = (N === I) - x2, y2, p2 = stor.x, stor.y, stor.p - n = size(A, 2) - # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. 
- nr = sum(1:solver.inner_iter) - y = solver.z # yᵢ = zᵢ - y2 .= y - R = solver.R - V = solver.V - x2 .= solver.Δx - for i = solver.inner_iter : -1 : 1 - pos = nr + i - solver.inner_iter # position of rᵢ.ₖ - for j = solver.inner_iter : -1 : i+1 - y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ - pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ - end - # Rₖ can be singular if the system is inconsistent - if abs(R[pos]) ≤ eps(T)^(3/4) - y2[i] = zero(FC) - inconsistent = true - else - y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ - end - end - - # Form xₖ = N⁻¹Vₖyₖ - for i = 1 : solver.inner_iter - @kaxpy!(n, y2[i], V[i], x2) - end - if !NisI - p2 .= solver.p - p2 .= x2 - mul!(x2, N, p2) - end - x2 .+= solver.x -end diff --git a/src/cg.jl b/src/cg.jl index 8a974accc..ed9d88cfa 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -15,36 +15,53 @@ export cg, cg! - """ (x, stats) = cg(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, radius::T=zero(T), linesearch::Bool=false, + M=I, ldiv::Bool=false, radius::T=zero(T), + linesearch::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -The conjugate gradient method to solve the symmetric linear system Ax=b. + (x, stats) = cg(A, b, x0::AbstractVector; kwargs...) -The method does _not_ abort if A is not definite. +CG can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. +The conjugate gradient method to solve the Hermitian linear system Ax = b of size n. + +The method does _not_ abort if A is not definite. M also indicates the weighted norm in which residuals are measured. -If `itmax=0`, the default number of iterations is set to `2 * n`, -with `n = length(b)`. +#### Input arguments + +* `A`: a linear operator that models a Hermitian positive definite matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. -CG can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = cg(A, b, x0; kwargs...) +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient); +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -81,24 +98,25 @@ function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: Abstr end function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, radius :: T=zero(T), linesearch :: Bool=false, + M=I, ldiv :: Bool=false, radius :: T=zero(T), + linesearch :: Bool=false, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0") - n, m = size(A) + m, n = size(A) m == n || error("System must be square") length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("CG: system of %d equations in %d variables\n", n, n) + (verbose > 0) && @printf(iostream, "CG: system of %d equations in %d variables\n", n, n) # Tests M = Iₙ MisI = (M === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. allocate_if(!MisI, solver, :z, S, n) @@ -134,8 +152,8 @@ function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}; pAp = zero(T) pNorm² = γ ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s %8s %8s %8s\n", "k", "‖r‖", "pAp", "α", "σ") - kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm) + (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s %8s\n", "k", "‖r‖", "pAp", "α", "σ") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e ", iter, rNorm) solved = rNorm ≤ ε tired = iter ≥ itmax @@ -164,9 +182,9 @@ function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}; α = γ / pAp # Compute step size to boundary if applicable. - σ = radius > 0 ? maximum(to_boundary(x, p, radius, dNorm2=pNorm²)) : α + σ = radius > 0 ? 
maximum(to_boundary(n, x, p, radius, dNorm2=pNorm²)) : α - kdisplay(iter, verbose) && @printf("%8.1e %8.1e %8.1e\n", pAp, α, σ) + kdisplay(iter, verbose) && @printf(iostream, "%8.1e %8.1e %8.1e\n", pAp, α, σ) # Move along p from x to the boundary if either # the next step leads outside the trust region or @@ -201,9 +219,9 @@ function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = iter + 1 tired = iter ≥ itmax user_requested_exit = callback(solver) :: Bool - kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e ", iter, rNorm) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") solved && on_boundary && (status = "on trust-region boundary") solved && linesearch && (pAp ≤ 0) && (status = "nonpositive curvature detected") diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl index a8e24f02f..f648eb2a8 100644 --- a/src/cg_lanczos.jl +++ b/src/cg_lanczos.jl @@ -12,34 +12,52 @@ export cg_lanczos, cg_lanczos! - """ (x, stats) = cg_lanczos(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, - check_curvature::Bool=false, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, + check_curvature::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -The Lanczos version of the conjugate gradient method to solve the -symmetric linear system + (x, stats) = cg_lanczos(A, b, x0::AbstractVector; kwargs...) - Ax = b +CG-LANCZOS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +The Lanczos version of the conjugate gradient method to solve the +Hermitian linear system Ax = b of size n. The method does _not_ abort if A is not definite. -A preconditioner M may be provided in the form of a linear operator and is -assumed to be hermitian and positive definite. +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. -CG-LANCZOS can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = cg_lanczos(A, b, x0; kwargs...) +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not, unless `linesearch` is also `true`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. 
-where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`LanczosStats`](@ref) structure. #### References @@ -77,21 +95,23 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F end function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, - check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + M=I, ldiv :: Bool=false, + check_curvature :: Bool=false, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - n, m = size(A) + m, n = size(A) m == n || error("System must be square") length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables\n", n, n) + (verbose > 0) && @printf(iostream, "CG Lanczos: system of %d equations in %d variables\n", n, n) # Tests M = Iₙ MisI = (M === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $T") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. allocate_if(!MisI, solver, :v, S, n) @@ -111,7 +131,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F Mv .= b end MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀ - β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁ + β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ σ = β rNorm = σ history && push!(rNorms, rNorm) @@ -143,8 +163,8 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F # Define stopping tolerance. ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm) indefinite = false solved = rNorm ≤ ε @@ -157,10 +177,10 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F # Form next Lanczos vector. # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ - δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ + δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ # Check curvature. Exit fast if requested. - # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ A pₖ. + # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ A pₖ. γ = one(T) / (δ - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁) indefinite |= (γ ≤ 0) (check_curvature & indefinite) && continue @@ -172,7 +192,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F end @. Mv = Mv_next # Mvₖ ← Mvₖ₊₁ MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ - β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁ + β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂. 
@@ -187,7 +207,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F rNorm = abs(σ) # ‖rₖ₊₁‖_M = |σₖ₊₁| because rₖ₊₁ = σₖ₊₁ * vₖ₊₁ and ‖vₖ₊₁‖_M = 1 history && push!(rNorms, rNorm) iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm) # Stopping conditions that do not depend on user input. # This is to guard against tolerances that are unreasonably small. @@ -198,7 +218,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F solved = resid_decrease_lim || resid_decrease_mach tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") (check_curvature & indefinite) && (status = "negative curvature") diff --git a/src/cg_lanczos_shift.jl b/src/cg_lanczos_shift.jl index 01f11e41f..bf883649d 100644 --- a/src/cg_lanczos_shift.jl +++ b/src/cg_lanczos_shift.jl @@ -13,13 +13,13 @@ export cg_lanczos_shift, cg_lanczos_shift! - """ (x, stats) = cg_lanczos_shift(A, b::AbstractVector{FC}, shifts::AbstractVector{T}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, check_curvature::Bool=false, + M=I, ldiv::Bool=false, + check_curvature::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -27,15 +27,38 @@ export cg_lanczos_shift, cg_lanczos_shift! The Lanczos version of the conjugate gradient method to solve a family of shifted systems - (A + αI) x = b (α = α₁, ..., αₙ) + (A + αI) x = b (α = α₁, ..., αₚ) + +of size n. The method does _not_ abort if A + αI is not definite. + +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n; +* `shifts`: a vector of length p. -The method does _not_ abort if A + αI is not definite. +#### Keyword arguments -A preconditioner M may be provided in the form of a linear operator and is -assumed to be hermitian and positive definite. +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not, unless `linesearch` is also `true`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+#### Output arguments + +* `x`: a vector of p dense vectors, each one of length n; +* `stats`: statistics collected on the run in a [`LanczosShiftStats`](@ref) structure. + +#### References + +* A. Frommer and P. Maass, [*Fast CG-Based Methods for Tikhonov-Phillips Regularization*](https://doi.org/10.1137/S1064827596313310), SIAM Journal on Scientific Computing, 20(5), pp. 1831--1850, 1999. +* C. C. Paige and M. A. Saunders, [*Solution of Sparse Indefinite Systems of Linear Equations*](https://doi.org/10.1137/0712047), SIAM Journal on Numerical Analysis, 12(4), pp. 617--629, 1975. """ function cg_lanczos_shift end @@ -56,24 +79,25 @@ See [`CgLanczosShiftSolver`](@ref) for more details about the `solver`. function cg_lanczos_shift! end function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: AbstractVector{FC}, shifts :: AbstractVector{T}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, check_curvature :: Bool=false, + M=I, ldiv :: Bool=false, + check_curvature :: Bool=false, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - n, m = size(A) + m, n = size(A) m == n || error("System must be square") length(b) == n || error("Inconsistent problem size") nshifts = length(shifts) - (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables with %d shifts\n", n, n, nshifts) + (verbose > 0) && @printf(iostream, "CG Lanczos: system of %d equations in %d variables with %d shifts\n", n, n, nshifts) # Tests M = Iₙ MisI = (M === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. allocate_if(!MisI, solver, :v, S, n) @@ -92,7 +116,7 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr end Mv .= b # Mv₁ ← b MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁ - β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁ + β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁ rNorms .= β if history for i = 1 : nshifts @@ -140,14 +164,10 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr itmax == 0 && (itmax = 2 * n) # Build format strings for printing. - if kdisplay(iter, verbose) - fmt = "%5d" * repeat(" %8.1e", nshifts) * "\n" - # precompile printf for our particular format - local_printf(data...) = Core.eval(Main, :(@printf($fmt, $(data)...))) - local_printf(iter, rNorms...) - end + (verbose > 0) && (fmt = Printf.Format("%5d" * repeat(" %8.1e", nshifts) * "\n")) + kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms...) - solved = sum(not_cv) == 0 + solved = !reduce(|, not_cv) tired = iter ≥ itmax status = "unknown" user_requested_exit = false @@ -157,7 +177,7 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr # Form next Lanczos vector. # βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁ mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ - δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ + δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ @kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ if iter > 0 @kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁ @@ -165,12 +185,12 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr end @. 
Mv = Mv_next # Mvₖ ← Mvₖ₊₁ MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁ - β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁ + β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁ @kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁ MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁ - # Check curvature: vₖᵀ(A + sᵢI)vₖ = vₖᵀAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖². - # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ (A + sᵢ I) pₖ. + # Check curvature: vₖᴴ(A + sᵢI)vₖ = vₖᴴAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖². + # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ (A + sᵢ I) pₖ. MisI || (ρ = @kdotr(n, v, v)) for i = 1 : nshifts δhat[i] = δ + ρ * shifts[i] @@ -208,13 +228,13 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i] end iter = iter + 1 - kdisplay(iter, verbose) && local_printf(iter, rNorms...) + kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms...) user_requested_exit = callback(solver) :: Bool - solved = sum(not_cv) == 0 + solved = !reduce(|, not_cv) tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") solved && (status = "solution good enough given atol and rtol") diff --git a/src/cgls.jl b/src/cgls.jl index f5529fbfb..55fe6d0ec 100644 --- a/src/cgls.jl +++ b/src/cgls.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # CGLS is formally equivalent to applying the conjugate gradient method # to the normal equations but should be more stable. It is also formally @@ -28,12 +28,12 @@ export cgls, cgls! - """ (x, stats) = cgls(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), + itmax::Int=0, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -42,19 +42,40 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ‖x‖₂² -using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization +of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying CG to the normal equations - (AᵀA + λI) x = Aᵀb + (AᴴA + λI) x = Aᴴb but is more stable. -CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂. +CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂. It is formally equivalent to LSQR, though can be slightly less accurate, but simpler to implement. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`.
Useful to compute a step in a trust-region method for optimization; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -79,23 +100,24 @@ See [`CglsSolver`](@ref) for more details about the `solver`. function cgls! end function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + M=I, ldiv :: Bool=false, radius :: T=zero(T), + λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), + itmax :: Int=0, verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CGLS: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "CGLS: system of %d equations in %d variables\n", m, n) # Tests M = Iₙ MisI = (M === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. allocate_if(!MisI, solver, :Mr, S, m) @@ -117,9 +139,9 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC}; return solver end MisI || mulorldiv!(Mr, M, r, ldiv) - mul!(s, Aᵀ, Mr) + mul!(s, Aᴴ, Mr) p .= s - γ = @kdotr(n, s, s) # γ = sᵀs + γ = @kdotr(n, s, s) # γ = sᴴs iter = 0 itmax == 0 && (itmax = m + n) @@ -128,8 +150,8 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC}; history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) ε = atol + rtol * ArNorm - (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) + (verbose > 0) && @printf(iostream, "%5s %8s %8s\n", "k", "‖Aᴴr‖", "‖r‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) status = "unknown" on_boundary = false @@ -140,12 +162,12 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC}; while ! (solved || tired || user_requested_exit) mul!(q, A, p) MisI || mulorldiv!(Mq, M, q, ldiv) - δ = @kdotr(m, q, Mq) # δ = qᵀMq - λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᵀp + δ = @kdotr(m, q, Mq) # δ = qᴴMq + λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᴴp α = γ / δ # if a trust-region constraint is given, compute step to the boundary - σ = radius > 0 ?
maximum(to_boundary(x, p, radius)) : α + σ = radius > 0 ? maximum(to_boundary(n, x, p, radius)) : α if (radius > 0) & (α > σ) α = σ on_boundary = true @@ -154,9 +176,9 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC}; @kaxpy!(n, α, p, x) # Faster than x = x + α * p @kaxpy!(m, -α, q, r) # Faster than r = r - α * q MisI || mulorldiv!(Mr, M, r, ldiv) - mul!(s, Aᵀ, Mr) + mul!(s, Aᴴ, Mr) λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x - γ_next = @kdotr(n, s, s) # γ_next = sᵀs + γ_next = @kdotr(n, s, s) # γ_next = sᴴs β = γ_next / γ @kaxpby!(n, one(FC), s, β, p) # p = s + βp γ = γ_next @@ -165,12 +187,12 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC}; history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) user_requested_exit = callback(solver) :: Bool solved = (ArNorm ≤ ε) | on_boundary tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") solved && (status = "solution good enough given atol and rtol") diff --git a/src/cgne.jl b/src/cgne.jl index 2859414e1..f85af32be 100644 --- a/src/cgne.jl +++ b/src/cgne.jl @@ -10,7 +10,7 @@ # and is equivalent to applying the conjugate gradient method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # # This method is also known as Craig's method, CGME, and other # names, and is described in @@ -28,12 +28,13 @@ export cgne, cgne! - """ (x, stats) = cgne(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + N=I, ldiv::Bool=false, + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -42,11 +43,11 @@ Solve the consistent linear system Ax + √λs = b -using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization +of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying CG to the normal equations of the second kind - (AAᵀ + λI) y = b + (AAᴴ + λI) y = b but is more stable. When λ = 0, this method solves the minimum-norm problem @@ -60,10 +61,28 @@ CGNE produces monotonic errors ‖x-x*‖₂ but not residuals ‖r‖₂. It is formally equivalent to CRAIG, though can be slightly less accurate, but simpler to implement. Only the x-part of the solution is returned. -A preconditioner M may be provided in the form of a linear operator. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `N`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0).
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -88,35 +107,37 @@ See [`CgneSolver`](@ref) for more details about the `solver`. function cgne! end function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + N=I, ldiv :: Bool=false, + λ :: T=zero(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CGNE: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "CGNE: system of %d equations in %d variables\n", m, n) - # Tests M = Iₙ - MisI = (M === I) + # Tests N = Iₙ + NisI = (N === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. - allocate_if(!MisI, solver, :z, S, m) + allocate_if(!NisI, solver, :z, S, m) allocate_if(λ > 0, solver, :s, S, m) - x, p, Aᵀz, r, q, s, stats = solver.x, solver.p, solver.Aᵀz, solver.r, solver.q, solver.s, solver.stats + x, p, Aᴴz, r, q, s, stats = solver.x, solver.p, solver.Aᴴz, solver.r, solver.q, solver.s, solver.stats rNorms = stats.residuals reset!(stats) - z = MisI ? r : solver.z + z = NisI ? r : solver.z x .= zero(FC) r .= b - MisI || mulorldiv!(z, M, r, ldiv) + NisI || mulorldiv!(z, N, r, ldiv) rNorm = @knrm2(m, r) # Marginally faster than norm(r) history && push!(rNorms, rNorm) if rNorm == 0 @@ -126,7 +147,7 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC}; return solver end λ > 0 && (s .= r) - mul!(p, Aᵀ, z) + mul!(p, Aᴴ, z) # Use ‖p‖ to detect inconsistent system. # An inconsistent system will necessarily have AA' singular. @@ -141,8 +162,8 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC}; ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. ɛ_i = atol + rtol * pNorm # Stopping tolerance for inconsistent systems. 
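To make the minimum-norm property of CGNE concrete, a small sketch on a consistent underdetermined system; the random data are an illustrative assumption:

    using Krylov, LinearAlgebra

    m, n = 20, 50
    A = randn(m, n)  # full row rank with probability one
    b = randn(m)

    x, stats = cgne(A, b)
    norm(A * x - b)                # consistent system: small residual
    norm(x - A' * ((A * A') \ b))  # x ≈ Aᴴ(AAᴴ)⁻¹b, the minimum-norm solution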
- (verbose > 0) && @printf("%5s %8s\n", "k", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm) + (verbose > 0) && @printf(iostream, "%5s %8s\n", "k", "‖r‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e\n", iter, rNorm) status = "unknown" solved = rNorm ≤ ɛ_c @@ -158,11 +179,11 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC}; α = γ / δ @kaxpy!(n, α, p, x) # Faster than x = x + α * p @kaxpy!(m, -α, q, r) # Faster than r = r - α * q - MisI || mulorldiv!(z, M, r, ldiv) + NisI || mulorldiv!(z, N, r, ldiv) γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z) β = γ_next / γ - mul!(Aᵀz, Aᵀ, z) - @kaxpby!(n, one(FC), Aᵀz, β, p) # Faster than p = Aᵀz + β * p + mul!(Aᴴz, Aᴴ, z) + @kaxpby!(n, one(FC), Aᴴz, β, p) # Faster than p = Aᴴz + β * p pNorm = @knrm2(n, p) if λ > 0 @kaxpby!(m, one(FC), r, β, s) # s = r + β * s @@ -171,7 +192,7 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC}; rNorm = sqrt(γ_next) history && push!(rNorms, rNorm) iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e\n", iter, rNorm) # Stopping conditions that do not depend on user input. # This is to guard against tolerances that are unreasonably small. @@ -183,7 +204,7 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC}; inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i) tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") inconsistent && (status = "system probably inconsistent") diff --git a/src/cgs.jl b/src/cgs.jl index c1eb1056e..cbb3db13b 100644 --- a/src/cgs.jl +++ b/src/cgs.jl @@ -11,17 +11,23 @@ export cgs, cgs! """ - (x, stats) = cgs(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = cgs(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, M=I, N=I, + ldiv::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the consistent linear system Ax = b using conjugate gradient squared algorithm. + (x, stats) = cgs(A, b, x0::AbstractVector; kwargs...) + +CGS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the consistent linear system Ax = b of size n using CGS. CGS requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. From "Iterative Methods for Sparse Linear Systems (Y. Saad)" : @@ -38,16 +44,33 @@ to become inaccurate. TFQMR and BICGSTAB were developed to remedy this difficulty.» -This implementation allows a left preconditioner M and a right preconditioner N. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. -CGS can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = cgs(A, b, x0; kwargs...) 
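A usage sketch of `cgs`, including the second initial vector `c` and a warm start; the mildly perturbed identity matrix is an illustrative assumption chosen to keep the system well conditioned:

    using Krylov, LinearAlgebra

    n = 100
    A = I + 0.05 * randn(n, n)  # square, nonsymmetric, eigenvalues near 1
    b = randn(n)

    x, stats = cgs(A, b)             # default c = b, so bᴴc = ‖b‖² ≠ 0
    x, stats = cgs(A, b, x)          # warm start from the previous solution
    x, stats = cgs(A, b, c=rand(n))  # custom second initial vector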
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -83,15 +106,17 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: Abs return solver end -function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} +function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; + c :: AbstractVector{FC}=b, M=I, N=I, + ldiv :: Bool=false, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CGS: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "CGS: system of size %d\n", n) # Check M = Iₙ and N = Iₙ MisI = (M === I) @@ -99,8 +124,8 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Set up workspace. 
allocate_if(!MisI, solver, :vw, S, n) @@ -142,7 +167,7 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst if ρ == 0 stats.niter = 0 stats.solved, stats.inconsistent = false, false - stats.status = "Breakdown bᵀc = 0" + stats.status = "Breakdown bᴴc = 0" solver.warm_start =false return solver end @@ -151,8 +176,8 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst itmax == 0 && (itmax = 2*n) ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm) u .= r # u₀ p .= r # p₀ @@ -207,9 +232,9 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst solved = resid_decrease_lim || resid_decrease_mach tired = iter ≥ itmax breakdown = (α == 0 || isnan(α)) - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") breakdown && (status = "breakdown αₖ == 0") diff --git a/src/cr.jl b/src/cr.jl index c678c7d29..26f317385 100644 --- a/src/cr.jl +++ b/src/cr.jl @@ -16,32 +16,52 @@ export cr, cr! """ (x, stats) = cr(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), γ::T=√eps(T), itmax::Int=0, - radius::T=zero(T), verbose::Int=0, linesearch::Bool=false, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + linesearch::Bool=false, γ::T=√eps(T), + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -A truncated version of Stiefel’s Conjugate Residual method to solve the symmetric linear system Ax = b or the least-squares problem min ‖b - Ax‖. -The matrix A must be positive semi-definite. + (x, stats) = cr(A, b, x0::AbstractVector; kwargs...) -A preconditioner M may be provided in the form of a linear operator and is assumed to be symmetric and positive definite. +CR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +A truncated version of Stiefel’s Conjugate Residual method to solve the Hermitian linear system Ax = b +of size n or the least-squares problem min ‖b - Ax‖ if A is singular. +The matrix A must be Hermitian semi-definite. M also indicates the weighted norm in which residuals are measured. -In a linesearch context, 'linesearch' must be set to 'true'. +#### Input arguments + +* `A`: a linear operator that models a Hermitian positive definite matrix of dimension n; +* `b`: a vector of length n. -If `itmax=0`, the default number of iterations is set to `2 * n`, -with `n = length(b)`. +#### Optional argument -CR can be warm-started from an initial guess `x0` with the method +* `x0`: a vector of length n that represents an initial guess of the solution x. - (x, stats) = cr(A, b, x0; kwargs...) +#### Keyword arguments -where `kwargs` are the same keyword arguments as above. 
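The trust-region and linesearch modes of `cr` can be exercised as follows; the quadratic data and the radius are illustrative assumptions:

    using Krylov, LinearAlgebra

    n = 100
    A = SymTridiagonal(2 * ones(n), -ones(n - 1))  # Hermitian positive definite
    g = ones(n)

    d, stats = cr(A, -g)                   # unconstrained step
    d, stats = cr(A, -g, radius=0.1)       # step restricted to ‖d‖ ≤ 0.1
    d, stats = cr(A, -g, linesearch=true)  # returns early on nonpositive curvature

Note that `radius > 0` and `linesearch=true` are mutually exclusive, as enforced by the `error` call at the top of `cr!`.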
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient); +* `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -80,22 +100,25 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: Abstr end function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), γ :: T=√eps(T), itmax :: Int=0, - radius :: T=zero(T), verbose :: Int=0, linesearch :: Bool=false, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + M=I, ldiv :: Bool=false, radius :: T=zero(T), + linesearch :: Bool=false, γ :: T=√eps(T), + atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0") - n, m = size(A) + + m, n = size(A) m == n || error("System must be square") length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("CR: system of %d equations in %d variables\n", n, n) + (verbose > 0) && @printf(iostream, "CR: system of %d equations in %d variables\n", n, n) # Tests M = Iₙ MisI = (M === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace allocate_if(!MisI, solver, :Mq, S, n) @@ -146,10 +169,10 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}; ArNorm = @knrm2(n, Ar) # ‖Ar‖ history && push!(ArNorms, ArNorm) ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %8s %8s %8s\n", "k", "‖x‖", "‖r‖", "quad") - kdisplay(iter, verbose) && @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m) + (verbose > 0) && @printf(iostream, "%5s %8s %8s 
%8s\n", "k", "‖x‖", "‖r‖", "quad") + kdisplay(iter, verbose) && @printf(iostream, " %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m) - descent = pr > 0 # pᵀr > 0 means p is a descent direction + descent = pr > 0 # pᴴr > 0 means p is a descent direction solved = rNorm ≤ ε tired = iter ≥ itmax on_boundary = false @@ -161,7 +184,7 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}; if linesearch if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²) npcurv = true - (verbose > 0) && @printf("nonpositive curvature detected: pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ) + (verbose > 0) && @printf(iostream, "nonpositive curvature detected: pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) stats.solved = solved stats.inconsistent = false stats.status = "nonpositive curvature" @@ -173,52 +196,52 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}; MisI || mulorldiv!(Mq, M, q, ldiv) if radius > 0 - (verbose > 0) && @printf("radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm) + (verbose > 0) && @printf(iostream, "radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm) # find t1 > 0 and t2 < 0 such that ‖x + ti * p‖² = radius² (i = 1, 2) xNorm² = xNorm * xNorm - t = to_boundary(x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²) + t = to_boundary(n, x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²) t1 = maximum(t) # > 0 t2 = minimum(t) # < 0 - tr = maximum(to_boundary(x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²)) - (verbose > 0) && @printf("t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr) + tr = maximum(to_boundary(n, x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²)) + (verbose > 0) && @printf(iostream, "t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr) - if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᵀAp ≃ 0 + if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᴴAp ≃ 0 npcurv = true # nonpositive curvature - (verbose > 0) && @printf("pᵀAp = %8.1e ≃ 0\n", pAp) - if abspr ≤ γ * pNorm * rNorm # pᵀr ≃ 0 - (verbose > 0) && @printf("pᵀr = %8.1e ≃ 0, redefining p := r\n", pr) + (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e ≃ 0\n", pAp) + if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0 + (verbose > 0) && @printf(iostream, "pᴴr = %8.1e ≃ 0, redefining p := r\n", pr) p = r # - ∇q(x) q = Ar - # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᵀAr - # 1) if rᵀAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᵀAr - # 2) if rᵀAr ≤ 0, the quadratic decreases to -∞ in the direction r + # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᴴAr + # 1) if rᴴAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᴴAr + # 2) if rᴴAr ≤ 0, the quadratic decreases to -∞ in the direction r if ρ > 0 # case 1 - (verbose > 0) && @printf("quadratic is convex in direction r, curv = %8.1e\n", ρ) + (verbose > 0) && @printf(iostream, "quadratic is convex in direction r, curv = %8.1e\n", ρ) α = min(tr, rNorm² / ρ) else # case 2 - (verbose > 0) && @printf("r is a direction of nonpositive curvature: %8.1e\n", ρ) + (verbose > 0) && @printf(iostream, "r is a direction of nonpositive curvature: %8.1e\n", ρ) α = tr end else - # q_p = q(x + α_p * p) - q(x) = -α_p * rᵀp + ½ (α_p)² * pᵀAp - # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᵀAr + # q_p = q(x + α_p * p) - q(x) = -α_p * rᴴp + ½ (α_p)² * pᴴAp + # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᴴAr # Δ = q_p - q_r. If Δ > 0, r is followed, else p is followed α = descent ? 
t1 : t2 ρ > 0 && (tr = min(tr, rNorm² / ρ)) - Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᵀAp = 0 + Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᴴAp = 0 if Δ > 0 # direction r engenders a better decrease - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") p = r q = Ar α = tr else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) end end elseif pAp > 0 && ρ > 0 # no negative curvature - (verbose > 0) && @printf("positive curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ) + (verbose > 0) && @printf(iostream, "positive curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) α = ρ / @kdotr(n, q, Mq) if α ≥ t1 α = t1 @@ -227,49 +250,49 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}; elseif pAp > 0 && ρ < 0 npcurv = true - (verbose > 0) && @printf("pᵀAp = %8.1e > 0 and rᵀAr = %8.1e < 0\n", pAp, ρ) - # q_p is minimal for α_p = rᵀp / pᵀAp + (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ) + # q_p is minimal for α_p = rᴴp / pᴴAp α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp) Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 if Δ > 0 - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") p = r q = Ar α = tr else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) end elseif pAp < 0 && ρ > 0 npcurv = true - (verbose > 0) && @printf("pᵀAp = %8.1e < 0 and rᵀAr = %8.1e > 0\n", pAp, ρ) + (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ) α = descent ? t1 : t2 tr = min(tr, rNorm² / ρ) Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 if Δ > 0 - (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") p = r q = Ar α = tr else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) end elseif pAp < 0 && ρ < 0 npcurv = true - (verbose > 0) && @printf("negative curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ) + (verbose > 0) && @printf(iostream, "negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ) α = descent ? t1 : t2 Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2 if Δ > 0 - (verbose > 0) && @printf("direction r engenders a bigger decrease. 
q_p - q_r = %8.1e > 0\n", Δ) - (verbose > 0) && @printf("redefining p := r\n") + (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ) + (verbose > 0) && @printf(iostream, "redefining p := r\n") p = r q = Ar α = tr else - (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) + (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ) end end @@ -297,7 +320,7 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = iter + 1 if kdisplay(iter, verbose) m = m - α * pr + α^2 * pAp / 2 - @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m) + @printf(iostream, " %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m) end # Stopping conditions that do not depend on user input. @@ -330,14 +353,14 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}; solver.warm_start = false return solver end - pr = rNorm² + β * pr - β * α * pAp # pᵀr + pr = rNorm² + β * pr - β * α * pAp # pᴴr abspr = abs(pr) - pAp = ρ + β^2 * pAp # pᵀq + pAp = ρ + β^2 * pAp # pᴴq abspAp = abs(pAp) descent = pr > 0 end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") on_boundary && (status = "on trust-region boundary") diff --git a/src/craig.jl b/src/craig.jl index 20597ea02..76afe9d51 100644 --- a/src/craig.jl +++ b/src/craig.jl @@ -11,7 +11,7 @@ # and is equivalent to applying the conjugate gradient method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # # This method, sometimes known under the name CRAIG, is the # Golub-Kahan implementation of CGNE, and is described in @@ -32,13 +32,15 @@ export craig, craig! - """ (x, y, stats) = craig(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T), - btol::T=√eps(T), rtol::T=√eps(T), conlim::T=1/√eps(T), itmax::Int=0, - verbose::Int=0, transfer_to_lsqr::Bool=false, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + transfer_to_lsqr::Bool=false, sqd::Bool=false, + λ::T=zero(T), btol::T=√eps(T), + conlim::T=1/√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -47,19 +49,19 @@ Find the least-norm solution of the consistent linear system Ax + λ²y = b -using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a +of size m × n using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a regularization parameter. This method is equivalent to CGNE but is more stable. For a system in the form Ax = b, Craig's method is equivalent to applying -CG to AAᵀy = b and recovering x = Aᵀy. Note that y are the Lagrange +CG to AAᴴy = b and recovering x = Aᴴy. Note that y are the Lagrange multipliers of the least-norm problem minimize ‖x‖ s.t. Ax = b. If `λ > 0`, CRAIG solves the symmetric and quasi-definite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A λ²E ] [ y ] = [ b ], where E and F are symmetric and positive definite. @@ -70,12 +72,12 @@ The system above represents the optimality conditions of min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. 
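A minimal sketch of the minimum-norm interpretation above; the random consistent system is an illustrative assumption:

    using Krylov, LinearAlgebra

    m, n = 20, 50
    A = randn(m, n)
    b = randn(m)

    x, y, stats = craig(A, b)
    norm(A * x - b)   # consistent system: small residual
    norm(x - A' * y)  # x = Aᴴy ties the solution to the multipliers y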
-CRAIG is then equivalent to applying CG to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +CRAIG is then equivalent to applying CG to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`. If `λ = 0`, CRAIG solves the symmetric and indefinite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A 0 ] [ y ] = [ b ]. The system above represents the optimality conditions of @@ -86,8 +88,34 @@ In this case, `M` can still be specified and indicates the weighted norm in whic In this implementation, both the x and y-parts of the solution are returned. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `transfer_to_lsqr`: transfer from the LSLQ point to the LSQR point, when it exists. The transfer is based on the residual norm; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -112,14 +140,17 @@ See [`CraigSolver`](@ref) for more details about the `solver`. function craig! 
end function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0, - verbose :: Int=0, transfer_to_lsqr :: Bool=false, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + M=I, N=I, ldiv :: Bool=false, + transfer_to_lsqr :: Bool=false, sqd :: Bool=false, + λ :: T=zero(T), btol :: T=√eps(T), + conlim :: T=1/√eps(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRAIG: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "CRAIG: system of %d equations in %d variables\n", m, n) # Check sqd and λ parameters sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") @@ -131,16 +162,16 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. allocate_if(!MisI, solver, :u , S, m) allocate_if(!NisI, solver, :v , S, n) allocate_if(λ > 0, solver, :w2, S, n) - x, Nv, Aᵀu, y, w = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w + x, Nv, Aᴴu, y, w = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w Mu, Av, w2, stats = solver.Mu, solver.Av, solver.w2, solver.stats rNorms = stats.residuals reset!(stats) @@ -180,7 +211,7 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; Anorm² = zero(T) # Estimate of ‖A‖²_F. Anorm = zero(T) - Dnorm² = zero(T) # Estimate of ‖(AᵀA)⁻¹‖². + Dnorm² = zero(T) # Estimate of ‖(AᴴA)⁻¹‖². Acond = zero(T) # Estimate of cond(A). xNorm² = zero(T) # Estimate of ‖x‖². xNorm = zero(T) @@ -191,8 +222,8 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. ɛ_i = atol # Stopping tolerance for inconsistent systems. ctol = conlim > 0 ? 1/conlim : zero(T) # Stopping tolerance for ill-conditioned operators. - (verbose > 0) && @printf("%5s %8s %8s %8s %8s %8s %7s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e\n", iter, rNorm, xNorm, Anorm, Acond) + (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s %8s %8s %7s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e\n", iter, rNorm, xNorm, Anorm, Acond) bkwerr = one(T) # initial value of the backward error ‖r‖ / √(‖b‖² + ‖A‖² ‖x‖²) @@ -212,9 +243,9 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; while ! (solved || inconsistent || ill_cond || tired || user_requested_exit) # Generate the next Golub-Kahan vectors - # 1. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) + # 1. 
αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) α = sqrt(@kdotr(n, v, Nv)) if α == 0 @@ -296,7 +327,7 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; ρ_prev = ρ # Only differs from α if λ > 0. - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e\n", iter, rNorm, xNorm, Anorm, Acond, α, β) + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e\n", iter, rNorm, xNorm, Anorm, Acond, α, β) solved_lim = bkwerr ≤ btol solved_mach = one(T) + bkwerr ≤ one(T) @@ -312,7 +343,7 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC}; inconsistent = false tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") # transfer to LSQR point if requested if λ > 0 && transfer_to_lsqr diff --git a/src/craigmr.jl b/src/craigmr.jl index e08bb9c36..3b64829d6 100644 --- a/src/craigmr.jl +++ b/src/craigmr.jl @@ -10,7 +10,7 @@ # and is equivalent to applying the conjugate residual method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # # This method is equivalent to CRMR, and is described in # @@ -26,12 +26,13 @@ export craigmr, craigmr! - """ (x, y, stats) = craigmr(A, b::AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -40,11 +41,11 @@ Solve the consistent linear system Ax + λ²y = b -using the CRAIGMR method, where λ ≥ 0 is a regularization parameter. +of size m × n using the CRAIGMR method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying the Conjugate Residuals method to the normal equations of the second kind - (AAᵀ + λ²I) y = b + (AAᴴ + λ²I) y = b but is more stable. When λ = 0, this method solves the minimum-norm problem @@ -52,7 +53,7 @@ but is more stable. When λ = 0, this method solves the minimum-norm problem If `λ > 0`, CRAIGMR solves the symmetric and quasi-definite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A λ²E ] [ y ] = [ b ], where E and F are symmetric and positive definite. @@ -63,12 +64,12 @@ The system above represents the optimality conditions of min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`. If `λ = 0`, CRAIGMR solves the symmetric and indefinite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A 0 ] [ y ] = [ b ]. The system above represents the optimality conditions of @@ -82,8 +83,31 @@ It is formally equivalent to CRMR, though can be slightly more accurate, and intricate to implement. Both the x- and y-parts of the solution are returned. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
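A usage sketch of `craigmr`, covering the `sqd` mode described above; the rectangular random data are illustrative assumptions:

    using Krylov, LinearAlgebra

    m, n = 20, 50
    A = randn(m, n)
    b = randn(m)

    x, y, stats = craigmr(A, b)            # minimum-norm solution of Ax = b
    x, y, stats = craigmr(A, b, sqd=true)  # quasi-definite variant, λ = 1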
+#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -108,13 +132,15 @@ See [`CraigmrSolver`](@ref) for more details about the `solver`. function craigmr! end function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), - rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + M=I, N=I, ldiv :: Bool=false, + sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRAIGMR: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "CRAIGMR: system of %d equations in %d variables\n", m, n) # Check sqd and λ parameters sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") @@ -126,23 +152,23 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. allocate_if(!MisI, solver, :u, S, m) allocate_if(!NisI, solver, :v, S, n) allocate_if(λ > 0, solver, :q, S, n) - x, Nv, Aᵀu, d, y, Mu = solver.x, solver.Nv, solver.Aᵀu, solver.d, solver.y, solver.Mu + x, Nv, Aᴴu, d, y, Mu = solver.x, solver.Nv, solver.Aᴴu, solver.d, solver.y, solver.Mu w, wbar, Av, q, stats = solver.w, solver.wbar, solver.Av, solver.q, solver.stats rNorms, ArNorms = stats.residuals, stats.Aresiduals reset!(stats) u = MisI ? Mu : solver.u v = NisI ? Nv : solver.v - # Compute y such that AAᵀy = b. Then recover x = Aᵀy. + # Compute y such that AAᴴy = b. Then recover x = Aᴴy. 
x .= zero(FC) y .= zero(FC) Mu .= b @@ -161,9 +187,9 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; # β₁Mu₁ = b. @kscal!(m, one(FC)/β, u) MisI || @kscal!(m, one(FC)/β, Mu) - # α₁Nv₁ = Aᵀu₁. - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu + # α₁Nv₁ = Aᴴu₁. + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) α = sqrt(@kdotr(n, v, Nv)) Anorm² = α * α @@ -171,10 +197,10 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = 0 itmax == 0 && (itmax = m + n) - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β, α, β, α, 0, 1, Anorm²) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β, α, β, α, 0, 1, Anorm²) - # Aᵀb = 0 so x = 0 is a minimum least-squares solution + # Aᴴb = 0 so x = 0 is a minimum least-squares solution if α == 0 stats.niter = 0 stats.solved, stats.inconsistent = true, false @@ -288,16 +314,16 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; # xₖ = Dₖzₖ @kaxpy!(n, ζ, d, x) - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) + # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) α = sqrt(@kdotr(n, v, Nv)) Anorm² = Anorm² + α * α # = ‖Lₖ‖ ArNorm = α * β * abs(ζ/ρ) history && push!(ArNorms, ArNorm) - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²) if λ > 0 (cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, λₐᵤₓ) @@ -320,7 +346,7 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i) tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") solved && (status = "found approximate minimum-norm solution") diff --git a/src/crls.jl b/src/crls.jl index 6410fb836..78615fad6 100644 --- a/src/crls.jl +++ b/src/crls.jl @@ -5,7 +5,7 @@ # # equivalently, of the linear system # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # This implementation follows the formulation given in # @@ -20,12 +20,12 @@ export crls, crls! - """ (x, stats) = crls(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, ldiv::Bool=false, radius::T=zero(T), + λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), + itmax::Int=0, verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -34,19 +34,40 @@ Solve the linear least-squares problem minimize ‖b - Ax‖₂² + λ‖x‖₂² -using the Conjugate Residuals (CR) method. This method is equivalent to -applying MINRES to the normal equations +of size m × n using the Conjugate Residuals (CR) method. +This method is equivalent to applying MINRES to the normal equations - (AᵀA + λI) x = Aᵀb. + (AᴴA + λI) x = Aᴴb. 
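As a concrete check of the normal equations above, a sketch on a regularized problem; `cgls` accepts the same call, and the data are illustrative assumptions:

    using Krylov, LinearAlgebra

    m, n = 100, 40
    A = randn(m, n)
    b = randn(m)
    λ = 1.0e-2

    x, stats = crls(A, b, λ=λ)
    # First-order optimality of min ‖b - Ax‖² + λ‖x‖²: Aᴴ(b - Ax) = λx
    norm(A' * (b - A * x) - λ * x)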
This implementation recurs the residual r := b - Ax.
-CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to LSMR, though can be substantially less accurate,
but simpler to implement.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.

#### Reference
@@ -70,23 +91,24 @@ See [`CrlsSolver`](@ref) for more details about the `solver`.
function crls! end

function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
-               M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
-               radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
-               ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+               M=I, ldiv :: Bool=false, radius :: T=zero(T),
+               λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
+               itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
+               callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}

  m, n = size(A)
  length(b) == m || error("Inconsistent problem size")
-  (verbose > 0) && @printf("CRLS: system of %d equations in %d variables\n", m, n)
+  (verbose > 0) && @printf(iostream, "CRLS: system of %d equations in %d variables\n", m, n)

  # Tests M = Iₙ
  MisI = (M === I)

  # Check type consistency
  eltype(A) == FC || error("eltype(A) ≠ $FC")
-  ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+  ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")

  # Compute the adjoint of A
-  Aᵀ = A'
+  Aᴴ = A'

  # Set up workspace.
  allocate_if(!MisI, solver, :Ms, S, m)
@@ -112,13 +134,13 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
  end
  MisI || mulorldiv!(Mr, M, r, ldiv)
-  mul!(Ar, Aᵀ, Mr)  # - λ * x0 if x0 ≠ 0.
+  mul!(Ar, Aᴴ, Mr)  # - λ * x0 if x0 ≠ 0.
mul!(s, A, Ar) MisI || mulorldiv!(Ms, M, s, ldiv) p .= Ar Ap .= s - mul!(q, Aᵀ, Ms) # Ap + mul!(q, Aᴴ, Ms) # Ap λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms) iter = 0 @@ -128,8 +150,8 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC}; λ > 0 && (γ += λ * ArNorm * ArNorm) history && push!(ArNorms, ArNorm) ε = atol + rtol * ArNorm - (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) + (verbose > 0) && @printf(iostream, "%5s %8s %8s\n", "k", "‖Aᴴr‖", "‖r‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) status = "unknown" on_boundary = false @@ -147,14 +169,14 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC}; if radius > 0 pNorm = @knrm2(n, p) if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p - psd = true # det(AᵀA) = 0 - p = Ar # p = Aᵀr + psd = true # det(AᴴA) = 0 + p = Ar # p = Aᴴr pNorm² = ArNorm * ArNorm - mul!(q, Aᵀ, s) - α = min(ArNorm^2 / γ, maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᵀr for α = ‖Ar‖²/γ + mul!(q, Aᴴ, s) + α = min(ArNorm^2 / γ, maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᴴr for α = ‖Ar‖²/γ else pNorm² = pNorm * pNorm - σ = maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²)) + σ = maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²)) if α ≥ σ α = σ on_boundary = true @@ -177,7 +199,7 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC}; @kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p @kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap MisI || mulorldiv!(MAp, M, Ap, ldiv) - mul!(q, Aᵀ, MAp) + mul!(q, Aᴴ, MAp) λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p γ = γ_next @@ -189,12 +211,12 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC}; history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) user_requested_exit = callback(solver) :: Bool solved = (ArNorm ≤ ε) || on_boundary tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") solved && (status = "solution good enough given atol and rtol") diff --git a/src/crmr.jl b/src/crmr.jl index deb5cf79f..621ba5ef3 100644 --- a/src/crmr.jl +++ b/src/crmr.jl @@ -10,9 +10,9 @@ # and is equivalent to applying the conjugate residual method # to the linear system # -# AAᵀy = b. +# AAᴴy = b. # -# This method is equivalent to Craig-MR, described in +# This method is equivalent to CRAIGMR, described in # # D. Orban and M. Arioli. Iterative Solution of Symmetric Quasi-Definite Linear Systems, # Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017. @@ -26,12 +26,13 @@ export crmr, crmr! 
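A hedged sketch of the revised `crmr` call on a consistent underdetermined system, ahead of the docstring rewrite below (sizes and data are illustrative):

    using Krylov, LinearAlgebra
    A = randn(4, 9); b = randn(4)  # full row rank, so Ax = b is consistent
    x, stats = crmr(A, b)
    norm(A * x - b)  # ≈ 0; x approximates the minimum-norm solution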
- """ (x, stats) = crmr(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T), - rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + N=I, ldiv::Bool=false, + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -40,11 +41,11 @@ Solve the consistent linear system Ax + √λs = b -using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization +of size m × n using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization parameter. This method is equivalent to applying CR to the normal equations of the second kind - (AAᵀ + λI) y = b + (AAᴴ + λI) y = b but is more stable. When λ = 0, this method solves the minimum-norm problem @@ -58,10 +59,28 @@ CRMR produces monotonic residuals ‖r‖₂. It is formally equivalent to CRAIG-MR, though can be slightly less accurate, but simpler to implement. Only the x-part of the solution is returned. -A preconditioner M may be provided. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `N`: +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -86,35 +105,37 @@ See [`CrmrSolver`](@ref) for more details about the `solver`. function crmr! 
end function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T), - rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + N=I, ldiv :: Bool=false, + λ :: T=zero(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("CRMR: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "CRMR: system of %d equations in %d variables\n", m, n) - # Tests M = Iₙ - MisI = (M === I) + # Tests N = Iₙ + NisI = (N === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. - allocate_if(!MisI, solver, :Mq, S, m) + allocate_if(!NisI, solver, :Nq, S, m) allocate_if(λ > 0, solver, :s , S, m) - x, p, Aᵀr, r = solver.x, solver.p, solver.Aᵀr, solver.r + x, p, Aᴴr, r = solver.x, solver.p, solver.Aᴴr, solver.r q, s, stats = solver.q, solver.s, solver.stats rNorms, ArNorms = stats.residuals, stats.Aresiduals reset!(stats) - Mq = MisI ? q : solver.Mq + Nq = NisI ? q : solver.Nq x .= zero(FC) # initial estimation x = 0 - mulorldiv!(r, M, b, ldiv) # initial residual r = M * (b - Ax) = M * b + mulorldiv!(r, N, b, ldiv) # initial residual r = N * (b - Ax) = N * b bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0. rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0. history && push!(rNorms, rNorm) @@ -126,9 +147,9 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; return solver end λ > 0 && (s .= r) - mul!(Aᵀr, Aᵀ, r) # - λ * x0 if x0 ≠ 0. - p .= Aᵀr - γ = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ = dot(Aᵀr, Aᵀr) + mul!(Aᴴr, Aᴴ, r) # - λ * x0 if x0 ≠ 0. + p .= Aᴴr + γ = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ = dot(Aᴴr, Aᴴr) λ > 0 && (γ += λ * rNorm * rNorm) iter = 0 itmax == 0 && (itmax = m + n) @@ -137,8 +158,8 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; history && push!(ArNorms, ArNorm) ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems. ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems. - (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖") - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) + (verbose > 0) && @printf(iostream, "%5s %8s %8s\n", "k", "‖Aᴴr‖", "‖r‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) status = "unknown" solved = rNorm ≤ ɛ_c @@ -149,17 +170,17 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; while ! 
(solved || inconsistent || tired || user_requested_exit) mul!(q, A, p) λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s - MisI || mulorldiv!(Mq, M, q, ldiv) - α = γ / @kdotr(m, q, Mq) # Compute qᵗ * M * q + NisI || mulorldiv!(Nq, N, q, ldiv) + α = γ / @kdotr(m, q, Nq) # Compute qᴴ * N * q @kaxpy!(n, α, p, x) # Faster than x = x + α * p - @kaxpy!(m, -α, Mq, r) # Faster than r = r - α * Mq + @kaxpy!(m, -α, Nq, r) # Faster than r = r - α * Nq rNorm = @knrm2(m, r) # norm(r) - mul!(Aᵀr, Aᵀ, r) - γ_next = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ_next = dot(Aᵀr, Aᵀr) + mul!(Aᴴr, Aᴴ, r) + γ_next = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ_next = dot(Aᴴr, Aᴴr) λ > 0 && (γ_next += λ * rNorm * rNorm) β = γ_next / γ - @kaxpby!(n, one(FC), Aᵀr, β, p) # Faster than p = Aᵀr + β * p + @kaxpby!(n, one(FC), Aᴴr, β, p) # Faster than p = Aᴴr + β * p if λ > 0 @kaxpby!(m, one(FC), r, β, s) # s = r + β * s end @@ -169,13 +190,13 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; history && push!(rNorms, rNorm) history && push!(ArNorms, ArNorm) iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm) user_requested_exit = callback(solver) :: Bool solved = rNorm ≤ ɛ_c inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i) tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") solved && (status = "solution good enough given atol and rtol") diff --git a/src/diom.jl b/src/diom.jl index 9c6b9767b..7bf23e355 100644 --- a/src/diom.jl +++ b/src/diom.jl @@ -11,40 +11,58 @@ export diom, diom! """ - (x, stats) = diom(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, + (x, stats) = diom(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + reorthogonalization::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the consistent linear system Ax = b using direct incomplete orthogonalization method. + (x, stats) = diom(A, b, x0::AbstractVector; kwargs...) + +DIOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the consistent linear system Ax = b of size n using DIOM. DIOM only orthogonalizes the new vectors of the Krylov basis against the `memory` most recent vectors. If CG is well defined on `Ax = b` and `memory = 2`, DIOM is theoretically equivalent to CG. If `k ≤ memory` where `k` is the number of iterations, DIOM is theoretically equivalent to FOM. Otherwise, DIOM interpolates between CG and FOM and is similar to CG with partial reorthogonalization. -Partial reorthogonalization is available with the `reorthogonalization` option. - -An advantage of DIOM is that nonsymmetric or symmetric indefinite or both nonsymmetric +An advantage of DIOM is that non-Hermitian or Hermitian indefinite or both non-Hermitian and indefinite systems of linear equations can be handled by this single algorithm. -This implementation allows a left preconditioner M and a right preconditioner N. 
-- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -DIOM can be warm-started from an initial guess `x0` with the method +#### Optional argument - (x, stats) = diom(A, b, x0; kwargs...) +* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -84,15 +102,16 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: A end function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, + M=I, N=I, ldiv :: Bool=false, + reorthogonalization :: Bool=false, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("DIOM: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "DIOM: system of size %d\n", n) # Check M = Iₙ and N = Iₙ MisI = (M === I) @@ -100,7 +119,7 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. 
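+  # Buffers guarded by allocate_if are created only when needed: w requires a
+  # left preconditioner M ≠ I. Throughout the comments below, M and N denote
+  # the action of the preconditioners (mul!, or ldiv! when ldiv = true), which
+  # is why the former M⁻¹ and N⁻¹ annotations read M and N here.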
allocate_if(!MisI, solver, :w, S, n)
@@ -121,7 +140,7 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
  else
    t .= b
  end
-  MisI || mulorldiv!(r₀, M, t, ldiv)  # M⁻¹(b - Ax₀)
+  MisI || mulorldiv!(r₀, M, t, ldiv)  # M(b - Ax₀)
  rNorm = @knrm2(n, r₀)  # β = ‖r₀‖₂
  history && push!(rNorms, rNorm)
  if rNorm == 0
@@ -136,23 +155,26 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
  itmax == 0 && (itmax = 2*n)

  ε = atol + rtol * rNorm
-  (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
-  kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+  (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖")
+  kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)

-  mem = length(L)  # Memory
+  mem = length(V)  # Memory
  for i = 1 : mem
-    V[i] .= zero(FC)  # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b).
-    P[i] .= zero(FC)  # Directions for x : Pₘ = N⁻¹Vₘ(Uₘ)⁻¹.
+    V[i] .= zero(FC)  # Orthogonal basis of Kₖ(MAN, Mr₀).
  end
-  H .= zero(FC)  # Last column of the band hessenberg matrix Hₘ = LₘUₘ.
-  # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2].
-  # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
-  # In addition of that, the last column of Uₘ is stored in H.
-  L .= zero(FC)  # Last mem pivots of Lₘ.
+  for i = 1 : mem-1
+    P[i] .= zero(FC)  # Directions Pₖ = NVₖ(Uₖ)⁻¹.
+  end
+  H .= zero(FC)  # Last column of the band Hessenberg matrix Hₖ = LₖUₖ.
+  # Each column has at most mem + 1 nonzero elements.
+  # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H.
+  # k-i+1 represents the index of the diagonal where hᵢ.ₖ is located.
+  # In addition, the last column of Uₖ is stored in H.
+  L .= zero(FC)  # Last mem-1 pivots of Lₖ.

  # Initial ξ₁ and V₁.
  ξ = rNorm
-  @. V[1] = r₀ / rNorm
+  V[1] .= r₀ ./ rNorm

  # Stopping criterion.
  solved = rNorm ≤ ε
@@ -166,83 +188,88 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
    iter = iter + 1

    # Set position in circulars stacks.
-    pos = mod(iter-1, mem) + 1     # Position corresponding to pₘ and vₘ in circular stacks P and V.
-    next_pos = mod(iter, mem) + 1  # Position corresponding to vₘ₊₁ in the circular stack V.
+    pos = mod(iter-1, mem) + 1     # Position corresponding to vₖ in the circular stack V.
+    next_pos = mod(iter, mem) + 1  # Position corresponding to vₖ₊₁ in the circular stack V.

    # Incomplete Arnoldi procedure.
    z = NisI ? V[pos] : solver.z
-    NisI || mulorldiv!(z, N, V[pos], ldiv)  # N⁻¹vₘ, forms pₘ
-    mul!(t, A, z)                           # AN⁻¹vₘ
-    MisI || mulorldiv!(w, M, t, ldiv)       # M⁻¹AN⁻¹vₘ, forms vₘ₊₁
+    NisI || mulorldiv!(z, N, V[pos], ldiv)  # Nvₖ, forms pₖ
+    mul!(t, A, z)                           # ANvₖ
+    MisI || mulorldiv!(w, M, t, ldiv)       # MANvₖ, forms vₖ₊₁

    for i = max(1, iter-mem+1) : iter
-      ipos = mod(i-1, mem) + 1  # Position corresponding to vᵢ in the circular stack V.
-      diag = iter - i + 2
-      H[diag] = @kdot(n, w, V[ipos])    # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩
-      @kaxpy!(n, -H[diag], V[ipos], w)  # w ← w - hᵢ.ₘ * vᵢ
+      ipos = mod(i-1, mem) + 1  # Position corresponding to vᵢ in the circular stack V.
+      diag = iter - i + 1
+      H[diag] = @kdot(n, w, V[ipos])    # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩
+      @kaxpy!(n, -H[diag], V[ipos], w)  # w ← w - hᵢ.ₖvᵢ
    end

    # Partial reorthogonalization of the Krylov basis.
    if reorthogonalization
      for i = max(1, iter-mem+1) : iter
        ipos = mod(i-1, mem) + 1
-        diag = iter - i + 2
+        diag = iter - i + 1
        Htmp = @kdot(n, w, V[ipos])
        H[diag] += Htmp
        @kaxpy!(n, -Htmp, V[ipos], w)
      end
    end

-    # Compute hₘ₊₁.ₘ and vₘ₊₁.
- H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂ - if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown" - @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ - end - # It's possible that uₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1 - if iter ≥ mem + 2 - H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0 + # Compute hₖ₊₁.ₖ and vₖ₊₁. + Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown" + V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ end - # Update the LU factorization with partial pivoting of H. - # Compute the last column of Uₘ. + # Update the LU factorization of Hₖ. + # Compute the last column of Uₖ. if iter ≥ 2 - for i = max(2,iter-mem+1) : iter - lpos = mod(i-1, mem) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L. - diag = iter - i + 2 + # u₁.ₖ ← h₁.ₖ if iter ≤ mem + # uₖ₋ₘₑₘ₊₁.ₖ ← hₖ₋ₘₑₘ₊₁.ₖ if iter ≥ mem + 1 + for i = max(2,iter-mem+2) : iter + lpos = mod(i-1, mem-1) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L. + diag = iter - i + 1 next_diag = diag + 1 - # uᵢ.ₘ ← hᵢ.ₘ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₘ + # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ H[diag] = H[diag] - L[lpos] * H[next_diag] + if i == iter + # Compute ξₖ the last component of zₖ = β(Lₖ)⁻¹e₁. + # ξₖ = -lₖ.ₖ₋₁ * ξₖ₋₁ + ξ = - L[lpos] * ξ + end end - # Compute ξₘ the last component of zₘ = β(Lₘ)⁻¹e₁. - # ξₘ = -lₘ.ₘ₋₁ * ξₘ₋₁ - ξ = - L[pos] * ξ end - # Compute next pivot lₘ₊₁.ₘ = hₘ₊₁.ₘ / uₘ.ₘ - L[next_pos] = H[1] / H[2] - - # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Uₘ)⁻¹. - for i = max(1,iter-mem) : iter-1 - ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P. - diag = iter - i + 2 - if ipos == pos - # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ - @kscal!(n, -H[diag], P[pos]) + # Compute next pivot lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ + next_lpos = mod(iter, mem-1) + 1 + L[next_lpos] = Haux / H[1] + + ppos = mod(iter-1, mem-1) + 1 # Position corresponding to pₖ in the circular stack P. + + # Compute the direction pₖ, the last column of Pₖ = NVₖ(Uₖ)⁻¹. + # u₁.ₖp₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≤ mem + # uₖ₋ₘₑₘ₊₁.ₖpₖ₋ₘₑₘ₊₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≥ mem + 1 + for i = max(1,iter-mem+1) : iter-1 + ipos = mod(i-1, mem-1) + 1 # Position corresponding to pᵢ in the circular stack P. + diag = iter - i + 1 + if ipos == ppos + # pₖ ← -uₖ₋ₘₑₘ₊₁.ₖ * pₖ₋ₘₑₘ₊₁ + @kscal!(n, -H[diag], P[ppos]) else - # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ - @kaxpy!(n, -H[diag], P[ipos], P[pos]) + # pₖ ← pₖ - uᵢ.ₖ * pᵢ + @kaxpy!(n, -H[diag], P[ipos], P[ppos]) end end - # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ - @kaxpy!(n, one(FC), z, P[pos]) - # pₘ = pₐᵤₓ / uₘ.ₘ - @. P[pos] = P[pos] / H[2] + # pₐᵤₓ ← pₐᵤₓ + Nvₖ + @kaxpy!(n, one(FC), z, P[ppos]) + # pₖ = pₐᵤₓ / uₖ.ₖ + P[ppos] .= P[ppos] ./ H[1] - # Update solution xₘ. - # xₘ = xₘ₋₁ + ξₘ * pₘ - @kaxpy!(n, ξ, P[pos], x) + # Update solution xₖ. + # xₖ = xₖ₋₁ + ξₖ * pₖ + @kaxpy!(n, ξ, P[ppos], x) # Compute residual norm. - # ‖ M⁻¹(b - Axₘ) ‖₂ = hₘ₊₁.ₘ * |ξₘ / uₘ.ₘ| - rNorm = real(H[1]) * abs(ξ / H[2]) + # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ξₖ / uₖ.ₖ| + rNorm = Haux * abs(ξ / H[1]) history && push!(rNorms, rNorm) # Stopping conditions that do not depend on user input. 
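Because the `M`/`N` plus `ldiv` pairing recurs in every solver touched by this patch, here is a sketch of the two equivalent ways to supply `diom` with a left preconditioner (the Jacobi-style diagonal choice is illustrative only):

    using Krylov, LinearAlgebra
    A = randn(50, 50) + 10I; b = randn(50)
    D = Diagonal(diag(A))
    x1, _ = diom(A, b, M=inv(D))        # preconditioner applied with mul!
    x2, _ = diom(A, b, M=D, ldiv=true)  # the same preconditioner via ldiv!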
@@ -254,9 +281,9 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}; resid_decrease_lim = rNorm ≤ ε solved = resid_decrease_lim || resid_decrease_mach tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") solved && (status = "solution good enough given atol and rtol") user_requested_exit && (status = "user-requested exit") diff --git a/src/dqgmres.jl b/src/dqgmres.jl index ab7c490a6..025016304 100644 --- a/src/dqgmres.jl +++ b/src/dqgmres.jl @@ -11,16 +11,21 @@ export dqgmres, dqgmres! """ - (x, stats) = dqgmres(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, + (x, stats) = dqgmres(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + reorthogonalization::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the consistent linear system Ax = b using DQGMRES method. + (x, stats) = dqgmres(A, b, x0::AbstractVector; kwargs...) + +DQGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the consistent linear system Ax = b of size n using DQGMRES. DQGMRES algorithm is based on the incomplete Arnoldi orthogonalization process and computes a sequence of approximate solutions with the quasi-minimal residual property. @@ -30,21 +35,34 @@ If MINRES is well defined on `Ax = b` and `memory = 2`, DQGMRES is theoretically If `k ≤ memory` where `k` is the number of iterations, DQGMRES is theoretically equivalent to GMRES. Otherwise, DQGMRES interpolates between MINRES and GMRES and is similar to MINRES with partial reorthogonalization. -Partial reorthogonalization is available with the `reorthogonalization` option. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument -This implementation allows a left preconditioner M and a right preconditioner N. -- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +* `x0`: a vector of length n that represents an initial guess of the solution x. -DQGMRES can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = dqgmres(A, b, x0; kwargs...) +* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. 
If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -84,15 +102,16 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x end function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, + M=I, N=I, ldiv :: Bool=false, + reorthogonalization :: Bool=false, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("DQGMRES: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "DQGMRES: system of size %d\n", n) # Check M = Iₙ and N = Iₙ MisI = (M === I) @@ -100,7 +119,7 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. allocate_if(!MisI, solver, :w, S, n) @@ -121,7 +140,7 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; else t .= b end - MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀) + MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀) rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂ history && push!(rNorms, rNorm) if rNorm == 0 @@ -136,29 +155,30 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; itmax == 0 && (itmax = 2*n) ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm) # Set up workspace. - mem = length(c) # Memory. + mem = length(V) # Memory. for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b). - P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Rₘ)⁻¹. + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). + P[i] .= zero(FC) # Directions for x : Pₖ = NVₖ(Rₖ)⁻¹. end - c .= zero(T) # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ. - s .= zero(FC) # Last mem Givens sines used for the factorization QₘRₘ = Hₘ. - H .= zero(FC) # Last column of the band hessenberg matrix Hₘ. - # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2]. 
-  # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
-  # In addition of that, the last column of Rₘ is also stored in H.
+  c .= zero(T)   # Last mem Givens cosines used for the factorization QₖRₖ = Hₖ.
+  s .= zero(FC)  # Last mem Givens sines used for the factorization QₖRₖ = Hₖ.
+  H .= zero(FC)  # Last column of the band Hessenberg matrix Hₖ.
+  # Each column has at most mem + 1 nonzero elements.
+  # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H.
+  # k-i+1 represents the index of the diagonal where hᵢ.ₖ is located.
+  # In addition, the last column of Rₖ is also stored in H.

  # Initial γ₁ and V₁.
-  γₘ = rNorm  # γₘ and γₘ₊₁ are the last components of gₘ, right-hand of the least squares problem min ‖ Hₘyₘ - gₘ ‖₂.
-  @. V[1] = r₀ / rNorm
+  γₖ = rNorm  # γₖ and γₖ₊₁ are the last components of gₖ, the right-hand side of the least squares problem min ‖ Hₖyₖ - gₖ ‖₂.
+  V[1] .= r₀ ./ rNorm

  # The following stopping criterion compensates for the lag in the
  # residual, but usually increases the number of iterations.
-  # solved = sqrt(max(1, iter-mem+1)) * |γₘ₊₁| ≤ ε
+  # solved = sqrt(max(1, iter-mem+1)) * |γₖ₊₁| ≤ ε
  solved = rNorm ≤ ε  # less accurate, but acceptable.
  tired = iter ≥ itmax
  status = "unknown"
@@ -170,88 +190,89 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
    iter = iter + 1

    # Set position in circulars stacks.
-    pos = mod(iter-1, mem) + 1     # Position corresponding to pₘ and vₘ in circular stacks P and V.
-    next_pos = mod(iter, mem) + 1  # Position corresponding to vₘ₊₁ in the circular stack V.
+    pos = mod(iter-1, mem) + 1     # Position corresponding to pₖ and vₖ in circular stacks P and V.
+    next_pos = mod(iter, mem) + 1  # Position corresponding to vₖ₊₁ in the circular stack V.

    # Incomplete Arnoldi procedure.
    z = NisI ? V[pos] : solver.z
-    NisI || mulorldiv!(z, N, V[pos], ldiv)  # N⁻¹vₘ, forms pₘ
-    mul!(t, A, z)                           # AN⁻¹vₘ
-    MisI || mulorldiv!(w, M, t, ldiv)       # M⁻¹AN⁻¹vₘ, forms vₘ₊₁
+    NisI || mulorldiv!(z, N, V[pos], ldiv)  # Nvₖ, forms pₖ
+    mul!(t, A, z)                           # ANvₖ
+    MisI || mulorldiv!(w, M, t, ldiv)       # MANvₖ, forms vₖ₊₁

    for i = max(1, iter-mem+1) : iter
-      ipos = mod(i-1, mem) + 1  # Position corresponding to vᵢ in the circular stack V.
-      diag = iter - i + 2
-      H[diag] = @kdot(n, w, V[ipos])    # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩
-      @kaxpy!(n, -H[diag], V[ipos], w)  # w ← w - hᵢ.ₘ * vᵢ
+      ipos = mod(i-1, mem) + 1  # Position corresponding to vᵢ in the circular stack V.
+      diag = iter - i + 1
+      H[diag] = @kdot(n, w, V[ipos])    # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩
+      @kaxpy!(n, -H[diag], V[ipos], w)  # w ← w - hᵢ.ₖvᵢ
    end

    # Partial reorthogonalization of the Krylov basis.
    if reorthogonalization
      for i = max(1, iter-mem+1) : iter
        ipos = mod(i-1, mem) + 1
-        diag = iter - i + 2
+        diag = iter - i + 1
        Htmp = @kdot(n, w, V[ipos])
        H[diag] += Htmp
        @kaxpy!(n, -Htmp, V[ipos], w)
      end
    end

-    # Compute hₘ₊₁.ₘ and vₘ₊₁.
-    H[1] = @knrm2(n, w)  # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂
-    if H[1] ≠ 0          # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown"
-      @. V[next_pos] = w / H[1]  # vₘ₊₁ = w / hₘ₊₁.ₘ
+    # Compute hₖ₊₁.ₖ and vₖ₊₁.
+    Haux = @knrm2(n, w)  # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+    if Haux ≠ 0          # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
+      V[next_pos] .= w ./ Haux  # vₖ₊₁ = w / hₖ₊₁.ₖ
    end

-    # rₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1
+    # rₖ₋ₘₑₘ.ₖ ≠ 0 when k ≥ mem + 1
+    # We don't want to use rₖ₋₁₋ₘₑₘ.ₖ₋₁ when we compute rₖ₋ₘₑₘ.ₖ
    if iter ≥ mem + 2
-      H[mem+2] = zero(FC)  # hₘ₋ₘₑₘ.ₘ = 0
+      H[mem+1] = zero(FC)  # rₖ₋ₘₑₘ.ₖ = 0
    end

-    # Update the QR factorization of H.
+    # Update the QR factorization of Hₖ.
    # Apply mem previous Givens reflections Ωᵢ.
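+    # Only the mem most recent reflections overlap the last column of the
+    # banded Hessenberg matrix, so older ones need not be revisited; each Ωᵢ
+    # combines the entries stored at positions diag and diag + 1 of H.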
for i = max(1,iter-mem) : iter-1 - irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s. - diag = iter - i + 1 + irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s. + diag = iter - i next_diag = diag + 1 - H_aux = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag] + Htmp = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag] H[diag] = conj(s[irot_pos]) * H[next_diag] - c[irot_pos] * H[diag] - H[next_diag] = H_aux + H[next_diag] = Htmp end - # Compute and apply current Givens reflection Ωₘ. - # [cₘ sₘ] [ hₘ.ₘ ] = [ρₘ] - # [sₘ -cₘ] [hₘ₊₁.ₘ] [0 ] - (c[pos], s[pos], H[2]) = sym_givens(H[2], H[1]) - γₘ₊₁ = conj(s[pos]) * γₘ - γₘ = c[pos] * γₘ + # Compute and apply current Givens reflection Ωₖ. + # [cₖ sₖ] [ hₖ.ₖ ] = [ρₖ] + # [sₖ -cₖ] [hₖ₊₁.ₖ] [0 ] + (c[pos], s[pos], H[1]) = sym_givens(H[1], Haux) + γₖ₊₁ = conj(s[pos]) * γₖ + γₖ = c[pos] * γₖ - # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Rₘ)⁻¹. + # Compute the direction pₖ, the last column of Pₖ = NVₖ(Rₖ)⁻¹. for i = max(1,iter-mem) : iter-1 - ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P. - diag = iter - i + 2 + ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P. + diag = iter - i + 1 if ipos == pos - # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ + # pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ @kscal!(n, -H[diag], P[pos]) else - # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ + # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₖ * pᵢ @kaxpy!(n, -H[diag], P[ipos], P[pos]) end end - # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ + # pₐᵤₓ ← pₐᵤₓ + Nvₖ @kaxpy!(n, one(FC), z, P[pos]) - # pₘ = pₐᵤₓ / hₘ.ₘ - @. P[pos] = P[pos] / H[2] + # pₖ = pₐᵤₓ / hₖ.ₖ + P[pos] .= P[pos] ./ H[1] - # Compute solution xₘ. - # xₘ ← xₘ₋₁ + γₘ * pₘ - @kaxpy!(n, γₘ, P[pos], x) + # Compute solution xₖ. + # xₖ ← xₖ₋₁ + γₖ * pₖ + @kaxpy!(n, γₖ, P[pos], x) # Update residual norm estimate. - # ‖ M⁻¹(b - Axₘ) ‖₂ ≈ |γₘ₊₁| - rNorm = abs(γₘ₊₁) + # ‖ M(b - Axₖ) ‖₂ ≈ |γₖ₊₁| + rNorm = abs(γₖ₊₁) history && push!(rNorms, rNorm) - # Update γₘ. - γₘ = γₘ₊₁ + # Update γₖ. + γₖ = γₖ₊₁ # Stopping conditions that do not depend on user input. # This is to guard against tolerances that are unreasonably small. @@ -262,9 +283,9 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; resid_decrease_lim = rNorm ≤ ε solved = resid_decrease_lim || resid_decrease_mach tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") solved && (status = "solution good enough given atol and rtol") tired && (status = "maximum number of iterations exceeded") user_requested_exit && (status = "user-requested exit") diff --git a/src/fgmres.jl b/src/fgmres.jl new file mode 100644 index 000000000..fa536af23 --- /dev/null +++ b/src/fgmres.jl @@ -0,0 +1,353 @@ +# An implementation of FGMRES for the solution of the square linear system Ax = b. +# +# This method is described in +# +# Y. Saad, A Flexible Inner-Outer Preconditioned GMRES Algorithms. +# SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993. +# +# Alexis Montoison, +# Montreal, September 2022. + +export fgmres, fgmres! 
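The point of the flexibility is that `N` may change between iterations, so an inner iterative solve can act as the right preconditioner. A sketch under that assumption; the `InnerGmres` wrapper is hypothetical glue written for this example, not part of Krylov.jl:

    using Krylov, LinearAlgebra
    import LinearAlgebra: mul!

    # Hypothetical wrapper: each application runs a few GMRES iterations,
    # so the action of the preconditioner varies from call to call.
    struct InnerGmres{TA}
        A::TA
    end
    function mul!(y::AbstractVector, P::InnerGmres, v::AbstractVector)
        z, _ = gmres(P.A, v, itmax=5)  # crude, iteration-dependent inner solve
        y .= z
        return y
    end

    A = randn(100, 100) + 10I
    b = randn(100)
    x, stats = fgmres(A, b, N=InnerGmres(A))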
+ +""" + (x, stats) = fgmres(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + restart::Bool=false, reorthogonalization::Bool=false, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) + +`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. +`FC` is `T` or `Complex{T}`. + + (x, stats) = fgmres(A, b, x0::AbstractVector; kwargs...) + +FGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the linear system Ax = b of size n using FGMRES. + +FGMRES computes a sequence of approximate solutions with minimum residual. +FGMRES is a variant of GMRES that allows changes in the right preconditioner at each iteration. + +This implementation allows a left preconditioner M and a flexible right preconditioner N. +A situation in which the preconditioner is "not constant" is when a relaxation-type method, +a Chebyshev iteration or another Krylov subspace method is used as a preconditioner. +Compared to GMRES, there is no additional cost incurred in the arithmetic but the memory requirement almost doubles. +Thus, GMRES is recommended if the right preconditioner N is constant. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. + +#### Keyword arguments + +* `memory`: if `restart = true`, the restarted version FGMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `restart`: restart the method after `memory` iterations; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. + +#### Reference + +* Y. Saad, [*A Flexible Inner-Outer Preconditioned GMRES Algorithm*](https://doi.org/10.1137/0914028), SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993. +""" +function fgmres end + +function fgmres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex + solver = FgmresSolver(A, b, memory) + fgmres!(solver, A, b, x0; kwargs...) 
+ return (solver.x, solver.stats) +end + +function fgmres(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex + solver = FgmresSolver(A, b, memory) + fgmres!(solver, A, b; kwargs...) + return (solver.x, solver.stats) +end + +""" + solver = fgmres!(solver::FgmresSolver, A, b; kwargs...) + solver = fgmres!(solver::FgmresSolver, A, b, x0; kwargs...) + +where `kwargs` are keyword arguments of [`fgmres`](@ref). + +Note that the `memory` keyword argument is the only exception. +It's required to create a `FgmresSolver` and can't be changed later. + +See [`FgmresSolver`](@ref) for more details about the `solver`. +""" +function fgmres! end + +function fgmres!(solver :: FgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + warm_start!(solver, x0) + fgmres!(solver, A, b; kwargs...) + return solver +end + +function fgmres!(solver :: FgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; + M=I, N=I, ldiv :: Bool=false, + restart :: Bool=false, reorthogonalization :: Bool=false, + atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + + m, n = size(A) + m == n || error("System must be square") + length(b) == m || error("Inconsistent problem size") + (verbose > 0) && @printf(iostream, "FGMRES: system of size %d\n", n) + + # Check M = Iₙ + MisI = (M === I) + + # Check type consistency + eltype(A) == FC || error("eltype(A) ≠ $FC") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + + # Set up workspace. + allocate_if(!MisI , solver, :q , S, n) + allocate_if(restart, solver, :Δx, S, n) + Δx, x, w, V, Z = solver.Δx, solver.x, solver.w, solver.V, solver.Z + z, c, s, R, stats = solver.z, solver.c, solver.s, solver.R, solver.stats + warm_start = solver.warm_start + rNorms = stats.residuals + reset!(stats) + q = MisI ? w : solver.q + r₀ = MisI ? w : solver.q + xr = restart ? Δx : x + + # Initial solution x₀. + x .= zero(FC) + + # Initial residual r₀. + if warm_start + mul!(w, A, Δx) + @kaxpby!(n, one(FC), b, -one(FC), w) + restart && @kaxpy!(n, one(FC), Δx, x) + else + w .= b + end + MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) + β = @knrm2(n, r₀) # β = ‖r₀‖₂ + + rNorm = β + history && push!(rNorms, β) + ε = atol + rtol * rNorm + + if β == 0 + stats.niter = 0 + stats.solved, stats.inconsistent = true, false + stats.status = "x = 0 is a zero-residual solution" + solver.warm_start = false + return solver + end + + mem = length(c) # Memory + npass = 0 # Number of pass + + iter = 0 # Cumulative number of iterations + inner_iter = 0 # Number of iterations in a pass + + itmax == 0 && (itmax = 2*n) + inner_itmax = itmax + + (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ") + kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗") + + # Tolerance for breakdown detection. + btol = eps(T)^(3/4) + + # Stopping criterion + breakdown = false + inconsistent = false + solved = rNorm ≤ ε + tired = iter ≥ itmax + inner_tired = inner_iter ≥ inner_itmax + status = "unknown" + user_requested_exit = false + + while !(solved || tired || breakdown || user_requested_exit) + + # Initialize workspace. + nr = 0 # Number of coefficients stored in Rₖ. + for i = 1 : mem + V[i] .= zero(FC) # Orthogonal basis of {Mr₀, MANₖr₀, ..., (MANₖ)ᵏ⁻¹r₀}. 
+ Z[i] .= zero(FC) # Zₖ = [N₁v₁, ..., Nₖvₖ] + end + s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. + R .= zero(FC) # Upper triangular matrix Rₖ. + z .= zero(FC) # Right-hand of the least squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂. + + if restart + xr .= zero(FC) # xr === Δx when restart is set to true + if npass ≥ 1 + mul!(w, A, x) + @kaxpby!(n, one(FC), b, -one(FC), w) + MisI || mulorldiv!(r₀, M, w, ldiv) + end + end + + # Initial ζ₁ and V₁ + β = @knrm2(n, r₀) + z[1] = β + @. V[1] = r₀ / rNorm + + npass = npass + 1 + solver.inner_iter = 0 + inner_tired = false + + while !(solved || inner_tired || breakdown || user_requested_exit) + + # Update iteration index + solver.inner_iter = solver.inner_iter + 1 + inner_iter = solver.inner_iter + + # Update workspace if more storage is required and restart is set to false + if !restart && (inner_iter > mem) + for i = 1 : inner_iter + push!(R, zero(FC)) + end + push!(s, zero(FC)) + push!(c, zero(T)) + push!(Z, S(undef, n)) + end + + # Continue the process. + # MAZₖ = Vₖ₊₁Hₖ₊₁.ₖ + mulorldiv!(Z[inner_iter], N, V[inner_iter], ldiv) # zₖ ← Nₖvₖ + mul!(w, A, Z[inner_iter]) # w ← Azₖ + MisI || mulorldiv!(q, M, w, ldiv) # q ← MAzₖ + for i = 1 : inner_iter + R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq + @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ + end + + # Reorthogonalization of the basis. + if reorthogonalization + for i = 1 : inner_iter + Htmp = @kdot(n, V[i], q) + R[nr+i] += Htmp + @kaxpy!(n, -Htmp, V[i], q) + end + end + + # Compute hₖ₊₁.ₖ + Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂ + + # Update the QR factorization of Hₖ₊₁.ₖ. + # Apply previous Givens reflections Ωᵢ. + # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ] + # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ] + for i = 1 : inner_iter-1 + Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1] + R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1] + R[nr+i] = Rtmp + end + + # Compute and apply current Givens reflection Ωₖ. + # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ] + # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ] + (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis) + + # Update zₖ = (Qₖ)ᴴβe₁ + ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter] + z[inner_iter] = c[inner_iter] * z[inner_iter] + + # Update residual norm estimate. + # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁| + rNorm = abs(ζₖ₊₁) + history && push!(rNorms, rNorm) + + # Update the number of coefficients in Rₖ + nr = nr + inner_iter + + # Stopping conditions that do not depend on user input. + # This is to guard against tolerances that are unreasonably small. + resid_decrease_mach = (rNorm + one(T) ≤ one(T)) + + # Update stopping criterion. + resid_decrease_lim = rNorm ≤ ε + breakdown = Hbis ≤ btol + solved = resid_decrease_lim || resid_decrease_mach + inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax + solver.inner_iter = inner_iter + kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis) + + # Compute vₖ₊₁ + if !(solved || inner_tired || breakdown) + if !restart && (inner_iter ≥ mem) + push!(V, S(undef, n)) + push!(z, zero(FC)) + end + @. V[inner_iter+1] = q / Hbis # hₖ₊₁.ₖvₖ₊₁ = q + z[inner_iter+1] = ζₖ₊₁ + end + + user_requested_exit = callback(solver) :: Bool + end + + # Compute y by solving Ry = z with backward substitution. 
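+      # R is stored column by column in a packed one-dimensional array (nr
+      # coefficients so far), so rᵢ.ⱼ is addressed through the running `pos`
+      # index below: yᵢ = (ζᵢ - rᵢ.ᵢ₊₁yᵢ₊₁ - … - rᵢ.ₖyₖ) / rᵢ.ᵢ.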
+ y = z # yᵢ = ζᵢ + for i = inner_iter : -1 : 1 + pos = nr + i - inner_iter # position of rᵢ.ₖ + for j = inner_iter : -1 : i+1 + y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ btol + y[i] = zero(FC) + inconsistent = true + else + y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ + end + end + + # Form xₖ = N₁v₁y₁ + ... + Nₖvₖyₖ = z₁y₁ + ... + zₖyₖ + for i = 1 : inner_iter + @kaxpy!(n, y[i], Z[i], xr) + end + restart && @kaxpy!(n, one(FC), xr, x) + + # Update inner_itmax, iter and tired variables. + inner_itmax = inner_itmax - inner_iter + iter = iter + inner_iter + tired = iter ≥ itmax + end + (verbose > 0) && @printf(iostream, "\n") + + tired && (status = "maximum number of iterations exceeded") + solved && (status = "solution good enough given atol and rtol") + inconsistent && (status = "found approximate least-squares solution") + user_requested_exit && (status = "user-requested exit") + + # Update x + warm_start && !restart && @kaxpy!(n, one(FC), Δx, x) + solver.warm_start = false + + # Update stats + stats.niter = iter + stats.solved = solved + stats.inconsistent = inconsistent + stats.status = status + return solver +end diff --git a/src/fom.jl b/src/fom.jl index fcae5cf62..6aabb33f5 100644 --- a/src/fom.jl +++ b/src/fom.jl @@ -11,38 +11,53 @@ export fom, fom! """ - (x, stats) = fom(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - restart::Bool=false, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = fom(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + restart::Bool=false, reorthogonalization::Bool=false, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using FOM method. + (x, stats) = fom(A, b, x0::AbstractVector; kwargs...) + +FOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the linear system Ax = b of size n using FOM. FOM algorithm is based on the Arnoldi process and a Galerkin condition. -This implementation allows a left preconditioner M and a right preconditioner N. -- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. -Full reorthogonalization is available with the `reorthogonalization` option. +#### Optional argument -If `restart = true`, the restarted version FOM(k) is used with `k = memory`. -If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. -More storage will be allocated only if the number of iterations exceed `memory`. +* `x0`: a vector of length n that represents an initial guess of the solution x. -FOM can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = fom(A, b, x0; kwargs...) +* `memory`: if `restart = true`, the restarted version FOM(k) is used with `k = memory`. 
If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `restart`: restart the method after `memory` iterations; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -82,15 +97,16 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: Abs end function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - restart :: Bool=false, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + M=I, N=I, ldiv :: Bool=false, + restart :: Bool=false, reorthogonalization :: Bool=false, + atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("FOM: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "FOM: system of size %d\n", n) # Check M = Iₙ and N = Iₙ MisI = (M === I) @@ -98,7 +114,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. 
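+  # allocate_if creates q only when a left preconditioner is present and Δx
+  # only when restart = true, so nothing is stored that the run cannot use.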
allocate_if(!MisI , solver, :q , S, n) @@ -124,7 +140,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; else w .= b end - MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀) + MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) β = @knrm2(n, r₀) # β = ‖r₀‖₂ rNorm = β @@ -148,8 +164,8 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; itmax == 0 && (itmax = 2*n) inner_itmax = itmax - (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ") - kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗") + (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ") + kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗") # Tolerance for breakdown detection. btol = eps(T)^(3/4) @@ -167,7 +183,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Initialize workspace. nr = 0 # Number of coefficients stored in Uₖ. for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀). + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). end l .= zero(FC) # Lower unit triangular matrix Lₖ. U .= zero(FC) # Upper triangular matrix Uₖ. @@ -207,11 +223,11 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Continue the Arnoldi process. p = NisI ? V[inner_iter] : solver.p - NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ - mul!(w, A, p) # w ← AN⁻¹vₖ - MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ + NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ + mul!(w, A, p) # w ← ANvₖ + MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ for i = 1 : inner_iter - U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ + U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq @kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ end @@ -240,7 +256,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; l[inner_iter] = Hbis / U[nr+inner_iter] # Update residual norm estimate. - # ‖ M⁻¹(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ| + # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ| rNorm = Hbis * abs(z[inner_iter] / U[nr+inner_iter]) history && push!(rNorms, rNorm) @@ -257,7 +273,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; breakdown = Hbis ≤ btol solved = resid_decrease_lim || resid_decrease_mach inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax - kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis) + kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis) # Compute vₖ₊₁. if !(solved || inner_tired || breakdown) @@ -280,7 +296,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; y[i] = y[i] / U[pos] # yᵢ ← yᵢ / rᵢᵢ end - # Form xₖ = N⁻¹Vₖyₖ + # Form xₖ = NVₖyₖ for i = 1 : inner_iter @kaxpy!(n, y[i], V[i], xr) end @@ -295,7 +311,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = iter + inner_iter tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") breakdown && (status = "inconsistent linear system") diff --git a/src/gmres.jl b/src/gmres.jl index 388a4ab96..d475198b5 100644 --- a/src/gmres.jl +++ b/src/gmres.jl @@ -11,38 +11,53 @@ export gmres, gmres! 
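A sketch of the restarted variant documented below (problem data are illustrative):

    using Krylov, LinearAlgebra
    A = randn(200, 200) + 20I; b = randn(200)
    # GMRES(20): the basis is capped at `memory` vectors and the method
    # restarts from the current iterate, keeping storage bounded.
    x, stats = gmres(A, b, memory=20, restart=true)
    stats.niter, stats.status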
""" - (x, stats) = gmres(A, b::AbstractVector{FC}; memory::Int=20, - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - reorthogonalization::Bool=false, itmax::Int=0, - restart::Bool=false, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + (x, stats) = gmres(A, b::AbstractVector{FC}; + memory::Int=20, M=I, N=I, ldiv::Bool=false, + restart::Bool=false, reorthogonalization::Bool=false, + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using GMRES method. + (x, stats) = gmres(A, b, x0::AbstractVector; kwargs...) -GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimal residual property. +GMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. -This implementation allows a left preconditioner M and a right preconditioner N. -- Left preconditioning : M⁻¹Ax = M⁻¹b -- Right preconditioning : AN⁻¹u = b with x = N⁻¹u -- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u +Solve the linear system Ax = b of size n using GMRES. -Full reorthogonalization is available with the `reorthogonalization` option. +GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimum residual. -If `restart = true`, the restarted version GMRES(k) is used with `k = memory`. -If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. -More storage will be allocated only if the number of iterations exceed `memory`. +#### Input arguments -GMRES can be warm-started from an initial guess `x0` with the method +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. - (x, stats) = gmres(A, b, x0; kwargs...) +#### Optional argument -where `kwargs` are the same keyword arguments as above. +* `x0`: a vector of length n that represents an initial guess of the solution x. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Keyword arguments + +* `memory`: if `restart = true`, the restarted version GMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`; +* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning; +* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `restart`: restart the method after `memory` iterations; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -82,15 +97,16 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: end function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - reorthogonalization :: Bool=false, itmax :: Int=0, - restart :: Bool=false, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + M=I, N=I, ldiv :: Bool=false, + restart :: Bool=false, reorthogonalization :: Bool=false, + atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("GMRES: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "GMRES: system of size %d\n", n) # Check M = Iₙ and N = Iₙ MisI = (M === I) @@ -98,7 +114,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. allocate_if(!MisI , solver, :q , S, n) @@ -124,7 +140,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; else w .= b end - MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀) + MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀) β = @knrm2(n, r₀) # β = ‖r₀‖₂ rNorm = β @@ -148,8 +164,8 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; itmax == 0 && (itmax = 2*n) inner_itmax = itmax - (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ") - kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗") + (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ") + kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗") # Tolerance for breakdown detection. btol = eps(T)^(3/4) @@ -168,7 +184,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Initialize workspace. nr = 0 # Number of coefficients stored in Rₖ. for i = 1 : mem - V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀). + V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀). end s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ. @@ -210,11 +226,11 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Continue the Arnoldi process. p = NisI ? 
V[inner_iter] : solver.p - NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ - mul!(w, A, p) # w ← AN⁻¹vₖ - MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ + NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ + mul!(w, A, p) # w ← ANvₖ + MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ for i = 1 : inner_iter - R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ + R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ end @@ -245,12 +261,12 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ] (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis) - # Update zₖ = (Qₖ)ᵀβe₁ + # Update zₖ = (Qₖ)ᴴβe₁ ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter] z[inner_iter] = c[inner_iter] * z[inner_iter] # Update residual norm estimate. - # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁| + # ‖ M(b - Axₖ) ‖₂ = |ζₖ₊₁| rNorm = abs(ζₖ₊₁) history && push!(rNorms, rNorm) @@ -267,7 +283,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; solved = resid_decrease_lim || resid_decrease_mach inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax solver.inner_iter = inner_iter - kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis) + kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis) # Compute vₖ₊₁ if !(solved || inner_tired || breakdown) @@ -299,7 +315,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; end end - # Form xₖ = N⁻¹Vₖyₖ + # Form xₖ = NVₖyₖ for i = 1 : inner_iter @kaxpy!(n, y[i], V[i], xr) end @@ -314,7 +330,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = iter + inner_iter tired = iter ≥ itmax end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") solved && (status = "solution good enough given atol and rtol") diff --git a/src/gpmr.jl b/src/gpmr.jl index b10942995..958d2977c 100644 --- a/src/gpmr.jl +++ b/src/gpmr.jl @@ -12,23 +12,30 @@ export gpmr, gpmr! """ - (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}; memory::Int=20, - C=I, D=I, E=I, F=I, atol::T=√eps(T), rtol::T=√eps(T), - gsp::Bool=false, reorthogonalization::Bool=false, - itmax::Int=0, λ::FC=one(FC), μ::FC=one(FC), + (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}; + memory::Int=20, C=I, D=I, E=I, F=I, + ldiv::Bool=false, gsp::Bool=false, + λ::FC=one(FC), μ::FC=one(FC), + reorthogonalization::Bool=false, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -GPMR solves the unsymmetric partitioned linear system + (x, y, stats) = gpmr(A, B, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) - [ λI A ] [ x ] = [ b ] - [ B μI ] [ y ] [ c ], +GPMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. -where λ and μ are real or complex numbers. -`A` can have any shape and `B` has the shape of `Aᵀ`. 
+Given matrices `A` of dimension m × n and `B` of dimension n × m, +GPMR solves the non-Hermitian partitioned linear system + + [ λIₘ A ] [ x ] = [ b ] + [ B μIₙ ] [ y ] [ c ], + +of size (n+m) × (n+m) where λ and μ are real or complex numbers. +`A` can have any shape and `B` has the shape of `Aᴴ`. `A`, `B`, `b` and `c` must be all nonzero. This implementation allows left and right block diagonal preconditioners @@ -44,8 +51,6 @@ and can solve when `CE = M⁻¹` and `DF = N⁻¹`. By default, GPMR solves unsymmetric linear systems with `λ = 1` and `μ = 1`. -If `gsp = true`, `λ = 1`, `μ = 0` and the associated generalized saddle point system is solved. -`λ` and `μ` are also keyword arguments that can be directly modified for more specific problems. GPMR is based on the orthogonal Hessenberg reduction process and its relations with the block-Arnoldi process. The residual norm ‖rₖ‖ is monotonically decreasing in GPMR. @@ -53,19 +58,42 @@ The residual norm ‖rₖ‖ is monotonically decreasing in GPMR. GPMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`. `atol` is an absolute tolerance and `rtol` is a relative tolerance. -Full reorthogonalization is available with the `reorthogonalization` option. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `B`: a linear operator that models a matrix of dimension n × m; +* `b`: a vector of length m; +* `c`: a vector of length n. + +#### Optional arguments -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +* `x0`: a vector of length m that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. -GPMR can be warm-started from initial guesses `x0` and `y0` with the method +#### Keyword arguments - (x, y, stats) = gpmr(A, B, b, c, x0, y0; kwargs...) +* `memory`: the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`; +* `C`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal left preconditioner; +* `D`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal left preconditioner; +* `E`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal right preconditioner; +* `F`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal right preconditioner; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `gsp`: if `true`, set `λ = 1` and `μ = 0` for generalized saddle-point systems; +* `λ` and `μ`: diagonal scaling factors of the partitioned linear system; +* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0).
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length m; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -106,11 +134,13 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: end function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - C=I, D=I, E=I, F=I, atol :: T=√eps(T), rtol :: T=√eps(T), - gsp :: Bool=false, reorthogonalization :: Bool=false, - itmax :: Int=0, λ :: FC=one(FC), μ :: FC=one(FC), + C=I, D=I, E=I, F=I, + ldiv :: Bool=false, gsp :: Bool=false, + λ :: FC=one(FC), μ :: FC=one(FC), + reorthogonalization :: Bool=false, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history::Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) s, t = size(B) @@ -118,7 +148,7 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: s == n || error("Inconsistent problem size") length(b) == m || error("Inconsistent problem size") length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("GPMR: system of %d equations in %d variables\n", m+n, m+n) + (verbose > 0) && @printf(iostream, "GPMR: system of %d equations in %d variables\n", m+n, m+n) # Check C = E = Iₘ and D = F = Iₙ CisI = (C === I) @@ -129,8 +159,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") eltype(B) == FC || error("eltype(B) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Determine λ and μ associated to generalized saddle point systems. gsp && (λ = one(FC) ; μ = zero(FC)) @@ -172,7 +202,7 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: gs .= zero(FC) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. gc .= zero(T) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ. R .= zero(FC) # Upper triangular matrix Rₖ. - zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᵀ(βe₁ + γe₂). + zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᴴ(βe₁ + γe₂). 
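# An illustrative call matching the interface documented above (random data,
# hypothetical sizes; not part of this patch):
#
#     using Krylov
#
#     m, n = 5, 3
#     A = rand(m, n)
#     B = Matrix(A')             # B must have the shape of Aᴴ
#     b = rand(m)
#     c = rand(n)
#
#     # gsp=true solves the generalized saddle-point system (λ = 1, μ = 0).
#     x, y, stats = gpmr(A, B, b, c, gsp=true)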
# Warm-start # If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ @@ -213,8 +243,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: zt[1] = β zt[2] = γ - (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7s\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗") + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7s\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗") # Tolerance for breakdown detection. btol = eps(T)^(3/4) @@ -259,8 +289,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ for i = 1 : iter - hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = vᵢAuₖ - fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = uᵢBvₖ + hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = (vᵢ)ᴴq + fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = (uᵢ)ᴴp @kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ @kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ R[nr₂ₖ + 2i-1] = hᵢₖ @@ -270,8 +300,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: # Reorthogonalization of the Krylov basis. if reorthogonalization for i = 1 : iter - Htmp = @kdot(m, V[i], q) # hₜₘₚ = qᵀvᵢ - Ftmp = @kdot(n, U[i], p) # fₜₘₚ = pᵀuᵢ + Htmp = @kdot(m, V[i], q) # hₜₘₚ = (vᵢ)ᴴq + Ftmp = @kdot(n, U[i], p) # fₜₘₚ = (uᵢ)ᴴp @kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ @kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ @@ -400,7 +430,7 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: breakdown = Faux ≤ btol && Haux ≤ btol solved = resid_decrease_lim || resid_decrease_mach tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, Haux, Faux) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, Haux, Faux) # Compute vₖ₊₁ and uₖ₊₁ if !(solved || tired || breakdown || user_requested_exit) @@ -430,7 +460,7 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: zt[2k+2] = τbar₂ₖ₊₂ end end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") # Compute zₖ = (ζ₁, ..., ζ₂ₖ) by solving Rₖzₖ = tₖ with backward substitution. for i = 2iter : -1 : 1 diff --git a/src/krylov_processes.jl b/src/krylov_processes.jl new file mode 100644 index 000000000..2be66b1c5 --- /dev/null +++ b/src/krylov_processes.jl @@ -0,0 +1,439 @@ +export hermitian_lanczos, nonhermitian_lanczos, arnoldi, golub_kahan, saunders_simon_yip, montoison_orban + +""" + V, T = hermitian_lanczos(A, b, k) + +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n; +* `k`: the number of iterations of the Hermitian Lanczos process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `T`: a sparse (k+1) × k tridiagonal matrix. + +#### Reference + +* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 225--280, 1950. 
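An illustrative call on a small symmetric tridiagonal matrix (a sketch, not part of the patch):

    using Krylov, LinearAlgebra, SparseArrays

    n = 50
    A = spdiagm(-1 => -ones(n-1), 0 => 2*ones(n), 1 => -ones(n-1))
    b = rand(n)
    k = 10
    V, T = hermitian_lanczos(A, b, k)

    # The Lanczos relation A * Vₖ = Vₖ₊₁ * Tₖ₊₁.ₖ holds to machine precision.
    @assert A * V[:, 1:k] ≈ V * T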
+""" +function hermitian_lanczos(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + R = real(FC) + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+1) + rowval = zeros(Int, 3k-1) + nzval = zeros(R, 3k-1) + + colptr[1] = 1 + rowval[1] = 1 + rowval[2] = 2 + for i = 1:k + colptr[i+1] = 3i + if i ≥ 2 + pos = colptr[i] + rowval[pos] = i-1 + rowval[pos+1] = i + rowval[pos+2] = i+1 + end + end + + V = M(undef, n, k+1) + T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval) + + pαᵢ = 1 # Position of αᵢ in the vector `nzval` + for i = 1:k + vᵢ = view(V,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + if i == 1 + βᵢ = @knrm2(n, b) + vᵢ .= b ./ βᵢ + end + mul!(q, A, vᵢ) + αᵢ = @kdotr(n, vᵢ, q) + nzval[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ + @kaxpy!(n, -αᵢ, vᵢ, q) + if i ≥ 2 + vᵢ₋₁ = view(V,:,i-1) + βᵢ = nzval[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ + nzval[pαᵢ-1] = βᵢ # Tᵢ₋₁.ᵢ = βᵢ + @kaxpy!(n, -βᵢ, vᵢ₋₁, q) + end + βᵢ₊₁ = @knrm2(n, q) + nzval[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ + vᵢ₊₁ .= q ./ βᵢ₊₁ + pαᵢ = pαᵢ + 3 + end + return V, T +end + +""" + V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k) + +#### Input arguments + +* `A`: a linear operator that models a square matrix of dimension n; +* `b`: a vector of length n; +* `c`: a vector of length n; +* `k`: the number of iterations of the non-Hermitian Lanczos process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `T`: a sparse (k+1) × k tridiagonal matrix; +* `U`: a dense n × (k+1) matrix; +* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix. + +#### Reference + +* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 225--280, 1950. +""" +function nonhermitian_lanczos(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + Aᴴ = A' + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+1) + rowval = zeros(Int, 3k-1) + nzval_T = zeros(FC, 3k-1) + nzval_Tᴴ = zeros(FC, 3k-1) + + colptr[1] = 1 + rowval[1] = 1 + rowval[2] = 2 + for i = 1:k + colptr[i+1] = 3i + if i ≥ 2 + pos = colptr[i] + rowval[pos] = i-1 + rowval[pos+1] = i + rowval[pos+2] = i+1 + end + end + + V = M(undef, n, k+1) + U = M(undef, n, k+1) + T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T) + Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ) + + pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ` + for i = 1:k + vᵢ = view(V,:,i) + uᵢ = view(U,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + uᵢ₊₁ = p = view(U,:,i+1) + if i == 1 + cᴴb = @kdot(n, c, b) + βᵢ = √(abs(cᴴb)) + γᵢ = cᴴb / βᵢ + vᵢ .= b ./ βᵢ + uᵢ .= c ./ conj(γᵢ) + end + mul!(q, A , vᵢ) + mul!(p, Aᴴ, uᵢ) + if i ≥ 2 + vᵢ₋₁ = view(V,:,i-1) + uᵢ₋₁ = view(U,:,i-1) + βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ + γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ + @kaxpy!(n, - γᵢ , vᵢ₋₁, q) + @kaxpy!(n, -conj(βᵢ), uᵢ₋₁, p) + end + αᵢ = @kdot(n, uᵢ, q) + nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ + nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ + @kaxpy!(m, - αᵢ , vᵢ, q) + @kaxpy!(n, -conj(αᵢ), uᵢ, p) + pᴴq = @kdot(n, p, q) + βᵢ₊₁ = √(abs(pᴴq)) + γᵢ₊₁ = pᴴq / βᵢ₊₁ + vᵢ₊₁ .= q ./ βᵢ₊₁ + uᵢ₊₁ .= p ./ conj(γᵢ₊₁) + nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ + nzval_Tᴴ[pαᵢ+1] = conj(γᵢ₊₁) # Tᴴᵢ₊₁.ᵢ = γ̄ᵢ₊₁ + if i ≤ k-1 + nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁ + nzval_Tᴴ[pαᵢ+2] = conj(βᵢ₊₁) # Tᴴᵢ.ᵢ₊₁ = β̄ᵢ₊₁ + end + pαᵢ = pαᵢ + 3 + end + return V, T, U, Tᴴ +end + +""" + V, H = arnoldi(A, b, k) + +#### Input arguments + +* `A`: a linear operator that 
models a square matrix of dimension n; +* `b`: a vector of length n; +* `k`: the number of iterations of the Arnoldi process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `H`: a dense (k+1) × k upper Hessenberg matrix. + +#### Reference + +* W. E. Arnoldi, [*The principle of minimized iterations in the solution of the matrix eigenvalue problem*](https://doi.org/10.1090/qam/42792), Quarterly of Applied Mathematics, 9, pp. 17--29, 1951. +""" +function arnoldi(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + S = ktypeof(b) + M = vector_to_matrix(S) + + V = M(undef, n, k+1) + H = zeros(FC, k+1, k) + + for i = 1:k + vᵢ = view(V,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + if i == 1 + β = @knrm2(n, b) + vᵢ .= b ./ β + end + mul!(q, A, vᵢ) + for j = 1:i + vⱼ = view(V,:,j) + H[j,i] = @kdot(n, vⱼ, q) + @kaxpy!(n, -H[j,i], vⱼ, q) + end + H[i+1,i] = @knrm2(n, q) + vᵢ₊₁ .= q ./ H[i+1,i] + end + return V, H +end + +""" + V, U, L = golub_kahan(A, b, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `k`: the number of iterations of the Golub-Kahan process. + +#### Output arguments + +* `V`: a dense n × (k+1) matrix; +* `U`: a dense m × (k+1) matrix; +* `L`: a sparse (k+1) × (k+1) lower bidiagonal matrix. + +#### References + +* G. H. Golub and W. Kahan, [*Calculating the Singular Values and Pseudo-Inverse of a Matrix*](https://doi.org/10.1137/0702016), SIAM Journal on Numerical Analysis, 2(2), pp. 205--224, 1965. +* C. C. Paige, [*Bidiagonalization of Matrices and Solution of Linear Equations*](https://doi.org/10.1137/0711019), SIAM Journal on Numerical Analysis, 11(1), pp. 197--209, 1974. +""" +function golub_kahan(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + R = real(FC) + Aᴴ = A' + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+2) + rowval = zeros(Int, 2k+1) + nzval = zeros(R, 2k+1) + + colptr[1] = 1 + for i = 1:k + pos = colptr[i] + colptr[i+1] = pos+2 + rowval[pos] = i + rowval[pos+1] = i+1 + end + rowval[2k+1] = k+1 + colptr[k+2] = 2k+2 + + V = M(undef, n, k+1) + U = M(undef, m, k+1) + L = SparseMatrixCSC(k+1, k+1, colptr, rowval, nzval) + + pαᵢ = 1 # Position of αᵢ in the vector `nzval` + for i = 1:k + uᵢ = view(U,:,i) + vᵢ = view(V,:,i) + uᵢ₊₁ = q = view(U,:,i+1) + vᵢ₊₁ = p = view(V,:,i+1) + if i == 1 + wᵢ = vᵢ + βᵢ = @knrm2(m, b) + uᵢ .= b ./ βᵢ + mul!(wᵢ, Aᴴ, uᵢ) + αᵢ = @knrm2(n, wᵢ) + nzval[pαᵢ] = αᵢ # Lᵢ.ᵢ = αᵢ + vᵢ .= wᵢ ./ αᵢ + end + mul!(q, A, vᵢ) + αᵢ = nzval[pαᵢ] # αᵢ = Lᵢ.ᵢ + @kaxpy!(m, -αᵢ, uᵢ, q) + βᵢ₊₁ = @knrm2(m, q) + uᵢ₊₁ .= q ./ βᵢ₊₁ + mul!(p, Aᴴ, uᵢ₊₁) + @kaxpy!(n, -βᵢ₊₁, vᵢ, p) + αᵢ₊₁ = @knrm2(n, p) + vᵢ₊₁ .= p ./ αᵢ₊₁ + nzval[pαᵢ+1] = βᵢ₊₁ # Lᵢ₊₁.ᵢ = βᵢ₊₁ + nzval[pαᵢ+2] = αᵢ₊₁ # Lᵢ₊₁.ᵢ₊₁ = αᵢ₊₁ + pαᵢ = pαᵢ + 2 + end + return V, U, L +end + +""" + V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n; +* `k`: the number of iterations of the Saunders-Simon-Yip process. + +#### Output arguments + +* `V`: a dense m × (k+1) matrix; +* `T`: a sparse (k+1) × k tridiagonal matrix; +* `U`: a dense n × (k+1) matrix; +* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix. + +#### Reference + +* M. A. Saunders, H. D. Simon, and E. L.
Yip, [*Two Conjugate-Gradient-Type Methods for Unsymmetric Linear Equations*](https://doi.org/10.1137/0725052), SIAM Journal on Numerical Analysis, 25(4), pp. 927--940, 1988. +""" +function saunders_simon_yip(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + Aᴴ = A' + S = ktypeof(b) + M = vector_to_matrix(S) + + colptr = zeros(Int, k+1) + rowval = zeros(Int, 3k-1) + nzval_T = zeros(FC, 3k-1) + nzval_Tᴴ = zeros(FC, 3k-1) + + colptr[1] = 1 + rowval[1] = 1 + rowval[2] = 2 + for i = 1:k + colptr[i+1] = 3i + if i ≥ 2 + pos = colptr[i] + rowval[pos] = i-1 + rowval[pos+1] = i + rowval[pos+2] = i+1 + end + end + + V = M(undef, m, k+1) + U = M(undef, n, k+1) + T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T) + Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ) + + pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ` + for i = 1:k + vᵢ = view(V,:,i) + uᵢ = view(U,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + uᵢ₊₁ = p = view(U,:,i+1) + if i == 1 + β = @knrm2(m, b) + γ = @knrm2(n, c) + vᵢ .= b ./ β + uᵢ .= c ./ γ + end + mul!(q, A , uᵢ) + mul!(p, Aᴴ, vᵢ) + if i ≥ 2 + vᵢ₋₁ = view(V,:,i-1) + uᵢ₋₁ = view(U,:,i-1) + βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁ + γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ + @kaxpy!(m, -γᵢ, vᵢ₋₁, q) + @kaxpy!(n, -βᵢ, uᵢ₋₁, p) + end + αᵢ = @kdot(m, vᵢ, q) + nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ + nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ + @kaxpy!(m, - αᵢ , vᵢ, q) + @kaxpy!(n, -conj(αᵢ), uᵢ, p) + βᵢ₊₁ = @knrm2(m, q) + γᵢ₊₁ = @knrm2(n, p) + vᵢ₊₁ .= q ./ βᵢ₊₁ + uᵢ₊₁ .= p ./ γᵢ₊₁ + nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁ + nzval_Tᴴ[pαᵢ+1] = γᵢ₊₁ # Tᴴᵢ₊₁.ᵢ = γᵢ₊₁ + if i ≤ k-1 + nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁ + nzval_Tᴴ[pαᵢ+2] = βᵢ₊₁ # Tᴴᵢ.ᵢ₊₁ = βᵢ₊₁ + end + pαᵢ = pαᵢ + 3 + end + return V, T, U, Tᴴ +end + +""" + V, H, U, F = montoison_orban(A, B, b, c, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `B`: a linear operator that models a matrix of dimension n × m; +* `b`: a vector of length m; +* `c`: a vector of length n; +* `k`: the number of iterations of the Montoison-Orban process. + +#### Output arguments + +* `V`: a dense m × (k+1) matrix; +* `H`: a dense (k+1) × k upper Hessenberg matrix; +* `U`: a dense n × (k+1) matrix; +* `F`: a dense (k+1) × k upper Hessenberg matrix. + +#### Reference + +* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://dx.doi.org/10.13140/RG.2.2.24069.68326), Cahier du GERAD G-2021-62, GERAD, Montréal, 2021. 
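A small sketch of the process on random data (illustrative only):

    using Krylov

    m, n, k = 6, 4, 3
    A = rand(m, n)
    B = rand(n, m)
    b = rand(m)
    c = rand(n)
    V, H, U, F = montoison_orban(A, B, b, c, k)

    # Orthogonal Hessenberg relations: A * Uₖ = Vₖ₊₁ * Hₖ₊₁.ₖ and B * Vₖ = Uₖ₊₁ * Fₖ₊₁.ₖ.
    @assert A * U[:, 1:k] ≈ V * H
    @assert B * V[:, 1:k] ≈ U * F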
+""" +function montoison_orban(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + S = ktypeof(b) + M = vector_to_matrix(S) + + V = M(undef, m, k+1) + U = M(undef, n, k+1) + H = zeros(FC, k+1, k) + F = zeros(FC, k+1, k) + + for i = 1:k + vᵢ = view(V,:,i) + uᵢ = view(U,:,i) + vᵢ₊₁ = q = view(V,:,i+1) + uᵢ₊₁ = p = view(U,:,i+1) + if i == 1 + β = @knrm2(m, b) + γ = @knrm2(n, c) + vᵢ .= b ./ β + uᵢ .= c ./ γ + end + mul!(q, A, uᵢ) + mul!(p, B, vᵢ) + for j = 1:i + vⱼ = view(V,:,j) + uⱼ = view(U,:,j) + H[j,i] = @kdot(m, vⱼ, q) + @kaxpy!(n, -H[j,i], vⱼ, q) + F[j,i] = @kdot(n, uⱼ, p) + @kaxpy!(m, -F[j,i], uⱼ, p) + end + H[i+1,i] = @knrm2(m, q) + vᵢ₊₁ .= q ./ H[i+1,i] + F[i+1,i] = @knrm2(n, p) + uᵢ₊₁ .= p ./ F[i+1,i] + end + return V, H, U, F +end diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl index 8a109a2be..bd2bc8a0e 100644 --- a/src/krylov_solvers.jl +++ b/src/krylov_solvers.jl @@ -3,11 +3,13 @@ CgLanczosShiftSolver, MinresQlpSolver, DqgmresSolver, DiomSolver, UsymlqSolver, UsymqrSolver, TricgSolver, TrimrSolver, TrilqrSolver, CgsSolver, BicgstabSolver, BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CrlsSolver, CgneSolver, CrmrSolver, LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver, -GmresSolver, FomSolver, GpmrSolver +GmresSolver, FomSolver, GpmrSolver, FgmresSolver export solve!, solution, nsolution, statistics, issolved, issolved_primal, issolved_dual, niterations, Aprod, Atprod, Bprod, warm_start! +import Base.size, Base.sizeof, Base.format_bytes + const KRYLOV_SOLVERS = Dict( :cg => :CgSolver , :cr => :CrSolver , @@ -20,6 +22,7 @@ const KRYLOV_SOLVERS = Dict( :fom => :FomSolver , :dqgmres => :DqgmresSolver , :gmres => :GmresSolver , + :fgmres => :FgmresSolver , :gpmr => :GpmrSolver , :usymlq => :UsymlqSolver , :usymqr => :UsymqrSolver , @@ -51,12 +54,14 @@ Type for storing the vectors required by the in-place version of MINRES. The outer constructors - solver = MinresSolver(n, m, S; window :: Int=5) + solver = MinresSolver(m, n, S; window :: Int=5) solver = MinresSolver(A, b; window :: Int=5) may be used in order to create these vectors. 
""" mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r1 :: S @@ -68,29 +73,29 @@ mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S} err_vec :: Vector{T} warm_start :: Bool stats :: SimpleStats{T} +end - function MinresSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r1 = S(undef, n) - r2 = S(undef, n) - w1 = S(undef, n) - w2 = S(undef, n) - y = S(undef, n) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats) - return solver - end +function MinresSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r1 = S(undef, n) + r2 = S(undef, n) + w1 = S(undef, n) + w2 = S(undef, n) + y = S(undef, n) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = MinresSolver{T,FC,S}(m, n, Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats) + return solver +end - function MinresSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - MinresSolver(n, m, S, window=window) - end +function MinresSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + MinresSolver(m, n, S, window=window) end """ @@ -98,12 +103,14 @@ Type for storing the vectors required by the in-place version of CG. The outer constructors - solver = CgSolver(n, m, S) + solver = CgSolver(m, n, S) solver = CgSolver(A, b) may be used in order to create these vectors. """ mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -112,26 +119,26 @@ mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S} z :: S warm_start :: Bool stats :: SimpleStats{T} +end - function CgSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - p = S(undef, n) - Ap = S(undef, n) - z = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, p, Ap, z, false, stats) - return solver - end +function CgSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + Ap = S(undef, n) + z = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = CgSolver{T,FC,S}(m, n, Δx, x, r, p, Ap, z, false, stats) + return solver +end - function CgSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgSolver(n, m, S) - end +function CgSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgSolver(m, n, S) end """ @@ -139,12 +146,14 @@ Type for storing the vectors required by the in-place version of CR. The outer constructors - solver = CrSolver(n, m, S) + solver = CrSolver(m, n, S) solver = CrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -154,27 +163,27 @@ mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S} Mq :: S warm_start :: Bool stats :: SimpleStats{T} +end - function CrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - p = S(undef, n) - q = S(undef, n) - Ar = S(undef, n) - Mq = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, p, q, Ar, Mq, false, stats) - return solver - end +function CrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + q = S(undef, n) + Ar = S(undef, n) + Mq = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = CrSolver{T,FC,S}(m, n, Δx, x, r, p, q, Ar, Mq, false, stats) + return solver +end - function CrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CrSolver(n, m, S) - end +function CrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CrSolver(m, n, S) end """ @@ -182,12 +191,14 @@ Type for storing the vectors required by the in-place version of SYMMLQ. The outer constructors - solver = SymmlqSolver(n, m, S) + solver = SymmlqSolver(m, n, S) solver = SymmlqSolver(A, b) may be used in order to create these vectors. """ mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S Mvold :: S @@ -200,30 +211,30 @@ mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} sprod :: Vector{T} warm_start :: Bool stats :: SymmlqStats{T} +end - function SymmlqSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - Mvold = S(undef, n) - Mv = S(undef, n) - Mv_next = S(undef, n) - w̅ = S(undef, n) - v = S(undef, 0) - clist = zeros(T, window) - zlist = zeros(T, window) - sprod = ones(T, window) - stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), "unknown") - solver = new{T,FC,S}(Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats) - return solver - end +function SymmlqSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + Mvold = S(undef, n) + Mv = S(undef, n) + Mv_next = S(undef, n) + w̅ = S(undef, n) + v = S(undef, 0) + clist = zeros(T, window) + zlist = zeros(T, window) + sprod = ones(T, window) + stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), "unknown") + solver = SymmlqSolver{T,FC,S}(m, n, Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats) + return solver +end - function SymmlqSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - SymmlqSolver(n, m, S, window=window) - end +function SymmlqSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + SymmlqSolver(m, n, S, window=window) end """ @@ -231,12 +242,14 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS. The outer constructors - solver = CgLanczosSolver(n, m, S) + solver = CgLanczosSolver(m, n, S) solver = CgLanczosSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S Mv :: S @@ -246,27 +259,27 @@ mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S warm_start :: Bool stats :: LanczosStats{T} +end - function CgLanczosSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - Mv = S(undef, n) - Mv_prev = S(undef, n) - p = S(undef, n) - Mv_next = S(undef, n) - v = S(undef, 0) - stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), "unknown") - solver = new{T,FC,S}(Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats) - return solver - end +function CgLanczosSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + Mv = S(undef, n) + Mv_prev = S(undef, n) + p = S(undef, n) + Mv_next = S(undef, n) + v = S(undef, 0) + stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), "unknown") + solver = CgLanczosSolver{T,FC,S}(m, n, Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats) + return solver +end - function CgLanczosSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgLanczosSolver(n, m, S) - end +function CgLanczosSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgLanczosSolver(m, n, S) end """ @@ -274,12 +287,14 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS-SHIF The outer constructors - solver = CgLanczosShiftSolver(n, m, nshifts, S) + solver = CgLanczosShiftSolver(m, n, nshifts, S) solver = CgLanczosShiftSolver(A, b, nshifts) may be used in order to create these vectors. """ mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Mv :: S Mv_prev :: S Mv_next :: S @@ -294,34 +309,34 @@ mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S} converged :: BitVector not_cv :: BitVector stats :: LanczosShiftStats{T} +end - function CgLanczosShiftSolver(n, m, nshifts, S) - FC = eltype(S) - T = real(FC) - Mv = S(undef, n) - Mv_prev = S(undef, n) - Mv_next = S(undef, n) - v = S(undef, 0) - x = [S(undef, n) for i = 1 : nshifts] - p = [S(undef, n) for i = 1 : nshifts] - σ = Vector{T}(undef, nshifts) - δhat = Vector{T}(undef, nshifts) - ω = Vector{T}(undef, nshifts) - γ = Vector{T}(undef, nshifts) - rNorms = Vector{T}(undef, nshifts) - indefinite = BitVector(undef, nshifts) - converged = BitVector(undef, nshifts) - not_cv = BitVector(undef, nshifts) - stats = LanczosShiftStats(0, false, [T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), "unknown") - solver = new{T,FC,S}(Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats) - return solver - end +function CgLanczosShiftSolver(m, n, nshifts, S) + FC = eltype(S) + T = real(FC) + Mv = S(undef, n) + Mv_prev = S(undef, n) + Mv_next = S(undef, n) + v = S(undef, 0) + x = S[S(undef, n) for i = 1 : nshifts] + p = S[S(undef, n) for i = 1 : nshifts] + σ = Vector{T}(undef, nshifts) + δhat = Vector{T}(undef, nshifts) + ω = Vector{T}(undef, nshifts) + γ = Vector{T}(undef, nshifts) + rNorms = Vector{T}(undef, nshifts) + indefinite = BitVector(undef, nshifts) + converged = BitVector(undef, nshifts) + not_cv = BitVector(undef, nshifts) + stats = LanczosShiftStats(0, false, Vector{T}[T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), "unknown") + solver = CgLanczosShiftSolver{T,FC,S}(m, n, Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats) + return solver +end - function CgLanczosShiftSolver(A, b, nshifts) - n, m = size(A) - S = ktypeof(b) - CgLanczosShiftSolver(n, m, nshifts, S) - end +function CgLanczosShiftSolver(A, b, 
nshifts) + m, n = size(A) + S = ktypeof(b) + CgLanczosShiftSolver(m, n, nshifts, S) end """ @@ -329,12 +344,14 @@ Type for storing the vectors required by the in-place version of MINRES-QLP. The outer constructors - solver = MinresQlpSolver(n, m, S) + solver = MinresQlpSolver(m, n, S) solver = MinresQlpSolver(A, b) may be used in order to create these vectors. """ mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S wₖ₋₁ :: S wₖ :: S @@ -345,28 +362,28 @@ mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S} vₖ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function MinresQlpSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - wₖ₋₁ = S(undef, n) - wₖ = S(undef, n) - M⁻¹vₖ₋₁ = S(undef, n) - M⁻¹vₖ = S(undef, n) - x = S(undef, n) - p = S(undef, n) - vₖ = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats) - return solver - end +function MinresQlpSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + wₖ₋₁ = S(undef, n) + wₖ = S(undef, n) + M⁻¹vₖ₋₁ = S(undef, n) + M⁻¹vₖ = S(undef, n) + x = S(undef, n) + p = S(undef, n) + vₖ = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = MinresQlpSolver{T,FC,S}(m, n, Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats) + return solver +end - function MinresQlpSolver(A, b) - n, m = size(A) - S = ktypeof(b) - MinresQlpSolver(n, m, S) - end +function MinresQlpSolver(A, b) + m, n = size(A) + S = ktypeof(b) + MinresQlpSolver(m, n, S) end """ @@ -374,13 +391,15 @@ Type for storing the vectors required by the in-place version of DQGMRES. The outer constructors - solver = DqgmresSolver(n, m, memory, S) + solver = DqgmresSolver(m, n, memory, S) solver = DqgmresSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. 
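For instance (illustrative sizes), a `memory` hint larger than the problem dimension is simply capped:

    using Krylov, LinearAlgebra

    A = rand(8, 8) + 8I
    b = rand(8)
    solver = DqgmresSolver(A, b, 20)  # memory is reduced to 8 here
    dqgmres!(solver, A, b)
    niterations(solver)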
""" mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S t :: S @@ -393,31 +412,31 @@ mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} H :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function DqgmresSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - t = S(undef, n) - z = S(undef, 0) - w = S(undef, 0) - P = [S(undef, n) for i = 1 : memory] - V = [S(undef, n) for i = 1 : memory] - c = Vector{T}(undef, memory) - s = Vector{FC}(undef, memory) - H = Vector{FC}(undef, memory+2) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, t, z, w, P, V, c, s, H, false, stats) - return solver - end +function DqgmresSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + t = S(undef, n) + z = S(undef, 0) + w = S(undef, 0) + P = S[S(undef, n) for i = 1 : memory] + V = S[S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{FC}(undef, memory) + H = Vector{FC}(undef, memory+1) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = DqgmresSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, c, s, H, false, stats) + return solver +end - function DqgmresSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - DqgmresSolver(n, m, memory, S) - end +function DqgmresSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + DqgmresSolver(m, n, memory, S) end """ @@ -425,13 +444,15 @@ Type for storing the vectors required by the in-place version of DIOM. The outer constructors - solver = DiomSolver(n, m, memory, S) + solver = DiomSolver(m, n, memory, S) solver = DiomSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. """ mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S t :: S @@ -443,30 +464,30 @@ mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S} H :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function DiomSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - t = S(undef, n) - z = S(undef, 0) - w = S(undef, 0) - P = [S(undef, n) for i = 1 : memory] - V = [S(undef, n) for i = 1 : memory] - L = Vector{FC}(undef, memory) - H = Vector{FC}(undef, memory+2) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, t, z, w, P, V, L, H, false, stats) - return solver - end +function DiomSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + t = S(undef, n) + z = S(undef, 0) + w = S(undef, 0) + P = S[S(undef, n) for i = 1 : memory-1] + V = S[S(undef, n) for i = 1 : memory] + L = Vector{FC}(undef, memory-1) + H = Vector{FC}(undef, memory) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = DiomSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, L, H, false, stats) + return solver +end - function DiomSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - DiomSolver(n, m, memory, S) - end +function DiomSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + DiomSolver(m, n, memory, S) end """ @@ -474,12 +495,14 @@ Type for storing the vectors required by the in-place version of USYMLQ. 
The outer constructors - solver = UsymlqSolver(n, m, S) + solver = UsymlqSolver(m, n, S) solver = UsymlqSolver(A, b) may be used in order to create these vectors. """ mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S p :: S @@ -491,29 +514,29 @@ mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} q :: S warm_start :: Bool stats :: SimpleStats{T} +end - function UsymlqSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, m) - uₖ = S(undef, m) - p = S(undef, m) - Δx = S(undef, 0) - x = S(undef, m) - d̅ = S(undef, m) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - q = S(undef, n) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats) - return solver - end +function UsymlqSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + d̅ = S(undef, n) + vₖ₋₁ = S(undef, m) + vₖ = S(undef, m) + q = S(undef, m) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = UsymlqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats) + return solver +end - function UsymlqSolver(A, b) - n, m = size(A) - S = ktypeof(b) - UsymlqSolver(n, m, S) - end +function UsymlqSolver(A, b) + m, n = size(A) + S = ktypeof(b) + UsymlqSolver(m, n, S) end """ @@ -521,12 +544,14 @@ Type for storing the vectors required by the in-place version of USYMQR. The outer constructors - solver = UsymqrSolver(n, m, S) + solver = UsymqrSolver(m, n, S) solver = UsymqrSolver(A, b) may be used in order to create these vectors. """ mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int vₖ₋₁ :: S vₖ :: S q :: S @@ -539,30 +564,30 @@ mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} p :: S warm_start :: Bool stats :: SimpleStats{T} +end - function UsymqrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - q = S(undef, n) - Δx = S(undef, 0) - x = S(undef, m) - wₖ₋₂ = S(undef, m) - wₖ₋₁ = S(undef, m) - uₖ₋₁ = S(undef, m) - uₖ = S(undef, m) - p = S(undef, m) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats) - return solver - end +function UsymqrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + vₖ₋₁ = S(undef, m) + vₖ = S(undef, m) + q = S(undef, m) + Δx = S(undef, 0) + x = S(undef, n) + wₖ₋₂ = S(undef, n) + wₖ₋₁ = S(undef, n) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + p = S(undef, n) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = UsymqrSolver{T,FC,S}(m, n, vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats) + return solver +end - function UsymqrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - UsymqrSolver(n, m, S) - end +function UsymqrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + UsymqrSolver(m, n, S) end """ @@ -570,12 +595,14 @@ Type for storing the vectors required by the in-place version of TRICG. The outer constructors - solver = TricgSolver(n, m, S) + solver = TricgSolver(m, n, S) solver = TricgSolver(A, b) may be used in order to create these vectors. 
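A sketch with a rectangular `A` (random data, illustrative); by default TriCG targets the symmetric quasi-definite system [ I A ; Aᴴ -I ] [x ; y] = [b ; c]:

    using Krylov

    m, n = 6, 4
    A = rand(m, n)
    b = rand(m)
    c = rand(n)
    solver = TricgSolver(A, b)
    tricg!(solver, A, b, c)
    x, y = solver.x, solver.y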
""" mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int y :: S N⁻¹uₖ₋₁ :: S N⁻¹uₖ :: S @@ -594,36 +621,36 @@ mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S} vₖ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function TricgSolver(n, m, S) - FC = eltype(S) - T = real(FC) - y = S(undef, m) - N⁻¹uₖ₋₁ = S(undef, m) - N⁻¹uₖ = S(undef, m) - p = S(undef, m) - gy₂ₖ₋₁ = S(undef, m) - gy₂ₖ = S(undef, m) - x = S(undef, n) - M⁻¹vₖ₋₁ = S(undef, n) - M⁻¹vₖ = S(undef, n) - q = S(undef, n) - gx₂ₖ₋₁ = S(undef, n) - gx₂ₖ = S(undef, n) - Δx = S(undef, 0) - Δy = S(undef, 0) - uₖ = S(undef, 0) - vₖ = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) - return solver - end +function TricgSolver(m, n, S) + FC = eltype(S) + T = real(FC) + y = S(undef, n) + N⁻¹uₖ₋₁ = S(undef, n) + N⁻¹uₖ = S(undef, n) + p = S(undef, n) + gy₂ₖ₋₁ = S(undef, n) + gy₂ₖ = S(undef, n) + x = S(undef, m) + M⁻¹vₖ₋₁ = S(undef, m) + M⁻¹vₖ = S(undef, m) + q = S(undef, m) + gx₂ₖ₋₁ = S(undef, m) + gx₂ₖ = S(undef, m) + Δx = S(undef, 0) + Δy = S(undef, 0) + uₖ = S(undef, 0) + vₖ = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = TricgSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) + return solver +end - function TricgSolver(A, b) - n, m = size(A) - S = ktypeof(b) - TricgSolver(n, m, S) - end +function TricgSolver(A, b) + m, n = size(A) + S = ktypeof(b) + TricgSolver(m, n, S) end """ @@ -631,12 +658,14 @@ Type for storing the vectors required by the in-place version of TRIMR. The outer constructors - solver = TrimrSolver(n, m, S) + solver = TrimrSolver(m, n, S) solver = TrimrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int y :: S N⁻¹uₖ₋₁ :: S N⁻¹uₖ :: S @@ -659,40 +688,40 @@ mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S} vₖ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function TrimrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - y = S(undef, m) - N⁻¹uₖ₋₁ = S(undef, m) - N⁻¹uₖ = S(undef, m) - p = S(undef, m) - gy₂ₖ₋₃ = S(undef, m) - gy₂ₖ₋₂ = S(undef, m) - gy₂ₖ₋₁ = S(undef, m) - gy₂ₖ = S(undef, m) - x = S(undef, n) - M⁻¹vₖ₋₁ = S(undef, n) - M⁻¹vₖ = S(undef, n) - q = S(undef, n) - gx₂ₖ₋₃ = S(undef, n) - gx₂ₖ₋₂ = S(undef, n) - gx₂ₖ₋₁ = S(undef, n) - gx₂ₖ = S(undef, n) - Δx = S(undef, 0) - Δy = S(undef, 0) - uₖ = S(undef, 0) - vₖ = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) - return solver - end +function TrimrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + y = S(undef, n) + N⁻¹uₖ₋₁ = S(undef, n) + N⁻¹uₖ = S(undef, n) + p = S(undef, n) + gy₂ₖ₋₃ = S(undef, n) + gy₂ₖ₋₂ = S(undef, n) + gy₂ₖ₋₁ = S(undef, n) + gy₂ₖ = S(undef, n) + x = S(undef, m) + M⁻¹vₖ₋₁ = S(undef, m) + M⁻¹vₖ = S(undef, m) + q = S(undef, m) + gx₂ₖ₋₃ = S(undef, m) + gx₂ₖ₋₂ = S(undef, m) + gx₂ₖ₋₁ = S(undef, m) + gx₂ₖ = S(undef, m) + Δx = S(undef, 0) + Δy = S(undef, 0) + uₖ = S(undef, 0) + vₖ = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = TrimrSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats) + return solver +end - function TrimrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - TrimrSolver(n, m, S) - end +function TrimrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + TrimrSolver(m, n, S) end """ @@ -700,12 +729,14 @@ Type for storing the vectors required by the in-place version of TRILQR. The outer constructors - solver = TrilqrSolver(n, m, S) + solver = TrilqrSolver(m, n, S) solver = TrilqrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S p :: S @@ -721,33 +752,33 @@ mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} wₖ₋₂ :: S warm_start :: Bool stats :: AdjointStats{T} +end - function TrilqrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, m) - uₖ = S(undef, m) - p = S(undef, m) - d̅ = S(undef, m) - Δx = S(undef, 0) - x = S(undef, m) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - q = S(undef, n) - Δy = S(undef, 0) - y = S(undef, n) - wₖ₋₃ = S(undef, n) - wₖ₋₂ = S(undef, n) - stats = AdjointStats(0, false, false, T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats) - return solver - end +function TrilqrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + p = S(undef, n) + d̅ = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + vₖ₋₁ = S(undef, m) + vₖ = S(undef, m) + q = S(undef, m) + Δy = S(undef, 0) + y = S(undef, m) + wₖ₋₃ = S(undef, m) + wₖ₋₂ = S(undef, m) + stats = AdjointStats(0, false, false, T[], T[], "unknown") + solver = TrilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats) + return solver +end - function TrilqrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - TrilqrSolver(n, m, S) - end +function TrilqrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + TrilqrSolver(m, n, S) end """ @@ -755,12 +786,14 @@ Type for storing the vectors required by the in-place version of CGS. The outer constructorss - solver = CgsSolver(n, m, S) + solver = CgsSolver(m, n, S) solver = CgsSolver(A, b) may be used in order to create these vectors. """ mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -772,29 +805,30 @@ mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S} vw :: S warm_start :: Bool stats :: SimpleStats{T} +end - function CgsSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - u = S(undef, n) - p = S(undef, n) - q = S(undef, n) - ts = S(undef, n) - yz = S(undef, 0) - vw = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, u, p, q, ts, yz, vw, false, stats) - return solver - end +function CgsSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + u = S(undef, n) + p = S(undef, n) + q = S(undef, n) + ts = S(undef, n) + yz = S(undef, 0) + vw = S(undef, 0) + + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = CgsSolver{T,FC,S}(m, n, Δx, x, r, u, p, q, ts, yz, vw, false, stats) + return solver +end - function CgsSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgsSolver(n, m, S) - end +function CgsSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgsSolver(m, n, S) end """ @@ -802,12 +836,14 @@ Type for storing the vectors required by the in-place version of BICGSTAB. The outer constructors - solver = BicgstabSolver(n, m, S) + solver = BicgstabSolver(m, n, S) solver = BicgstabSolver(A, b) may be used in order to create these vectors. 
""" mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S r :: S @@ -819,29 +855,29 @@ mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S} t :: S warm_start :: Bool stats :: SimpleStats{T} +end - function BicgstabSolver(n, m, S) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - r = S(undef, n) - p = S(undef, n) - v = S(undef, n) - s = S(undef, n) - qd = S(undef, n) - yz = S(undef, 0) - t = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, r, p, v, s, qd, yz, t, false, stats) - return solver - end +function BicgstabSolver(m, n, S) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + r = S(undef, n) + p = S(undef, n) + v = S(undef, n) + s = S(undef, n) + qd = S(undef, n) + yz = S(undef, 0) + t = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = BicgstabSolver{T,FC,S}(m, n, Δx, x, r, p, v, s, qd, yz, t, false, stats) + return solver +end - function BicgstabSolver(A, b) - n, m = size(A) - S = ktypeof(b) - BicgstabSolver(n, m, S) - end +function BicgstabSolver(A, b) + m, n = size(A) + S = ktypeof(b) + BicgstabSolver(m, n, S) end """ @@ -849,12 +885,14 @@ Type for storing the vectors required by the in-place version of BILQ. The outer constructors - solver = BilqSolver(n, m, S) + solver = BilqSolver(m, n, S) solver = BilqSolver(A, b) may be used in order to create these vectors. """ mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S q :: S @@ -866,29 +904,29 @@ mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S} d̅ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function BilqSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, n) - uₖ = S(undef, n) - q = S(undef, n) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - p = S(undef, n) - Δx = S(undef, 0) - x = S(undef, n) - d̅ = S(undef, n) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats) - return solver - end +function BilqSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + q = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + d̅ = S(undef, n) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = BilqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats) + return solver +end - function BilqSolver(A, b) - n, m = size(A) - S = ktypeof(b) - BilqSolver(n, m, S) - end +function BilqSolver(A, b) + m, n = size(A) + S = ktypeof(b) + BilqSolver(m, n, S) end """ @@ -896,12 +934,14 @@ Type for storing the vectors required by the in-place version of QMR. The outer constructors - solver = QmrSolver(n, m, S) + solver = QmrSolver(m, n, S) solver = QmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S q :: S @@ -914,30 +954,30 @@ mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} wₖ₋₁ :: S warm_start :: Bool stats :: SimpleStats{T} +end - function QmrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, n) - uₖ = S(undef, n) - q = S(undef, n) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - p = S(undef, n) - Δx = S(undef, 0) - x = S(undef, n) - wₖ₋₂ = S(undef, n) - wₖ₋₁ = S(undef, n) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats) - return solver - end +function QmrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + q = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + wₖ₋₂ = S(undef, n) + wₖ₋₁ = S(undef, n) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = QmrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats) + return solver +end - function QmrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - QmrSolver(n, m, S) - end +function QmrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + QmrSolver(m, n, S) end """ @@ -945,12 +985,14 @@ Type for storing the vectors required by the in-place version of BILQR. The outer constructors - solver = BilqrSolver(n, m, S) + solver = BilqrSolver(m, n, S) solver = BilqrSolver(A, b) may be used in order to create these vectors. """ mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int uₖ₋₁ :: S uₖ :: S q :: S @@ -966,33 +1008,33 @@ mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} wₖ₋₂ :: S warm_start :: Bool stats :: AdjointStats{T} +end - function BilqrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - uₖ₋₁ = S(undef, n) - uₖ = S(undef, n) - q = S(undef, n) - vₖ₋₁ = S(undef, n) - vₖ = S(undef, n) - p = S(undef, n) - Δx = S(undef, 0) - x = S(undef, n) - Δy = S(undef, 0) - y = S(undef, n) - d̅ = S(undef, n) - wₖ₋₃ = S(undef, n) - wₖ₋₂ = S(undef, n) - stats = AdjointStats(0, false, false, T[], T[], "unknown") - solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats) - return solver - end +function BilqrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + uₖ₋₁ = S(undef, n) + uₖ = S(undef, n) + q = S(undef, n) + vₖ₋₁ = S(undef, n) + vₖ = S(undef, n) + p = S(undef, n) + Δx = S(undef, 0) + x = S(undef, n) + Δy = S(undef, 0) + y = S(undef, n) + d̅ = S(undef, n) + wₖ₋₃ = S(undef, n) + wₖ₋₂ = S(undef, n) + stats = AdjointStats(0, false, false, T[], T[], "unknown") + solver = BilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats) + return solver +end - function BilqrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - BilqrSolver(n, m, S) - end +function BilqrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + BilqrSolver(m, n, S) end """ @@ -1000,12 +1042,14 @@ Type for storing the vectors required by the in-place version of CGLS. The outer constructors - solver = CglsSolver(n, m, S) + solver = CglsSolver(m, n, S) solver = CglsSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S s :: S @@ -1013,26 +1057,26 @@ mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S} q :: S Mr :: S stats :: SimpleStats{T} +end - function CglsSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - s = S(undef, m) - r = S(undef, n) - q = S(undef, n) - Mr = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, s, r, q, Mr, stats) - return solver - end +function CglsSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + s = S(undef, n) + r = S(undef, m) + q = S(undef, m) + Mr = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = CglsSolver{T,FC,S}(m, n, x, p, s, r, q, Mr, stats) + return solver +end - function CglsSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CglsSolver(n, m, S) - end +function CglsSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CglsSolver(m, n, S) end """ @@ -1040,12 +1084,14 @@ Type for storing the vectors required by the in-place version of CRLS. The outer constructors - solver = CrlsSolver(n, m, S) + solver = CrlsSolver(m, n, S) solver = CrlsSolver(A, b) may be used in order to create these vectors. """ mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S Ar :: S @@ -1055,28 +1101,28 @@ mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S} s :: S Ms :: S stats :: SimpleStats{T} +end - function CrlsSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - Ar = S(undef, m) - q = S(undef, m) - r = S(undef, n) - Ap = S(undef, n) - s = S(undef, n) - Ms = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, Ar, q, r, Ap, s, Ms, stats) - return solver - end +function CrlsSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + Ar = S(undef, n) + q = S(undef, n) + r = S(undef, m) + Ap = S(undef, m) + s = S(undef, m) + Ms = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = CrlsSolver{T,FC,S}(m, n, x, p, Ar, q, r, Ap, s, Ms, stats) + return solver +end - function CrlsSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CrlsSolver(n, m, S) - end +function CrlsSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CrlsSolver(m, n, S) end """ @@ -1084,41 +1130,43 @@ Type for storing the vectors required by the in-place version of CGNE. The outer constructors - solver = CgneSolver(n, m, S) + solver = CgneSolver(m, n, S) solver = CgneSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CgneSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S - Aᵀz :: S + Aᴴz :: S r :: S q :: S s :: S z :: S stats :: SimpleStats{T} +end - function CgneSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - Aᵀz = S(undef, m) - r = S(undef, n) - q = S(undef, n) - s = S(undef, 0) - z = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, Aᵀz, r, q, s, z, stats) - return solver - end +function CgneSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + Aᴴz = S(undef, n) + r = S(undef, m) + q = S(undef, m) + s = S(undef, 0) + z = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = CgneSolver{T,FC,S}(m, n, x, p, Aᴴz, r, q, s, z, stats) + return solver +end - function CgneSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CgneSolver(n, m, S) - end +function CgneSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CgneSolver(m, n, S) end """ @@ -1126,41 +1174,43 @@ Type for storing the vectors required by the in-place version of CRMR. The outer constructors - solver = CrmrSolver(n, m, S) + solver = CrmrSolver(m, n, S) solver = CrmrSolver(A, b) may be used in order to create these vectors. """ mutable struct CrmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S p :: S - Aᵀr :: S + Aᴴr :: S r :: S q :: S - Mq :: S + Nq :: S s :: S stats :: SimpleStats{T} +end - function CrmrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - p = S(undef, m) - Aᵀr = S(undef, m) - r = S(undef, n) - q = S(undef, n) - Mq = S(undef, 0) - s = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, p, Aᵀr, r, q, Mq, s, stats) - return solver - end +function CrmrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + p = S(undef, n) + Aᴴr = S(undef, n) + r = S(undef, m) + q = S(undef, m) + Nq = S(undef, 0) + s = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = CrmrSolver{T,FC,S}(m, n, x, p, Aᴴr, r, q, Nq, s, stats) + return solver +end - function CrmrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CrmrSolver(n, m, S) - end +function CrmrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CrmrSolver(m, n, S) end """ @@ -1168,15 +1218,17 @@ Type for storing the vectors required by the in-place version of LSLQ. The outer constructors - solver = LslqSolver(n, m, S) + solver = LslqSolver(m, n, S) solver = LslqSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S w̄ :: S Mu :: S Av :: S @@ -1184,29 +1236,29 @@ mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S err_vec :: Vector{T} stats :: LSLQStats{T} +end - function LslqSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - w̄ = S(undef, m) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, w̄, Mu, Av, u, v, err_vec, stats) - return solver - end +function LslqSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + w̄ = S(undef, n) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown") + solver = LslqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w̄, Mu, Av, u, v, err_vec, stats) + return solver +end - function LslqSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - LslqSolver(n, m, S, window=window) - end +function LslqSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + LslqSolver(m, n, S, window=window) end """ @@ -1214,15 +1266,17 @@ Type for storing the vectors required by the in-place version of LSQR. The outer constructors - solver = LsqrSolver(n, m, S) + solver = LsqrSolver(m, n, S) solver = LsqrSolver(A, b) may be used in order to create these vectors. """ mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S w :: S Mu :: S Av :: S @@ -1230,29 +1284,29 @@ mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S err_vec :: Vector{T} stats :: SimpleStats{T} +end - function LsqrSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - w = S(undef, m) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, w, Mu, Av, u, v, err_vec, stats) - return solver - end +function LsqrSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + w = S(undef, n) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = LsqrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w, Mu, Av, u, v, err_vec, stats) + return solver +end - function LsqrSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - LsqrSolver(n, m, S, window=window) - end +function LsqrSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + LsqrSolver(m, n, S, window=window) end """ @@ -1260,15 +1314,17 @@ Type for storing the vectors required by the in-place version of LSMR. The outer constructors - solver = LsmrSolver(n, m, S) + solver = LsmrSolver(m, n, S) solver = LsmrSolver(A, b) may be used in order to create these vectors. 
""" mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S h :: S hbar :: S Mu :: S @@ -1277,30 +1333,30 @@ mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S err_vec :: Vector{T} stats :: LsmrStats{T} +end - function LsmrSolver(n, m, S; window :: Int=5) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - h = S(undef, m) - hbar = S(undef, m) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - err_vec = zeros(T, window) - stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, h, hbar, Mu, Av, u, v, err_vec, stats) - return solver - end +function LsmrSolver(m, n, S; window :: Int=5) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + h = S(undef, n) + hbar = S(undef, n) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + err_vec = zeros(T, window) + stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown") + solver = LsmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, h, hbar, Mu, Av, u, v, err_vec, stats) + return solver +end - function LsmrSolver(A, b; window :: Int=5) - n, m = size(A) - S = ktypeof(b) - LsmrSolver(n, m, S, window=window) - end +function LsmrSolver(A, b; window :: Int=5) + m, n = size(A) + S = ktypeof(b) + LsmrSolver(m, n, S, window=window) end """ @@ -1308,15 +1364,17 @@ Type for storing the vectors required by the in-place version of LNLQ. The outer constructors - solver = LnlqSolver(n, m, S) + solver = LnlqSolver(m, n, S) solver = LnlqSolver(A, b) may be used in order to create these vectors. """ mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S y :: S w̄ :: S Mu :: S @@ -1325,30 +1383,30 @@ mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S q :: S stats :: LNLQStats{T} +end - function LnlqSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - y = S(undef, n) - w̄ = S(undef, n) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - q = S(undef, 0) - stats = LNLQStats(0, false, T[], false, T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, y, w̄, Mu, Av, u, v, q, stats) - return solver - end +function LnlqSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + y = S(undef, m) + w̄ = S(undef, m) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + q = S(undef, 0) + stats = LNLQStats(0, false, T[], false, T[], T[], "unknown") + solver = LnlqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w̄, Mu, Av, u, v, q, stats) + return solver +end - function LnlqSolver(A, b) - n, m = size(A) - S = ktypeof(b) - LnlqSolver(n, m, S) - end +function LnlqSolver(A, b) + m, n = size(A) + S = ktypeof(b) + LnlqSolver(m, n, S) end """ @@ -1356,15 +1414,17 @@ Type for storing the vectors required by the in-place version of CRAIG. The outer constructors - solver = CraigSolver(n, m, S) + solver = CraigSolver(m, n, S) solver = CraigSolver(A, b) may be used in order to create these vectors. 
""" mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S y :: S w :: S Mu :: S @@ -1373,30 +1433,30 @@ mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S w2 :: S stats :: SimpleStats{T} +end - function CraigSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - y = S(undef, n) - w = S(undef, n) - Mu = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - w2 = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, y, w, Mu, Av, u, v, w2, stats) - return solver - end +function CraigSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + y = S(undef, m) + w = S(undef, m) + Mu = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + w2 = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = CraigSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w, Mu, Av, u, v, w2, stats) + return solver +end - function CraigSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CraigSolver(n, m, S) - end +function CraigSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CraigSolver(m, n, S) end """ @@ -1404,15 +1464,17 @@ Type for storing the vectors required by the in-place version of CRAIGMR. The outer constructors - solver = CraigmrSolver(n, m, S) + solver = CraigmrSolver(m, n, S) solver = CraigmrSolver(A, b) may be used in order to create these vectors. """ mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int x :: S Nv :: S - Aᵀu :: S + Aᴴu :: S d :: S y :: S Mu :: S @@ -1423,32 +1485,32 @@ mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} v :: S q :: S stats :: SimpleStats{T} +end - function CraigmrSolver(n, m, S) - FC = eltype(S) - T = real(FC) - x = S(undef, m) - Nv = S(undef, m) - Aᵀu = S(undef, m) - d = S(undef, m) - y = S(undef, n) - Mu = S(undef, n) - w = S(undef, n) - wbar = S(undef, n) - Av = S(undef, n) - u = S(undef, 0) - v = S(undef, 0) - q = S(undef, 0) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(x, Nv, Aᵀu, d, y, Mu, w, wbar, Av, u, v, q, stats) - return solver - end +function CraigmrSolver(m, n, S) + FC = eltype(S) + T = real(FC) + x = S(undef, n) + Nv = S(undef, n) + Aᴴu = S(undef, n) + d = S(undef, n) + y = S(undef, m) + Mu = S(undef, m) + w = S(undef, m) + wbar = S(undef, m) + Av = S(undef, m) + u = S(undef, 0) + v = S(undef, 0) + q = S(undef, 0) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = CraigmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, d, y, Mu, w, wbar, Av, u, v, q, stats) + return solver +end - function CraigmrSolver(A, b) - n, m = size(A) - S = ktypeof(b) - CraigmrSolver(n, m, S) - end +function CraigmrSolver(A, b) + m, n = size(A) + S = ktypeof(b) + CraigmrSolver(m, n, S) end """ @@ -1456,13 +1518,15 @@ Type for storing the vectors required by the in-place version of GMRES. The outer constructors - solver = GmresSolver(n, m, memory, S) + solver = GmresSolver(m, n, memory, S) solver = GmresSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. 
""" mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S w :: S @@ -1476,31 +1540,85 @@ mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} warm_start :: Bool inner_iter :: Int stats :: SimpleStats{T} +end - function GmresSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - w = S(undef, n) - p = S(undef, 0) - q = S(undef, 0) - V = [S(undef, n) for i = 1 : memory] - c = Vector{T}(undef, memory) - s = Vector{FC}(undef, memory) - z = Vector{FC}(undef, memory) - R = Vector{FC}(undef, div(memory * (memory+1), 2)) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, w, p, q, V, c, s, z, R, false, 0, stats) - return solver - end +function GmresSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + w = S(undef, n) + p = S(undef, 0) + q = S(undef, 0) + V = S[S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{FC}(undef, memory) + z = Vector{FC}(undef, memory) + R = Vector{FC}(undef, div(memory * (memory+1), 2)) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = GmresSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, c, s, z, R, false, 0, stats) + return solver +end - function GmresSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - GmresSolver(n, m, memory, S) - end +function GmresSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + GmresSolver(m, n, memory, S) +end + +""" +Type for storing the vectors required by the in-place version of FGMRES. + +The outer constructors + + solver = FgmresSolver(m, n, memory, S) + solver = FgmresSolver(A, b, memory = 20) + +may be used in order to create these vectors. +`memory` is set to `n` if the value given is larger than `n`. +""" +mutable struct FgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int + Δx :: S + x :: S + w :: S + q :: S + V :: Vector{S} + Z :: Vector{S} + c :: Vector{T} + s :: Vector{FC} + z :: Vector{FC} + R :: Vector{FC} + warm_start :: Bool + inner_iter :: Int + stats :: SimpleStats{T} +end + +function FgmresSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + w = S(undef, n) + q = S(undef, 0) + V = S[S(undef, n) for i = 1 : memory] + Z = S[S(undef, n) for i = 1 : memory] + c = Vector{T}(undef, memory) + s = Vector{FC}(undef, memory) + z = Vector{FC}(undef, memory) + R = Vector{FC}(undef, div(memory * (memory+1), 2)) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = FgmresSolver{T,FC,S}(m, n, Δx, x, w, q, V, Z, c, s, z, R, false, 0, stats) + return solver +end + +function FgmresSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + FgmresSolver(m, n, memory, S) end """ @@ -1508,13 +1626,15 @@ Type for storing the vectors required by the in-place version of FOM. The outer constructors - solver = FomSolver(n, m, memory, S) + solver = FomSolver(m, n, memory, S) solver = FomSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n` if the value given is larger than `n`. 
""" mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int Δx :: S x :: S w :: S @@ -1526,30 +1646,30 @@ mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S} U :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function FomSolver(n, m, memory, S) - memory = min(n, memory) - FC = eltype(S) - T = real(FC) - Δx = S(undef, 0) - x = S(undef, n) - w = S(undef, n) - p = S(undef, 0) - q = S(undef, 0) - V = [S(undef, n) for i = 1 : memory] - l = Vector{FC}(undef, memory) - z = Vector{FC}(undef, memory) - U = Vector{FC}(undef, div(memory * (memory+1), 2)) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(Δx, x, w, p, q, V, l, z, U, false, stats) - return solver - end +function FomSolver(m, n, memory, S) + memory = min(m, memory) + FC = eltype(S) + T = real(FC) + Δx = S(undef, 0) + x = S(undef, n) + w = S(undef, n) + p = S(undef, 0) + q = S(undef, 0) + V = S[S(undef, n) for i = 1 : memory] + l = Vector{FC}(undef, memory) + z = Vector{FC}(undef, memory) + U = Vector{FC}(undef, div(memory * (memory+1), 2)) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = FomSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, l, z, U, false, stats) + return solver +end - function FomSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - FomSolver(n, m, memory, S) - end +function FomSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + FomSolver(m, n, memory, S) end """ @@ -1557,13 +1677,15 @@ Type for storing the vectors required by the in-place version of GPMR. The outer constructors - solver = GpmrSolver(n, m, memory, S) + solver = GpmrSolver(m, n, memory, S) solver = GpmrSolver(A, b, memory = 20) may be used in order to create these vectors. `memory` is set to `n + m` if the value given is larger than `n + m`. 
""" mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} + m :: Int + n :: Int wA :: S wB :: S dA :: S @@ -1582,37 +1704,37 @@ mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S} R :: Vector{FC} warm_start :: Bool stats :: SimpleStats{T} +end - function GpmrSolver(n, m, memory, S) - memory = min(n + m, memory) - FC = eltype(S) - T = real(FC) - wA = S(undef, 0) - wB = S(undef, 0) - dA = S(undef, n) - dB = S(undef, m) - Δx = S(undef, 0) - Δy = S(undef, 0) - x = S(undef, n) - y = S(undef, m) - q = S(undef, 0) - p = S(undef, 0) - V = [S(undef, n) for i = 1 : memory] - U = [S(undef, m) for i = 1 : memory] - gs = Vector{FC}(undef, 4 * memory) - gc = Vector{T}(undef, 4 * memory) - zt = Vector{FC}(undef, 2 * memory) - R = Vector{FC}(undef, memory * (2memory + 1)) - stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") - solver = new{T,FC,S}(wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats) - return solver - end +function GpmrSolver(m, n, memory, S) + memory = min(n + m, memory) + FC = eltype(S) + T = real(FC) + wA = S(undef, 0) + wB = S(undef, 0) + dA = S(undef, m) + dB = S(undef, n) + Δx = S(undef, 0) + Δy = S(undef, 0) + x = S(undef, m) + y = S(undef, n) + q = S(undef, 0) + p = S(undef, 0) + V = S[S(undef, m) for i = 1 : memory] + U = S[S(undef, n) for i = 1 : memory] + gs = Vector{FC}(undef, 4 * memory) + gc = Vector{T}(undef, 4 * memory) + zt = Vector{FC}(undef, 2 * memory) + R = Vector{FC}(undef, memory * (2 * memory + 1)) + stats = SimpleStats(0, false, false, T[], T[], T[], "unknown") + solver = GpmrSolver{T,FC,S}(m, n, wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats) + return solver +end - function GpmrSolver(A, b, memory = 20) - n, m = size(A) - S = ktypeof(b) - GpmrSolver(n, m, memory, S) - end +function GpmrSolver(A, b, memory = 20) + m, n = size(A) + S = ktypeof(b) + GpmrSolver(m, n, memory, S) end """ @@ -1704,29 +1826,35 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [ (MinresQlpSolver , :minres_qlp! , 1, 1, 0, true ) (QmrSolver , :qmr! , 1, 1, 1, true ) (GmresSolver , :gmres! , 1, 1, 0, true ) + (FgmresSolver , :fgmres! , 1, 1, 0, true ) (FomSolver , :fom! , 1, 1, 0, true ) (GpmrSolver , :gpmr! , 2, 1, 0, true ) ] @eval begin - @inline solve!(solver :: $KS, args...; kwargs...) = $(fun)(solver, args...; kwargs...) - @inline statistics(solver :: $KS) = solver.stats - @inline niterations(solver :: $KS) = solver.stats.niter - @inline Aprod(solver :: $KS) = $nA * solver.stats.niter - @inline Atprod(solver :: $KS) = $nAt * solver.stats.niter + size(solver :: $KS) = solver.m, solver.n + solve!(solver :: $KS, args...; kwargs...) = $(fun)(solver, args...; kwargs...) + statistics(solver :: $KS) = solver.stats + niterations(solver :: $KS) = solver.stats.niter + Aprod(solver :: $KS) = $nA * solver.stats.niter + Atprod(solver :: $KS) = $nAt * solver.stats.niter if $KS == GpmrSolver - @inline Bprod(solver :: $KS) = solver.stats.niter + Bprod(solver :: $KS) = solver.stats.niter + end + nsolution(solver :: $KS) = $nsol + if $nsol == 1 + solution(solver :: $KS) = solver.x + solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.") + end + if $nsol == 2 + solution(solver :: $KS) = solver.x, solver.y + solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? 
solution(solver)[p] : error("solution(solver) has only two outputs.") end - @inline nsolution(solver :: $KS) = $nsol - ($nsol == 1) && @inline solution(solver :: $KS) = solver.x - ($nsol == 2) && @inline solution(solver :: $KS) = solver.x, solver.y - ($nsol == 1) && @inline solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.") - ($nsol == 2) && @inline solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? solution(solver)[p] : error("solution(solver) has only two outputs.") if $KS ∈ (BilqrSolver, TrilqrSolver) - @inline issolved_primal(solver :: $KS) = solver.stats.solved_primal - @inline issolved_dual(solver :: $KS) = solver.stats.solved_dual - @inline issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver) + issolved_primal(solver :: $KS) = solver.stats.solved_primal + issolved_dual(solver :: $KS) = solver.stats.solved_dual + issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver) else - @inline issolved(solver :: $KS) = solver.stats.solved + issolved(solver :: $KS) = solver.stats.solved end if $warm_start if $KS in (BilqrSolver, TrilqrSolver, TricgSolver, TrimrSolver, GpmrSolver) @@ -1758,6 +1886,29 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [ end end +function ksizeof(attribute) + if isa(attribute, Vector{<:AbstractVector}) && !isempty(attribute) + # A vector of vectors is a vector of pointers in Julia. + # All vectors inside a vector have the same size in Krylov.jl + size_attribute = sizeof(attribute) + length(attribute) * ksizeof(attribute[1]) + else + size_attribute = sizeof(attribute) + end + return size_attribute +end + +function sizeof(stats_solver :: Union{KrylovStats, KrylovSolver}) + type = typeof(stats_solver) + nfields = fieldcount(type) + storage = 0 + for i = 1:nfields + field_i = getfield(stats_solver, i) + size_i = ksizeof(field_i) + storage += size_i + end + return storage +end + """ show(io, solver; show_stats=true) @@ -1765,38 +1916,40 @@ Statistics of `solver` are displayed if `show_stats` is set to true. """ function show(io :: IO, solver :: KrylovSolver{T,FC,S}; show_stats :: Bool=true) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} workspace = typeof(solver) - name_solver = workspace.name.wrapper - l1 = max(length(string(name_solver)), 10) # length("warm_start") = 10 - l2 = length(string(S)) + 8 # length("Vector{}") = 8 + name_solver = string(workspace.name.name) + name_stats = string(typeof(solver.stats).name.name) + nbytes = sizeof(solver) + storage = format_bytes(nbytes) architecture = S <: Vector ? "CPU" : "GPU" - format = Printf.Format("│%$(l1)s│%$(l2)s│%18s│\n") - format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%18s│\n") - @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^18) - Printf.format(io, format, name_solver, "Precision: $FC", "Architecture: $architecture") - @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18) + l1 = max(length(name_solver), length(string(FC)) + 11) # length("Precision: ") = 11 + nchar = workspace <: Union{CgLanczosShiftSolver, FomSolver, DiomSolver, DqgmresSolver, GmresSolver, FgmresSolver, GpmrSolver} ? 
8 : 0 # length("Vector{}") = 8 + l2 = max(ndigits(solver.m) + 7, length(architecture) + 14, length(string(S)) + nchar) # length("nrows: ") = 7 and length("Architecture: ") = 14 + l2 = max(l2, length(name_stats) + 2 + length(string(T))) # length("{}") = 2 + l3 = max(ndigits(solver.n) + 7, length(storage) + 9) # length("Storage: ") = 9 and length("cols: ") = 7 + format = Printf.Format("│%$(l1)s│%$(l2)s│%$(l3)s│\n") + format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%$(l3)s│\n") + @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^l3) + Printf.format(io, format, "$(name_solver)", "nrows: $(solver.m)", "ncols: $(solver.n)") + @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3) + Printf.format(io, format, "Precision: $FC", "Architecture: $architecture","Storage: $storage") + @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3) Printf.format(io, format, "Attribute", "Type", "Size") - @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18) - for i=1:fieldcount(workspace)-1 # show stats seperately - type_i = fieldtype(workspace, i) + @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3) + for i=1:fieldcount(workspace) name_i = fieldname(workspace, i) - len = if type_i <: AbstractVector - field_i = getfield(solver, name_i) - ni = length(field_i) - if eltype(type_i) <: AbstractVector - "$(ni) x $(length(field_i[1]))" - else - length(field_i) - end - else - 0 - end - if (name_i in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV") - Printf.format(io, format2, string(name_i), type_i, len) + type_i = fieldtype(workspace, i) + field_i = getfield(solver, name_i) + size_i = ksizeof(field_i) + if (name_i::Symbol in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV") + (size_i ≠ 0) && Printf.format(io, format2, string(name_i), type_i, format_bytes(size_i)) else - Printf.format(io, format, string(name_i), type_i, len) + (size_i ≠ 0) && Printf.format(io, format, string(name_i), type_i, format_bytes(size_i)) end end - @printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^18) - show_stats && show(io, solver.stats) + @printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^l3) + if show_stats + @printf(io, "\n") + show(io, solver.stats) + end return nothing end diff --git a/src/krylov_stats.jl b/src/krylov_stats.jl index a662fa0a0..392912895 100644 --- a/src/krylov_stats.jl +++ b/src/krylov_stats.jl @@ -1,3 +1,6 @@ +export KrylovStats, SimpleStats, LsmrStats, LanczosStats, LanczosShiftStats, +SymmlqStats, AdjointStats, LNLQStats, LSLQStats + "Abstract type for statistics returned by a solver" abstract type KrylovStats{T} end @@ -21,6 +24,12 @@ mutable struct SimpleStats{T} <: KrylovStats{T} status :: String end +function reset!(stats :: SimpleStats) + empty!(stats.residuals) + empty!(stats.Aresiduals) + empty!(stats.Acond) +end + """ Type for statistics returned by LSMR. 
The attributes are: - niter @@ -47,6 +56,11 @@ mutable struct LsmrStats{T} <: KrylovStats{T} status :: String end +function reset!(stats :: LsmrStats) + empty!(stats.residuals) + empty!(stats.Aresiduals) +end + """ Type for statistics returned by CG-LANCZOS, the attributes are: - niter @@ -67,6 +81,10 @@ mutable struct LanczosStats{T} <: KrylovStats{T} status :: String end +function reset!(stats :: LanczosStats) + empty!(stats.residuals) +end + """ Type for statistics returned by CG-LANCZOS with shifts, the attributes are: - niter @@ -117,6 +135,13 @@ mutable struct SymmlqStats{T} <: KrylovStats{T} status :: String end +function reset!(stats :: SymmlqStats) + empty!(stats.residuals) + empty!(stats.residualscg) + empty!(stats.errors) + empty!(stats.errorscg) +end + """ Type for statistics returned by adjoint systems solvers BiLQR and TriLQR, the attributes are: - niter @@ -135,6 +160,11 @@ mutable struct AdjointStats{T} <: KrylovStats{T} status :: String end +function reset!(stats :: AdjointStats) + empty!(stats.residuals_primal) + empty!(stats.residuals_dual) +end + """ Type for statistics returned by the LNLQ method, the attributes are: - niter @@ -155,6 +185,12 @@ mutable struct LNLQStats{T} <: KrylovStats{T} status :: String end +function reset!(stats :: LNLQStats) + empty!(stats.residuals) + empty!(stats.error_bnd_x) + empty!(stats.error_bnd_y) +end + """ Type for statistics returned by the LSLQ method, the attributes are: - niter @@ -181,6 +217,14 @@ mutable struct LSLQStats{T} <: KrylovStats{T} status :: String end +function reset!(stats :: LSLQStats) + empty!(stats.residuals) + empty!(stats.Aresiduals) + empty!(stats.err_lbnds) + empty!(stats.err_ubnds_lq) + empty!(stats.err_ubnds_cg) +end + import Base.show special_fields = Dict( @@ -192,45 +236,24 @@ special_fields = Dict( :err_ubnds_cg => "error bound CG", ) -for f in ["Simple", "Lsmr", "Adjoint", "LNLQ", "LSLQ", "Lanczos", "Symmlq"] - T = Meta.parse("Krylov." * f * "Stats{S}") - - @eval function empty_field!(stats :: $T, i, ::Type{Vector{Si}}) where {S, Si} - statfield = getfield(stats, i) - empty!(statfield) - end - @eval empty_field!(stats :: $T, i, type) where S = stats - - @eval function reset!(stats :: $T) where S - nfield = length($T.types) - for i = 1 : nfield - type = fieldtype($T, i) - empty_field!(stats, i, type) +function show(io :: IO, stats :: KrylovStats) + kst = typeof(stats) + s = string(kst.name.name) * "\n" + nfield = fieldcount(kst) + for i = 1 : nfield + field = fieldname(kst, i) + field_name = if field ∈ keys(special_fields) + special_fields[field] + else + replace(string(field), "_" => " ") end - end -end - -for f in ["Simple", "Lsmr", "Lanczos", "LanczosShift", "Symmlq", "Adjoint", "LNLQ", "LSLQ"] - T = Meta.parse("Krylov." 
* f * "Stats{S}") - - @eval function show(io :: IO, stats :: $T) where S - s = $f * " stats\n" - nfield = length($T.types) - for i = 1 : nfield - field = fieldname($T, i) - field_name = if field ∈ keys(special_fields) - special_fields[field] - else - replace(string(field), "_" => " ") - end - s *= " " * field_name * ":" - statfield = getfield(stats, field) - if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat} - s *= @sprintf " %s\n" vec2str(statfield) - else - s *= @sprintf " %s\n" statfield - end + s *= " " * field_name * ":" + statfield = getfield(stats, field) + if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat} + s *= @sprintf " %s\n" vec2str(statfield) + else + s *= @sprintf " %s\n" statfield end - print(io, s) end + print(io, s) end diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl index 6f0c1c382..6049f9c28 100644 --- a/src/krylov_utils.jl +++ b/src/krylov_utils.jl @@ -1,3 +1,8 @@ +export kstdout + +"Default I/O stream for all Krylov methods." +const kstdout = Core.stdout + """ FloatOrComplex{T} Union type of `T` and `Complex{T}` where T is an `AbstractFloat`. @@ -92,8 +97,8 @@ function sym_givens(a :: Complex{T}, b :: Complex{T}) where T <: AbstractFloat return (c, s, ρ) end -@inline sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b)) -@inline sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b) +sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b)) +sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b) """ roots = roots_quadratic(q₂, q₁, q₀; nitref) @@ -111,68 +116,86 @@ function roots_quadratic(q₂ :: T, q₁ :: T, q₀ :: T; # Case where q(x) is linear. if q₂ == zero(T) if q₁ == zero(T) - root = [zero(T)] - q₀ == zero(T) || (root = T[]) + q₀ == zero(T) || error("The quadratic `q` doesn't have real roots.") + root = zero(T) else - root = [-q₀ / q₁] + root = -q₀ / q₁ end - return root + return (root, root) end # Case where q(x) is indeed quadratic. rhs = √eps(T) * q₁ * q₁ if abs(q₀ * q₂) > rhs ρ = q₁ * q₁ - 4 * q₂ * q₀ - ρ < 0 && return T[] + ρ < 0 && return error("The quadratic `q` doesn't have real roots.") d = -(q₁ + copysign(sqrt(ρ), q₁)) / 2 - roots = [d / q₂, q₀ / d] + root1 = d / q₂ + root2 = q₀ / d else # Ill-conditioned quadratic. - roots = [-q₁ / q₂, zero(T)] + root1 = -q₁ / q₂ + root2 = zero(T) end # Perform a few Newton iterations to improve accuracy. - for k = 1 : 2 - root = roots[k] - for it = 1 : nitref - q = (q₂ * root + q₁) * root + q₀ - dq = 2 * q₂ * root + q₁ - dq == zero(T) && continue - root = root - q / dq - end - roots[k] = root + for it = 1 : nitref + q = (q₂ * root1 + q₁) * root1 + q₀ + dq = 2 * q₂ * root1 + q₁ + dq == zero(T) && continue + root1 = root1 - q / dq end - return roots -end + for it = 1 : nitref + q = (q₂ * root2 + q₁) * root2 + q₀ + dq = 2 * q₂ * root2 + q₁ + dq == zero(T) && continue + root2 = root2 - q / dq + end + return (root1, root2) +end """ - roots = to_boundary(x, d, radius; flip, xNorm2, dNorm2) - -Given a trust-region radius `radius`, a vector `x` lying inside the -trust-region and a direction `d`, return `σ1` and `σ2` such that - - ‖x + σi d‖ = radius, i = 1, 2 + s = vec2str(x; ndisp) -in the Euclidean norm. If known, ‖x‖² may be supplied in `xNorm2`. +Display an array in the form -If `flip` is set to `true`, `σ1` and `σ2` are computed such that + [ -3.0e-01 -5.1e-01 1.9e-01 ... 
-2.3e-01 -4.4e-01 2.4e-01 ] - ‖x - σi d‖ = radius, i = 1, 2. +with (ndisp - 1)/2 elements on each side. """ -function to_boundary(x :: Vector{T}, d :: Vector{T}, - radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where T <: Number - radius > 0 || error("radius must be positive") - - # ‖d‖² σ² + 2 xᵀd σ + (‖x‖² - radius²). - xd = dot(x, d) - flip && (xd = -xd) - dNorm2 == zero(T) && (dNorm2 = dot(d, d)) - dNorm2 == zero(T) && error("zero direction") - xNorm2 == zero(T) && (xNorm2 = dot(x, x)) - (xNorm2 ≤ radius * radius) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius * radius)) - roots = roots_quadratic(dNorm2, 2 * xd, xNorm2 - radius * radius) - return roots # `σ1` and `σ2` +function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing} + n = length(x) + if n ≤ ndisp + ndisp = n + nside = n + else + nside = max(1, div(ndisp - 1, 2)) + end + s = "[" + i = 1 + while i ≤ nside + if x[i] !== missing + s *= @sprintf("%8.1e ", x[i]) + else + s *= " ✗✗✗✗ " + end + i += 1 + end + if i ≤ div(n, 2) + s *= "... " + end + i = max(i, n - nside + 1) + while i ≤ n + if x[i] !== missing + s *= @sprintf("%8.1e ", x[i]) + else + s *= " ✗✗✗✗ " + end + i += 1 + end + s *= "]" + return s end """ @@ -201,84 +224,125 @@ function ktypeof(v::S) where S <: AbstractVector end function ktypeof(v::S) where S <: SubArray - return ktypeof(v.parent) + vp = v.parent + if isa(vp, DenseMatrix) + M = typeof(vp) + return matrix_to_vector(M) # view of a row or a column of a matrix + else + return ktypeof(vp) # view of a vector + end +end + +""" + M = vector_to_matrix(S) + +Return the dense matrix storage type `M` related to the dense vector storage type `S`. +""" +function vector_to_matrix(::Type{S}) where S <: DenseVector + T = hasproperty(S, :body) ? S.body : S + par = T.parameters + npar = length(par) + (2 ≤ npar ≤ 3) || error("Type $S is not supported.") + if npar == 2 + M = T.name.wrapper{par[1], 2} + else + M = T.name.wrapper{par[1], 2, par[3]} + end + return M +end + +""" + S = matrix_to_vector(M) + +Return the dense vector storage type `S` related to the dense matrix storage type `M`. +""" +function matrix_to_vector(::Type{M}) where M <: DenseMatrix + T = hasproperty(M, :body) ? M.body : M + par = T.parameters + npar = length(par) + (2 ≤ npar ≤ 3) || error("Type $M is not supported.") + if npar == 2 + S = T.name.wrapper{par[1], 1} + else + S = T.name.wrapper{par[1], 1, par[3]} + end + return S end """ v = kzeros(S, n) -Create an AbstractVector of storage type `S` of length `n` only composed of zero. +Create a vector of storage type `S` of length `n` only composed of zero. """ -@inline kzeros(S, n) = fill!(S(undef, n), zero(eltype(S))) +kzeros(S, n) = fill!(S(undef, n), zero(eltype(S))) """ v = kones(S, n) -Create an AbstractVector of storage type `S` of length `n` only composed of one. +Create a vector of storage type `S` of length `n` only composed of one. """ -@inline kones(S, n) = fill!(S(undef, n), one(eltype(S))) +kones(S, n) = fill!(S(undef, n), one(eltype(S))) -@inline allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)) && (solver.:($v) = S(undef, n)) +allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)::S) && (solver.:($v)::S = S(undef, n)) -@inline kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0) +kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0) -@inline mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? 
ldiv!(y, P, x) : mul!(y, P, x) +mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x) -@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy) -@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy) -@inline krylov_dot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = dot(x, y) +kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy) +kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy) +kdot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = dot(x, y) -@inline krylov_dotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = krylov_dot(n, x, dx, y, dy) -@inline krylov_dotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(krylov_dot(n, x, dx, y, dy)) +kdotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = kdot(n, x, dx, y, dy) +kdotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(kdot(n, x, dx, y, dy)) -@inline krylov_norm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx) -@inline krylov_norm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: Number = norm(x) +knrm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx) +knrm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = norm(x) -@inline krylov_scal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx) -@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: Number = (x .*= s) -@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = krylov_scal!(n, Complex{T}(s), x, dx) +kscal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx) +kscal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = (x .*= s) +kscal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = kscal!(n, Complex{T}(s), x, dx) -@inline krylov_axpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy) -@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpy!(s, x, y) -@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpy!(n, Complex{T}(s), x, dx, y, dy) +kaxpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy) +kaxpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: 
Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpy!(s, x, y) +kaxpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpy!(n, Complex{T}(s), x, dx, y, dy) -@inline krylov_axpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy) -@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpby!(s, x, t, y) -@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, t, y, dy) -@inline krylov_axpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, s, x, dx, Complex{T}(t), y, dy) -@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy) +kaxpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpby!(s, x, t, y) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, t, y, dy) +kaxpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, s, x, dx, Complex{T}(t), y, dy) +kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy) -@inline krylov_copy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy) -@inline krylov_copy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = copyto!(y, x) +kcopy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy) +kcopy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = copyto!(y, x) # the macros are just for readability, so we don't have to write the increments (always equal to 1) - macro kdot(n, x, y) - return esc(:(krylov_dot($n, $x, 1, $y, 1))) + return esc(:(Krylov.kdot($n, $x, 1, $y, 1))) end macro kdotr(n, x, y) - return esc(:(krylov_dotr($n, $x, 1, $y, 1))) + return esc(:(Krylov.kdotr($n, $x, 1, $y, 1))) end macro knrm2(n, x) - return esc(:(krylov_norm2($n, $x, 1))) + return esc(:(Krylov.knrm2($n, $x, 1))) end macro kscal!(n, s, x) - return esc(:(krylov_scal!($n, $s, $x, 1))) + return esc(:(Krylov.kscal!($n, $s, $x, 1))) end macro kaxpy!(n, s, x, y) - return esc(:(krylov_axpy!($n, $s, $x, 1, $y, 1))) + return 
esc(:(Krylov.kaxpy!($n, $s, $x, 1, $y, 1))) end macro kaxpby!(n, s, x, t, y) - return esc(:(krylov_axpby!($n, $s, $x, 1, $t, $y, 1))) + return esc(:(Krylov.kaxpby!($n, $s, $x, 1, $t, $y, 1))) end macro kcopy!(n, x, y) - return esc(:(krylov_copy!($n, $x, 1, $y, 1))) + return esc(:(Krylov.kcopy!($n, $x, 1, $y, 1))) end macro kswap(x, y) @@ -294,44 +358,35 @@ macro kref!(n, x, y, c, s) end """ - s = vec2str(x; ndisp) + roots = to_boundary(n, x, d, radius; flip, xNorm2, dNorm2) -Display an array in the form +Given a trust-region radius `radius`, a vector `x` lying inside the +trust-region and a direction `d`, return `σ1` and `σ2` such that - [ -3.0e-01 -5.1e-01 1.9e-01 ... -2.3e-01 -4.4e-01 2.4e-01 ] + ‖x + σi d‖ = radius, i = 1, 2 -with (ndisp - 1)/2 elements on each side. +in the Euclidean norm. +`n` is the length of vectors `x` and `d`. +If known, ‖x‖² and ‖d‖² may be supplied with `xNorm2` and `dNorm2`. + +If `flip` is set to `true`, `σ1` and `σ2` are computed such that + + ‖x - σi d‖ = radius, i = 1, 2. """ -function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing} - n = length(x) - if n ≤ ndisp - ndisp = n - nside = n - else - nside = max(1, div(ndisp - 1, 2)) - end - s = "[" - i = 1 - while i ≤ nside - if x[i] !== missing - s *= @sprintf("%8.1e ", x[i]) - else - s *= " ✗✗✗✗ " - end - i += 1 - end - if i ≤ div(n, 2) - s *= "... " - end - i = max(i, n - nside + 1) - while i ≤ n - if x[i] !== missing - s *= @sprintf("%8.1e ", x[i]) - else - s *= " ✗✗✗✗ " - end - i += 1 - end - s *= "]" - return s +function to_boundary(n :: Int, x :: AbstractVector{FC}, d :: AbstractVector{FC}, radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}} + radius > 0 || error("radius must be positive") + + # ‖d‖² σ² + (xᴴd + dᴴx) σ + (‖x‖² - Δ²). + rxd = @kdotr(n, x, d) + flip && (rxd = -rxd) + dNorm2 == zero(T) && (dNorm2 = @kdotr(n, d, d)) + dNorm2 == zero(T) && error("zero direction") + xNorm2 == zero(T) && (xNorm2 = @kdotr(n, x, x)) + radius2 = radius * radius + (xNorm2 ≤ radius2) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius2)) + + # q₂ = ‖d‖², q₁ = xᴴd + dᴴx, q₀ = ‖x‖² - Δ² + # ‖x‖² ≤ Δ² ⟹ (q₁)² - 4 * q₂ * q₀ ≥ 0 + roots = roots_quadratic(dNorm2, 2 * rxd, xNorm2 - radius2) + return roots # `σ1` and `σ2` end diff --git a/src/lnlq.jl b/src/lnlq.jl index a1f890de2..deda7336f 100644 --- a/src/lnlq.jl +++ b/src/lnlq.jl @@ -9,9 +9,9 @@ # and is equivalent to applying the SYMMLQ method # to the linear system # -# AAᵀy = b with x = Aᵀy and can be reformulated as +# AAᴴy = b with x = Aᴴy and can be reformulated as # -# [ -I Aᵀ ][ x ] = [ 0 ] +# [ -I Aᴴ ][ x ] = [ 0 ] # [ A ][ y ] [ b ]. # # This method is based on the Golub-Kahan bidiagonalization process and is described in @@ -26,10 +26,14 @@ export lnlq, lnlq! """ (x, y, stats) = lnlq(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), σ::T=zero(T), - atol::T=√eps(T), rtol::T=√eps(T), etolx::T=√eps(T), etoly::T=√eps(T), itmax::Int=0, - transfer_to_craig::Bool=true, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + transfer_to_craig::Bool=true, + sqd::Bool=false, λ::T=zero(T), + σ::T=zero(T), utolx::T=√eps(T), + utoly::T=√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. 
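The reworked `to_boundary` takes the vector length `n` explicitly and reduces to `roots_quadratic`, which now returns its two roots as a tuple instead of allocating a `Vector` (and throws when no real root exists). For example, from the center of the trust region both boundary steps have magnitude `radius`:

    using Krylov

    n = 3
    x = zeros(n)
    d = [1.0, 0.0, 0.0]
    σ1, σ2 = Krylov.to_boundary(n, x, d, 2.0)    # (-2.0, 2.0): ‖x + σi d‖ = 2
    Krylov.roots_quadratic(1.0, -3.0, 2.0)       # (2.0, 1.0), a tuple instead of T[]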
`FC` is `T` or `Complex{T}`. @@ -38,17 +42,17 @@ Find the least-norm solution of the consistent linear system Ax + λ²y = b -using the LNLQ method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LNLQ method, where λ ≥ 0 is a regularization parameter. For a system in the form Ax = b, LNLQ method is equivalent to applying -SYMMLQ to AAᵀy = b and recovering x = Aᵀy but is more stable. +SYMMLQ to AAᴴy = b and recovering x = Aᴴy but is more stable. Note that y are the Lagrange multipliers of the least-norm problem minimize ‖x‖ s.t. Ax = b. If `λ > 0`, LNLQ solves the symmetric and quasi-definite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A λ²E ] [ y ] = [ b ], where E and F are symmetric and positive definite. @@ -59,12 +63,12 @@ The system above represents the optimality conditions of min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`. If `λ = 0`, LNLQ solves the symmetric and indefinite system - [ -F Aᵀ ] [ x ] [ 0 ] + [ -F Aᴴ ] [ x ] [ 0 ] [ A 0 ] [ y ] = [ b ]. The system above represents the optimality conditions of @@ -75,12 +79,39 @@ In this case, `M` can still be specified and indicates the weighted norm in whic In this implementation, both the x and y-parts of the solution are returned. -`etolx` and `etoly` are tolerances on the upper bound of the distance to the solution ‖x-xₛ‖ and ‖y-yₛ‖, respectively. +`utolx` and `utoly` are tolerances on the upper bound of the distance to the solution ‖x-x*‖ and ‖y-y*‖, respectively. The bound is valid if λ>0 or σ>0 where σ should be strictly smaller than the smallest positive singular value. For instance σ:=(1-1e-7)σₘᵢₙ . -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `transfer_to_craig`: transfer from the LNLQ point to the CRAIG point, when it exists. The transfer is based on the residual norm; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`; +* `utolx`: tolerance on the upper bound on the distance to the solution `‖x-x*‖`; +* `utoly`: tolerance on the upper bound on the distance to the solution `‖y-y*‖`; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in a [`LNLQStats`](@ref) structure. #### Reference @@ -104,14 +135,18 @@ See [`LnlqSolver`](@ref) for more details about the `solver`. function lnlq! end function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), σ :: T=zero(T), - atol :: T=√eps(T), rtol :: T=√eps(T), etolx :: T=√eps(T), etoly :: T=√eps(T), itmax :: Int=0, - transfer_to_craig :: Bool=true, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + M=I, N=I, ldiv :: Bool=false, + transfer_to_craig :: Bool=true, + sqd :: Bool=false, λ :: T=zero(T), + σ :: T=zero(T), utolx :: T=√eps(T), + utoly :: T=√eps(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LNLQ: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "LNLQ: system of %d equations in %d variables\n", m, n) # Check sqd and λ parameters sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") @@ -123,16 +158,16 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. allocate_if(!MisI, solver, :u, S, m) allocate_if(!NisI, solver, :v, S, n) allocate_if(λ > 0, solver, :q, S, n) - x, Nv, Aᵀu, y, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w̄ + x, Nv, Aᴴu, y, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w̄ Mu, Av, q, stats = solver.Mu, solver.Av, solver.q, solver.stats rNorms, xNorms, yNorms = stats.residuals, stats.error_bnd_x, stats.error_bnd_y reset!(stats) @@ -163,8 +198,8 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = 0 itmax == 0 && (itmax = m + n) - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) + (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, bNorm) # Update iteration index iter = iter + 1 @@ -179,9 +214,9 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; MisI || @kscal!(m, one(FC) / βₖ, Mu) end - # α₁Nv₁ = Aᵀu₁. - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu + # α₁Nv₁ = Aᴴu₁. 
+ mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁ αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N if αₖ ≠ 0 @@ -190,8 +225,8 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; end w̄ .= u # Direction w̄₁ - cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᵀ - sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᵀ + cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᴴ + sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᴴ ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ ηₖ = zero(FC) # Coefficient of M̅ₖ @@ -214,7 +249,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; αhatₖ = αₖ end - # Begin the LQ factorization of (Lₖ)ᵀ = M̅ₖQₖ. + # Begin the LQ factorization of (Lₖ)ᴴ = M̅ₖQₖ. # [ α₁ β₂ 0 • • • 0 ] [ ϵ₁ 0 • • • • 0 ] # [ 0 α₂ • • • ] [ η₂ ϵ₂ • • ] # [ • • • • • • ] [ 0 • • • • ] @@ -225,7 +260,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; ϵbarₖ = αhatₖ # ϵbar₁ = αhat₁ - # Hₖ = Bₖ(Lₖ)ᵀ = [ Lₖ(Lₖ)ᵀ ] ⟹ (Hₖ₋₁)ᵀ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ + # Hₖ = Bₖ(Lₖ)ᴴ = [ Lₖ(Lₖ)ᴴ ] ⟹ (Hₖ₋₁)ᴴ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ # [ αₖβₖ₊₁(eₖ)ᵀ ] # # Solve Lₖtₖ = β₁e₁ and M̅ₖz̅ₖ = tₖ @@ -247,7 +282,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; err_x = τtildeₖ err_y = ζtildeₖ - solved_lq = err_x ≤ etolx || err_y ≤ etoly + solved_lq = err_x ≤ utolx || err_y ≤ utoly history && push!(xNorms, err_x) history && push!(yNorms, err_y) @@ -273,7 +308,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Continue the generalized Golub-Kahan bidiagonalization. # AVₖ = MUₖ₊₁Bₖ - # AᵀUₖ₊₁ = NVₖ(Bₖ)ᵀ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᵀ = NVₖ₊₁(Lₖ₊₁)ᵀ + # AᴴUₖ₊₁ = NVₖ(Bₖ)ᴴ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᴴ = NVₖ₊₁(Lₖ₊₁)ᴴ # # [ α₁ 0 • • • • 0 ] # [ β₂ α₂ • • ] @@ -296,9 +331,9 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu) end - # αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -βₖ₊₁, Nv) + # αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -βₖ₊₁, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁ αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N if αₖ₊₁ ≠ 0 @@ -353,7 +388,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; ρbar = ssig * μbar + csig * σₑₛₜ end - # Continue the LQ factorization of (Lₖ₊₁)ᵀ. + # Continue the LQ factorization of (Lₖ₊₁)ᴴ. # [ηₖ ϵbarₖ βₖ₊₁] [1 0 0 ] = [ηₖ ϵₖ 0 ] # [0 0 αₖ₊₁] [0 cₖ₊₁ sₖ₊₁] [0 ηₖ₊₁ ϵbarₖ₊₁] # [0 sₖ₊₁ -cₖ₊₁] @@ -438,18 +473,15 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; solved_lq = rNorm_lq ≤ ε solved_cg = transfer_to_craig && rNorm_cg ≤ ε if σₑₛₜ > 0 - if transfer_to_craig - solved_cg = solved_cg || err_x ≤ etolx || err_y ≤ etoly - else - solved_lq = solved_lq || err_x ≤ etolx || err_y ≤ etoly - end + solved_lq = solved_lq || err_x ≤ utolx || err_y ≤ utoly + solved_cg = transfer_to_craig && (solved_cg || err_x ≤ utolx || err_y ≤ utoly) end - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm_lq) # Update iteration index. iter = iter + 1 end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") if solved_cg if λ > 0 diff --git a/src/lslq.jl b/src/lslq.jl index 908de19c5..4e26fb67a 100644 --- a/src/lslq.jl +++ b/src/lslq.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. 
+# AᴴAx = Aᴴb. # # LSLQ is formally equivalent to applying SYMMLQ to the normal equations # but should be more stable. @@ -21,15 +21,17 @@ export lslq, lslq! - """ (x, stats) = lslq(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), - atol::T=√eps(T), btol::T=√eps(T), etol::T=√eps(T), - window::Int=5, utol::T=√eps(T), itmax::Int=0, - σ::T=zero(T), transfer_to_lsqr::Bool=false, - conlim::T=1/√eps(T), verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + M=I, N=I, ldiv::Bool=false, + window::Int=5, transfer_to_lsqr::Bool=false, + sqd::Bool=false, λ::T=zero(T), + σ::T=zero(T), etol::T=√eps(T), + utol::T=√eps(T), btol::T=√eps(T), + conlim::T=1/√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -38,31 +40,17 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ²‖x‖₂² -using the LSLQ method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LSLQ method, where λ ≥ 0 is a regularization parameter. LSLQ is formally equivalent to applying SYMMLQ to the normal equations - (AᵀA + λ²I) x = Aᵀb + (AᴴA + λ²I) x = Aᴴb but is more stable. -#### Main features - -* the solution estimate is updated along orthogonal directions -* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing -* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing -* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error) -* if `A` is rank deficient, identify the minimum least-squares solution - -#### Optional arguments - -* `M`: a symmetric and positive definite dual preconditioner -* `N`: a symmetric and positive definite primal preconditioner -* `sqd` indicates that we are solving a symmetric and quasi-definite system with `λ=1` - If `λ > 0`, we solve the symmetric and quasi-definite system [ E A ] [ r ] [ b ] - [ Aᵀ -λ²F ] [ x ] = [ 0 ], + [ Aᴴ -λ²F ] [ x ] = [ 0 ], where E and F are symmetric and positive definite. Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators. @@ -72,39 +60,60 @@ The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LSLQ is then equivalent to applying SYMMLQ to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LSLQ is then equivalent to applying SYMMLQ to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`. If `λ = 0`, we solve the symmetric and indefinite system [ E A ] [ r ] [ b ] - [ Aᵀ 0 ] [ x ] = [ 0 ]. + [ Aᴴ 0 ] [ x ] = [ 0 ]. The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹. -In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured. +In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured. `r` can be recovered by computing `E⁻¹(b - Ax)`. 
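As a sanity check of the behavior documented above, here is a minimal usage sketch (not part of the patch): it assumes `Krylov` is loaded with this change applied, and the 3×2 matrix and right-hand side are invented for illustration.

    using Krylov, LinearAlgebra

    A = [1.0 0.0; 0.0 2.0; 1.0 1.0]  # small overdetermined system, E = F = I
    b = [1.0, 1.0, 2.0]

    # λ = 0: plain least squares; transfer_to_lsqr=true returns the LSQR point.
    x, stats = lslq(A, b; transfer_to_lsqr=true)

    r = b - A * x       # with E = I, this is r = E⁻¹(b - Ax)
    @show norm(A' * r)  # optimality residual ‖Aᴴr‖ ≈ 0 up to tolerances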
-* `λ` is a regularization parameter (see the problem statement above) -* `σ` is an underestimate of the smallest nonzero singular value of `A`---setting `σ` too large will result in an error in the course of the iterations -* `atol` is a stopping tolerance based on the residual -* `btol` is a stopping tolerance used to detect zero-residual problems -* `etol` is a stopping tolerance based on the lower bound on the error -* `window` is the number of iterations used to accumulate a lower bound on the error -* `utol` is a stopping tolerance based on the upper bound on the error -* `transfer_to_lsqr` return the CG solution estimate (i.e., the LSQR point) instead of the LQ estimate -* `itmax` is the maximum number of iterations (0 means no imposed limit) -* `conlim` is the limit on the estimated condition number of `A` beyond which the solution will be abandoned -* `verbose` determines verbosity. - -#### Return values +#### Main features -`lslq` returns the tuple `(x, stats)` where +* the solution estimate is updated along orthogonal directions +* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing +* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing +* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error) +* if `A` is rank deficient, identify the minimum least-squares solution -* `x` is the LQ solution estimate -* `stats` collects other statistics on the run in a LSLQStats +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `transfer_to_lsqr`: transfer from the LSLQ point to the LSQR point, when it exists. The transfer is based on the residual norm; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`; +* `etol`: stopping tolerance based on the lower bound on the error; +* `utol`: stopping tolerance based on the upper bound on the error; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`LSLQStats`](@ref) structure. 
* `stats.err_lbnds` is a vector of lower bounds on the LQ error---the vector is empty if `window` is set to zero * `stats.err_ubnds_lq` is a vector of upper bounds on the LQ error---the vector is empty if `σ` is left at zero @@ -116,8 +125,8 @@ The iterations stop as soon as one of the following conditions holds true: * the optimality residual is sufficiently small (`stats.status = "found approximate minimum least-squares solution"`) in the sense that either - * ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ atol, or - * 1 + ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ 1 + * ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ atol, or + * 1 + ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ 1 * an approximate zero-residual solution has been found (`stats.status = "found approximate zero-residual solution"`) in the sense that either * ‖r‖ / ‖b‖ ≤ btol + atol ‖A‖ * ‖xᴸ‖ / ‖b‖, or * 1 + ‖r‖ / ‖b‖ ≤ 1 @@ -127,9 +136,6 @@ The iterations stop as soon as one of the following conditions holds true: * the lower bound on the LQ forward error is less than etol * ‖xᴸ‖ * the upper bound on the CG forward error is less than utol * ‖xᶜ‖ -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. - #### References * R. Estrin, D. Orban and M. A. Saunders, [*Euclidean-norm error bounds for SYMMLQ and CG*](https://doi.org/10.1137/16M1094816), SIAM Journal on Matrix Analysis and Applications, 40(1), pp. 235--253, 2019. @@ -153,16 +159,19 @@ See [`LslqSolver`](@ref) for more details about the `solver`. function lslq! end function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), - atol :: T=√eps(T), btol :: T=√eps(T), etol :: T=√eps(T), - utol :: T=√eps(T), itmax :: Int=0, σ :: T=zero(T), - transfer_to_lsqr :: Bool=false, conlim :: T=1/√eps(T), + M=I, N=I, ldiv :: Bool=false, + transfer_to_lsqr :: Bool=false, + sqd :: Bool=false, λ :: T=zero(T), + σ :: T=zero(T), etol :: T=√eps(T), + utol :: T=√eps(T), btol :: T=√eps(T), + conlim :: T=1/√eps(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback=solver->false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LSLQ: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "LSLQ: system of %d equations in %d variables\n", m, n) # Check sqd and λ parameters sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") @@ -174,15 +183,15 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace.
allocate_if(!MisI, solver, :u, S, m) allocate_if(!NisI, solver, :v, S, n) - x, Nv, Aᵀu, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.w̄ + x, Nv, Aᴴu, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.w̄ Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats rNorms, ArNorms, err_lbnds = stats.residuals, stats.Aresiduals, stats.err_lbnds err_ubnds_lq, err_ubnds_cg = stats.err_ubnds_lq, stats.err_ubnds_cg @@ -213,12 +222,12 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; @kscal!(m, one(FC)/β₁, u) MisI || @kscal!(m, one(FC)/β₁, Mu) - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) α = sqrt(@kdotr(n, v, Nv)) # = α₁ - # Aᵀb = 0 so x = 0 is a minimum least-squares solution + # Aᴴb = 0 so x = 0 is a minimum least-squares solution if α == 0 stats.niter = 0 stats.solved, stats.inconsistent = true, false @@ -274,11 +283,12 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = 0 itmax == 0 && (itmax = m + n) - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm) status = "unknown" - solved = solved_mach = solved_lim = (rNorm ≤ atol) + ε = atol + rtol * β₁ + solved = solved_mach = solved_lim = (rNorm ≤ ε) tired = iter ≥ itmax ill_cond = ill_cond_mach = ill_cond_lim = false zero_resid = zero_resid_mach = zero_resid_lim = false @@ -298,9 +308,9 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; @kscal!(m, one(FC)/β, u) MisI || @kscal!(m, one(FC)/β, Mu) - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) + # 2. 
αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) α = sqrt(@kdotr(n, v, Nv)) if α ≠ 0 @@ -388,11 +398,11 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; end end - test1 = rNorm / β₁ + test1 = rNorm test2 = ArNorm / (Anorm * rNorm) test3 = 1 / Acond - t1 = test1 / (one(T) + Anorm * xlqNorm / β₁) - rtol = btol + atol * Anorm * xlqNorm / β₁ + t1 = test1 / (one(T) + Anorm * xlqNorm) + tol = btol + atol * Anorm * xlqNorm / β₁ # update LSLQ point for next iteration @kaxpy!(n, c * ζ, w̄, x) @@ -407,7 +417,7 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; # check stopping condition based on forward error lower bound err_vec[mod(iter, window) + 1] = ζ if iter ≥ window - err_lbnd = norm(err_vec) + err_lbnd = @knrm2(window, err_vec) history && push!(err_lbnds, err_lbnd) fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm end @@ -432,16 +442,16 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC}; tired = iter ≥ itmax ill_cond_lim = (test3 ≤ ctol) solved_lim = (test2 ≤ atol) - zero_resid_lim = (test1 ≤ rtol) + zero_resid_lim = (test1 ≤ ε) ill_cond = ill_cond_mach || ill_cond_lim zero_resid = zero_resid_mach || zero_resid_lim solved = solved_mach || solved_lim || zero_resid || fwd_err_lbnd || fwd_err_ubnd iter = iter + 1 - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm, Acond, xlqNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm, Acond, xlqNorm) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") if transfer_to_lsqr # compute LSQR point @kaxpy!(n, ζ̄ , w̄, x) diff --git a/src/lsmr.jl b/src/lsmr.jl index f4d8349d1..781d9448a 100644 --- a/src/lsmr.jl +++ b/src/lsmr.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. # # LSMR is formally equivalent to applying MINRES to the normal equations # but should be more stable. It is also formally equivalent to CRLS though @@ -24,17 +24,16 @@ export lsmr, lsmr! - """ (x, stats) = lsmr(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), + M=I, N=I, ldiv::Bool=false, + window::Int=5, sqd::Bool=false, λ::T=zero(T), + radius::T=zero(T), etol::T=√eps(T), axtol::T=√eps(T), btol::T=√eps(T), - atol::T=zero(T), rtol::T=zero(T), - etol::T=√eps(T), window::Int=5, - itmax::Int=0, conlim::T=1/√eps(T), - radius::T=zero(T), verbose::Int=0, - history::Bool=false, ldiv::Bool=false, - callback=solver->false) + conlim::T=1/√eps(T), atol::T=zero(T), + rtol::T=zero(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -43,24 +42,24 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ²‖x‖₂² -using the LSMR method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LSMR method, where λ ≥ 0 is a regularization parameter. LSMR is formally equivalent to applying MINRES to the normal equations - (AᵀA + λ²I) x = Aᵀb + (AᴴA + λ²I) x = Aᴴb (and therefore to CRLS) but is more stable. -LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂. +LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂. 
It is formally equivalent to CRLS, though can be substantially more accurate. LSMR can be also used to find a null vector of a singular matrix A -by solving the problem `min ‖Aᵀx - b‖` with any nonzero vector `b`. -At a minimizer, the residual vector `r = b - Aᵀx` will satisfy `Ar = 0`. +by solving the problem `min ‖Aᴴx - b‖` with any nonzero vector `b`. +At a minimizer, the residual vector `r = b - Aᴴx` will satisfy `Ar = 0`. If `λ > 0`, we solve the symmetric and quasi-definite system [ E A ] [ r ] [ b ] - [ Aᵀ -λ²F ] [ x ] = [ 0 ], + [ Aᴴ -λ²F ] [ x ] = [ 0 ], where E and F are symmetric and positive definite. Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators. @@ -70,23 +69,51 @@ The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LSMR is then equivalent to applying MINRES to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LSMR is then equivalent to applying MINRES to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`. If `λ = 0`, we solve the symmetric and indefinite system [ E A ] [ r ] [ b ] - [ Aᵀ 0 ] [ x ] = [ 0 ]. + [ Aᴴ 0 ] [ x ] = [ 0 ]. The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹. -In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured. +In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured. `r` can be recovered by computing `E⁻¹(b - Ax)`. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. + +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `etol`: stopping tolerance based on the lower bound on the error; +* `axtol`: tolerance on the backward error; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`LsmrStats`](@ref) structure. #### Reference @@ -110,16 +137,18 @@ See [`LsmrSolver`](@ref) for more details about the `solver`. function lsmr! end function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), + M=I, N=I, ldiv :: Bool=false, + sqd :: Bool=false, λ :: T=zero(T), + radius :: T=zero(T), etol :: T=√eps(T), axtol :: T=√eps(T), btol :: T=√eps(T), - atol :: T=zero(T), rtol :: T=zero(T), - etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), - radius :: T=zero(T), verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + conlim :: T=1/√eps(T), atol :: T=zero(T), + rtol :: T=zero(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LSMR: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "LSMR: system of %d equations in %d variables\n", m, n) # Check sqd and λ parameters sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") @@ -131,15 +160,15 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. 
allocate_if(!MisI, solver, :u, S, m) allocate_if(!NisI, solver, :v, S, n) - x, Nv, Aᵀu, h, hbar = solver.x, solver.Nv, solver.Aᵀu, solver.h, solver.hbar + x, Nv, Aᴴu, h, hbar = solver.x, solver.Nv, solver.Aᴴu, solver.h, solver.hbar Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats rNorms, ArNorms = stats.residuals, stats.Aresiduals reset!(stats) @@ -166,8 +195,8 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; @kscal!(m, one(FC)/β₁, u) MisI || @kscal!(m, one(FC)/β₁, Mu) - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) α = sqrt(@kdotr(n, v, Nv)) @@ -210,10 +239,10 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = 0 itmax == 0 && (itmax = m + n) - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm²) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm²) - # Aᵀb = 0 so x = 0 is a minimum least-squares solution + # Aᴴb = 0 so x = 0 is a minimum least-squares solution if α == 0 stats.niter = 0 stats.solved, stats.inconsistent = true, false @@ -248,9 +277,9 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; @kscal!(m, one(FC)/β, u) MisI || @kscal!(m, one(FC)/β, Mu) - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) + # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) α = sqrt(@kdotr(n, v, Nv)) if α ≠ 0 @@ -287,7 +316,7 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; # the step ϕ/ρ is not necessarily positive σ = ζ / (ρ * ρbar) if radius > 0 - t1, t2 = to_boundary(x, hbar, radius) + t1, t2 = to_boundary(n, x, hbar, radius) tmax, tmin = max(t1, t2), min(t1, t2) on_boundary = σ > tmax || σ < tmin σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) @@ -336,7 +365,7 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; t1 = test1 / (one(T) + Anorm * xNorm / β₁) rNormtol = btol + axtol * Anorm * xNorm / β₁ - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²) # Stopping conditions that do not depend on user input. # This is to guard against tolerances that are unreasonably small. @@ -357,7 +386,7 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; zero_resid = zero_resid_mach | zero_resid_lim solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") ill_cond_mach && (status = "condition number seems too large for this machine") diff --git a/src/lsqr.jl b/src/lsqr.jl index dd3779dce..0351b75e1 100644 --- a/src/lsqr.jl +++ b/src/lsqr.jl @@ -5,7 +5,7 @@ # # equivalently, of the normal equations # -# AᵀAx = Aᵀb. +# AᴴAx = Aᴴb. 
# # LSQR is formally equivalent to applying the conjugate gradient method # to the normal equations but should be more stable. It is also formally @@ -24,16 +24,16 @@ export lsqr, lsqr! - """ (x, stats) = lsqr(A, b::AbstractVector{FC}; - M=I, N=I, sqd::Bool=false, λ::T=zero(T), + M=I, N=I, ldiv::Bool=false, + window::Int=5, sqd::Bool=false, λ::T=zero(T), + radius::T=zero(T), etol::T=√eps(T), axtol::T=√eps(T), btol::T=√eps(T), - atol::T=zero(T), rtol::T=zero(T), - etol::T=√eps(T), window::Int=5, - itmax::Int=0, conlim::T=1/√eps(T), - radius::T=zero(T), verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + conlim::T=1/√eps(T), atol::T=zero(T), + rtol::T=zero(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. @@ -42,20 +42,20 @@ Solve the regularized linear least-squares problem minimize ‖b - Ax‖₂² + λ²‖x‖₂² -using the LSQR method, where λ ≥ 0 is a regularization parameter. +of size m × n using the LSQR method, where λ ≥ 0 is a regularization parameter. LSQR is formally equivalent to applying CG to the normal equations - (AᵀA + λ²I) x = Aᵀb + (AᴴA + λ²I) x = Aᴴb (and therefore to CGLS) but is more stable. -LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂. +LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂. It is formally equivalent to CGLS, though can be slightly more accurate. If `λ > 0`, LSQR solves the symmetric and quasi-definite system [ E A ] [ r ] [ b ] - [ Aᵀ -λ²F ] [ x ] = [ 0 ], + [ Aᴴ -λ²F ] [ x ] = [ 0 ], where E and F are symmetric and positive definite. Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators. @@ -65,23 +65,51 @@ The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F. -For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`. -LSQR is then equivalent to applying CG to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`. +For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`. +LSQR is then equivalent to applying CG to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`. If `λ = 0`, we solve the symmetric and indefinite system [ E A ] [ r ] [ b ] - [ Aᵀ 0 ] [ x ] = [ 0 ]. + [ Aᴴ 0 ] [ x ] = [ 0 ]. The system above represents the optimality conditions of minimize ‖b - Ax‖²_E⁻¹. -In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured. +In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured. `r` can be recovered by computing `E⁻¹(b - Ax)`. -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m. 
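To make the equivalence above concrete, a hedged sketch (not part of the patch; the data are random, and the backslash solve is only a reference computation):

    using Krylov, LinearAlgebra

    A = rand(10, 4); b = rand(10); λ = 0.5
    x, stats = lsqr(A, b; λ=λ)

    # With λ > 0, LSQR solves the damped normal equations (AᴴA + λ²I)x = Aᴴb.
    x_ref = (A' * A + λ^2 * I) \ (A' * b)
    @show norm(x - x_ref)  # small, up to the solver tolerances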
+ +#### Keyword arguments + +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems; +* `λ`: regularization parameter; +* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization; +* `etol`: stopping tolerance based on the lower bound on the error; +* `axtol`: tolerance on the backward error; +* `btol`: stopping tolerance used to detect zero-residual problems; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -105,16 +133,18 @@ See [`LsqrSolver`](@ref) for more details about the `solver`. function lsqr! end function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), + M=I, N=I, ldiv :: Bool=false, + sqd :: Bool=false, λ :: T=zero(T), + radius :: T=zero(T), etol :: T=√eps(T), axtol :: T=√eps(T), btol :: T=√eps(T), - atol :: T=zero(T), rtol :: T=zero(T), - etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), - radius :: T=zero(T), verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + conlim :: T=1/√eps(T), atol :: T=zero(T), + rtol :: T=zero(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("LSQR: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "LSQR: system of %d equations in %d variables\n", m, n) # Check sqd and λ parameters sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !") @@ -126,15 +156,15 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. 
allocate_if(!MisI, solver, :u, S, m) allocate_if(!NisI, solver, :v, S, n) - x, Nv, Aᵀu, w = solver.x, solver.Nv, solver.Aᵀu, solver.w + x, Nv, Aᴴu, w = solver.x, solver.Nv, solver.Aᴴu, solver.w Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats rNorms, ArNorms = stats.residuals, stats.Aresiduals reset!(stats) @@ -162,8 +192,8 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; @kscal!(m, one(FC)/β₁, u) MisI || @kscal!(m, one(FC)/β₁, Mu) - mul!(Aᵀu, Aᵀ, u) - Nv .= Aᵀu + mul!(Aᴴu, Aᴴ, u) + Nv .= Aᴴu NisI || mulorldiv!(v, N, Nv, ldiv) Anorm² = @kdotr(n, v, Nv) Anorm = sqrt(Anorm²) @@ -184,8 +214,8 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = 0 itmax == 0 && (itmax = m + n) - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %7s %7s %7s %7s\n", "k", "α", "β", "‖r‖", "‖Aᵀr‖", "compat", "backwrd", "‖A‖", "κ(A)") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %7s %7s %7s %7s\n", "k", "α", "β", "‖r‖", "‖Aᴴr‖", "compat", "backwrd", "‖A‖", "κ(A)") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond) rNorm = β₁ r1Norm = rNorm @@ -194,7 +224,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; history && push!(rNorms, r2Norm) ArNorm = ArNorm0 = α * β history && push!(ArNorms, ArNorm) - # Aᵀb = 0 so x = 0 is a minimum least-squares solution + # Aᴴb = 0 so x = 0 is a minimum least-squares solution if α == 0 stats.niter = 0 stats.solved, stats.inconsistent = true, false @@ -237,9 +267,9 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖² λ > 0 && (Anorm² += λ²) - # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ - mul!(Aᵀu, Aᵀ, u) - @kaxpby!(n, one(FC), Aᵀu, -β, Nv) + # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ + mul!(Aᴴu, Aᴴ, u) + @kaxpby!(n, one(FC), Aᴴu, -β, Nv) NisI || mulorldiv!(v, N, Nv, ldiv) α = sqrt(@kdotr(n, v, Nv)) if α ≠ 0 @@ -272,7 +302,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; xENorm² = xENorm² + ϕ * ϕ err_vec[mod(iter, window) + 1] = ϕ - iter ≥ window && (err_lbnd = norm(err_vec)) + iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) τ = s * ϕ θ = s * α @@ -283,7 +313,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; # the step ϕ/ρ is not necessarily positive σ = ϕ / ρ if radius > 0 - t1, t2 = to_boundary(x, w, radius) + t1, t2 = to_boundary(n, x, w, radius) tmax, tmin = max(t1, t2), min(t1, t2) on_boundary = σ > tmax || σ < tmin σ = σ > 0 ? min(σ, tmax) : max(σ, tmin) @@ -325,7 +355,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; t1 = test1 / (one(T) + Anorm * xNorm / β₁) rNormtol = btol + axtol * Anorm * xNorm / β₁ - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond) # Stopping conditions that do not depend on user input. # This is to guard against tolerances that are unreasonably small. 
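The `to_boundary` calls updated above now pass the dimension `n` first. A quick check of the helper's contract (hedged: `to_boundary` is an internal utility, so it is reached here as `Krylov.to_boundary`, and the vectors are invented): for an iterate `x` inside the ball of radius `Δ` and a direction `d`, both returned roots land on the boundary.

    using Krylov, LinearAlgebra

    n = 3
    x = [0.1, 0.2, 0.0]  # iterate strictly inside the trust region
    d = [1.0, 0.0, 1.0]  # step direction
    Δ = 1.0              # trust-region radius

    σ1, σ2 = Krylov.to_boundary(n, x, d, Δ)
    @show norm(x + σ1 * d)  # ≈ Δ
    @show norm(x + σ2 * d)  # ≈ Δ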
@@ -346,7 +376,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC}; zero_resid = zero_resid_mach | zero_resid_lim solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") ill_cond_mach && (status = "condition number seems too large for this machine") diff --git a/src/minres.jl b/src/minres.jl index cbaefee9f..f82bbc350 100644 --- a/src/minres.jl +++ b/src/minres.jl @@ -3,7 +3,7 @@ # # minimize ‖Ax - b‖₂ # -# where A is square and symmetric. +# where A is Hermitian. # # MINRES is formally equivalent to applying the conjugate residuals method # to Ax = b when A is positive definite, but is more general and also applies @@ -21,20 +21,22 @@ export minres, minres! - """ (x, stats) = minres(A, b::AbstractVector{FC}; - M=I, λ::T=zero(T), atol::T=√eps(T)/100, - rtol::T=√eps(T)/100, ratol :: T=zero(T), - rrtol :: T=zero(T), etol::T=√eps(T), - window::Int=5, itmax::Int=0, - conlim::T=1/√eps(T), verbose::Int=0, - history::Bool=false, ldiv::Bool=false, - callback=solver->false) + M=I, ldiv::Bool=false, window::Int=5, + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), etol::T=√eps(T), + conlim::T=1/√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, stats) = minres(A, b, x0::AbstractVector; kwargs...) + +MINRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + Solve the shifted linear least-squares problem minimize ‖b - (A + λI)x‖₂² @@ -43,26 +45,44 @@ or the shifted linear system (A + λI) x = b -using the MINRES method, where λ ≥ 0 is a shift parameter, -where A is square and symmetric. +of size n using the MINRES method, where λ ≥ 0 is a shift parameter, +where A is Hermitian. MINRES is formally equivalent to applying CR to Ax=b when A is positive definite, but is typically more stable and also applies to the case where A is indefinite. -MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂. +MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂. + +#### Input arguments -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. -MINRES can be warm-started from an initial guess `x0` with the method +#### Optional argument - (x, stats) = minres(A, b, x0; kwargs...) +* `x0`: a vector of length n that represents an initial guess of the solution x. -where `kwargs` are the same keyword arguments as above. +#### Keyword arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. 
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `etol`: stopping tolerance based on the lower bound on the error; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. + +#### Output arguments + +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -99,22 +119,24 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 end function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), atol :: T=√eps(T)/100, rtol :: T=√eps(T)/100, - ratol :: T=zero(T), rrtol :: T=zero(T), etol :: T=√eps(T), - itmax :: Int=0, conlim :: T=1/√eps(T), verbose :: Int=0, - history :: Bool=false, ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - - n, m = size(A) + M=I, ldiv :: Bool=false, + λ :: T=zero(T), atol :: T=√eps(T), + rtol :: T=√eps(T), etol :: T=√eps(T), + conlim :: T=1/√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + + m, n = size(A) m == n || error("System must be square") length(b) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("MINRES: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "MINRES: system of size %d\n", n) # Tests M = Iₙ MisI = (M === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. 
allocate_if(!MisI, solver, :v, S, n) @@ -189,16 +211,15 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = 0 itmax == 0 && (itmax = 2*n) - (verbose > 0) && @printf("%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond) - tol = atol + rtol * β₁ - rNormtol = ratol + rrtol * β₁ + ε = atol + rtol * β₁ stats.status = "unknown" solved = solved_mach = solved_lim = (rNorm ≤ rtol) tired = iter ≥ itmax ill_cond = ill_cond_mach = ill_cond_lim = false - zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ tol) + zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ ε) fwd_err = false user_requested_exit = false @@ -241,7 +262,7 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}; ϵ = sn * β δbar = -cs * β root = sqrt(γbar * γbar + δbar * δbar) - ArNorm = ϕbar * root # = ‖Aᵀrₖ₋₁‖ + ArNorm = ϕbar * root # = ‖Aᴴrₖ₋₁‖ history && push!(ArNorms, ArNorm) # Compute the next plane rotation. @@ -266,7 +287,7 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Compute lower bound on forward error. err_vec[mod(iter, window) + 1] = ϕ - iter ≥ window && (err_lbnd = norm(err_vec)) + iter ≥ window && (err_lbnd = @knrm2(window, err_vec)) γmax = max(γmax, γ) γmin = min(γmin, γ) @@ -292,11 +313,11 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}; Acond = γmax / γmin history && push!(Aconds, Acond) - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2) if iter == 1 && β / β₁ ≤ 10 * ϵM - # Aᵀb = 0 so x = 0 is a minimum least-squares solution - stats.niter = 0 + # Aᴴb = 0 so x = 0 is a minimum least-squares solution + stats.niter = 1 stats.solved, stats.inconsistent = true, true stats.status = "x is a minimum least-squares solution" solver.warm_start = false @@ -314,18 +335,18 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}; # Stopping conditions based on user-provided tolerances. 
tired = iter ≥ itmax ill_cond_lim = (one(T) / Acond ≤ ctol) - solved_lim = (test2 ≤ tol) - zero_resid_lim = (test1 ≤ tol) - resid_decrease_lim = (rNorm ≤ rNormtol) + solved_lim = (test2 ≤ ε) + zero_resid_lim = MisI && (test1 ≤ eps(T)) + resid_decrease_lim = (rNorm ≤ ε) iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²)) user_requested_exit = callback(solver) :: Bool - zero_resid = zero_resid_mach | zero_resid_lim - resid_decrease = resid_decrease_mach | resid_decrease_lim - ill_cond = ill_cond_mach | ill_cond_lim - solved = solved_mach | solved_lim | zero_resid | fwd_err | resid_decrease + zero_resid = zero_resid_mach || zero_resid_lim + resid_decrease = resid_decrease_mach || resid_decrease_lim + ill_cond = ill_cond_mach || ill_cond_lim + solved = solved_mach || solved_lim || zero_resid || fwd_err || resid_decrease end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") ill_cond_mach && (status = "condition number seems too large for this machine") diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl index bbfbf856b..72662f97e 100644 --- a/src/minres_qlp.jl +++ b/src/minres_qlp.jl @@ -18,30 +18,52 @@ export minres_qlp, minres_qlp! """ (x, stats) = minres_qlp(A, b::AbstractVector{FC}; - M=I, atol::T=√eps(T), rtol::T=√eps(T), - ctol::T=√eps(T), λ::T=zero(T), itmax::Int=0, + M=I, ldiv::Bool=false, Artol::T=√eps(T), + λ::T=zero(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, stats) = minres_qlp(A, b, x0::AbstractVector; kwargs...) + +MINRES-QLP can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + MINRES-QLP is the only method based on the Lanczos process that returns the minimum-norm -solution on singular inconsistent systems (A + λI)x = b, where λ is a shift parameter. +solution on singular inconsistent systems (A + λI)x = b of size n, where λ is a shift parameter. It is significantly more complex but can be more reliable than MINRES when A is ill-conditioned. -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. M also indicates the weighted norm in which residuals are measured. -MINRES-QLP can be warm-started from an initial guess `x0` with the method +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. + +#### Keyword arguments - (x, stats) = minres_qlp(A, b, x0; kwargs...) +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `Artol`: relative stopping tolerance based on the Aᴴ-residual norm; +* `λ`: regularization parameter; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). 
Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -80,22 +102,23 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F end function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, atol :: T=√eps(T), rtol :: T=√eps(T), - ctol :: T=√eps(T), λ ::T=zero(T), itmax :: Int=0, + M=I, ldiv :: Bool=false, Artol :: T=√eps(T), + λ ::T=zero(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - n, m = size(A) + m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("MINRES-QLP: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "MINRES-QLP: system of size %d\n", n) # Tests M = Iₙ MisI = (M === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. allocate_if(!MisI, solver, :vₖ, S, n) @@ -147,8 +170,8 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F ε = atol + rtol * rNorm κ = zero(T) - (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %7s %8s %7s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗") + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %7s %8s %7s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗") # Set up workspace. M⁻¹vₖ₋₁ .= zero(FC) @@ -246,7 +269,7 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F # [sₖ -cₖ] [βₖ₊₁ ] [0 ] (cₖ, sₖ, λₖ) = sym_givens(λbarₖ, βₖ₊₁) - # Compute [ zₖ ] = (Qₖ)ᵀβ₁e₁ + # Compute [ zₖ ] = (Qₖ)ᴴβ₁e₁ # [ζbarₖ₊₁] # # [cₖ sₖ] [ζbarₖ] = [ ζₖ ] @@ -312,7 +335,7 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ end - # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᵀ + # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᴴ if iter == 1 # w̅₁ = v₁ @. 
wₖ = vₖ @@ -352,7 +375,7 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F # Update ‖Arₖ₋₁‖ estimate # ‖ Arₖ₋₁ ‖ = |ζbarₖ| * √(|λbarₖ|² + |γbarₖ|²) ArNorm = abs(ζbarₖ) * √(abs2(λbarₖ) + abs2(cₖ₋₁ * βₖ₊₁)) - iter == 1 && (κ = atol + ctol * ArNorm) + iter == 1 && (κ = atol + Artol * ArNorm) history && push!(ArNorms, ArNorm) ANorm = sqrt(ANorm²) @@ -383,14 +406,14 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F # Stopping conditions based on user-provided tolerances. tired = iter ≥ itmax resid_decrease_lim = (rNorm ≤ ε) - zero_resid_lim = (backward ≤ ε) + zero_resid_lim = MisI && (backward ≤ eps(T)) breakdown = βₖ₊₁ ≤ btol user_requested_exit = callback(solver) :: Bool zero_resid = zero_resid_mach | zero_resid_lim resid_decrease = resid_decrease_mach | resid_decrease_lim solved = resid_decrease | zero_resid - inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ ctol) || (breakdown && !solved) + inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ Artol) || (breakdown && !solved) # Update variables if iter ≥ 2 @@ -405,9 +428,9 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F μbarₖ₋₁ = μbarₖ ζbarₖ = ζbarₖ₊₁ βₖ = βₖ₊₁ - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") # Finalize the update of x if iter ≥ 2 diff --git a/src/qmr.jl b/src/qmr.jl index eb4a4eb46..e24fba79a 100644 --- a/src/qmr.jl +++ b/src/qmr.jl @@ -21,28 +21,48 @@ export qmr, qmr! """ - (x, stats) = qmr(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b, - atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + (x, stats) = qmr(A, b::AbstractVector{FC}; + c::AbstractVector{FC}=b, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, + history::Bool=false, callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the square linear system Ax = b using the QMR method. + (x, stats) = qmr(A, b, x0::AbstractVector; kwargs...) + +QMR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +Solve the square linear system Ax = b of size n using QMR. QMR is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`. -The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`. -When `A` is symmetric and `b = c`, QMR is equivalent to MINRES. +The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`. +When `A` is Hermitian and `b = c`, QMR is equivalent to MINRES. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension n; +* `b`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. -QMR can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = qmr(A, b, x0; kwargs...) 
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -83,20 +103,20 @@ end function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b, atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} - n, m = size(A) + m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("QMR: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "QMR: system of size %d\n", n) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p @@ -129,22 +149,22 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst itmax == 0 && (itmax = 2*n) ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm) # Initialize the Lanczos biorthogonalization process. 
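+ # The biorthogonalization builds a basis {v₁, …, vₖ} of the Krylov space generated by A and r₀,
+ # and a basis {u₁, …, uₖ} of the one generated by Aᴴ and c, scaled so that β₁γ₁ = ⟨c, r₀⟩.
+ # If that inner product is zero, the process cannot start, hence the early return below.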
- cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩ - if cᵗb == 0 + cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩ + if cᴴb == 0 stats.niter = 0 stats.solved = false stats.inconsistent = false - stats.status = "Breakdown bᵀc = 0" + stats.status = "Breakdown bᴴc = 0" solver.warm_start = false return solver end - βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀) - γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀) + βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀) + γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀) vₖ₋₁ .= zero(FC) # v₀ = 0 uₖ₋₁ .= zero(FC) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ @@ -153,7 +173,7 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹ wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹ - ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁ + ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁ τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate # Stopping criterion. @@ -169,10 +189,10 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst # Continue the Lanczos biorthogonalization process. # AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ + # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ - mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ + mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ @kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁ @@ -182,9 +202,9 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst @kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ @kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ - pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩ - βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|) - γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁ + pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩ + βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|) + γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁ # Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ]. # [ Oᵀ ] @@ -271,7 +291,7 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ @. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ - if pᵗq ≠ zero(FC) + if pᴴq ≠ zero(FC) @. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q @. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p end @@ -303,10 +323,10 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst resid_decrease_lim = rNorm ≤ ε solved = resid_decrease_lim || resid_decrease_mach tired = iter ≥ itmax - breakdown = !solved && (pᵗq == 0) - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm) + breakdown = !solved && (pᴴq == 0) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0") diff --git a/src/symmlq.jl b/src/symmlq.jl index 7b889c715..81477fc66 100644 --- a/src/symmlq.jl +++ b/src/symmlq.jl @@ -1,5 +1,5 @@ # An implementation of SYMMLQ for the solution of the -# linear system Ax = b, where A is square and symmetric. +# linear system Ax = b, where A is Hermitian. # # This implementation follows the original implementation by # Michael Saunders described in @@ -11,38 +11,62 @@ export symmlq, symmlq! 
- """ - (x, stats) = symmlq(A, b::AbstractVector{FC}; window::Int=0, - M=I, λ::T=zero(T), transfer_to_cg::Bool=true, - λest::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T), - etol::T=√eps(T), itmax::Int=0, conlim::T=1/√eps(T), + (x, stats) = symmlq(A, b::AbstractVector{FC}; + M=I, ldiv::Bool=false, window::Int=5, + transfer_to_cg::Bool=true, λ::T=zero(T), + λest::T=zero(T), etol::T=√eps(T), + conlim::T=1/√eps(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, stats) = symmlq(A, b, x0::AbstractVector; kwargs...) + +SYMMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above + Solve the shifted linear system (A + λI) x = b -using the SYMMLQ method, where λ is a shift parameter, -and A is square and symmetric. +of size n using the SYMMLQ method, where λ is a shift parameter, and A is Hermitian. + +SYMMLQ produces monotonic errors ‖x* - x‖₂. + +#### Input arguments + +* `A`: a linear operator that models a Hermitian matrix of dimension n; +* `b`: a vector of length n. -SYMMLQ produces monotonic errors ‖x*-x‖₂. +#### Optional argument -A preconditioner M may be provided in the form of a linear operator and is -assumed to be symmetric and positive definite. +* `x0`: a vector of length n that represents an initial guess of the solution x. -SYMMLQ can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = symmlq(A, b, x0; kwargs...) +* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning; +* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`; +* `window`: number of iterations used to accumulate a lower bound on the error; +* `transfer_to_cg`: transfer from the SYMMLQ point to the CG point, when it exists. The transfer is based on the residual norm; +* `λ`: regularization parameter; +* `λest`: positive strict lower bound on the smallest eigenvalue `λₘᵢₙ` when solving a positive-definite system, such as `λest = (1-10⁻⁷)λₘᵢₙ`; +* `etol`: stopping tolerance based on the lower bound on the error; +* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SymmlqStats`](@ref) structure. 
#### Reference @@ -79,23 +103,25 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 end function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; - M=I, λ :: T=zero(T), transfer_to_cg :: Bool=true, - λest :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T), - etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T), + M=I, ldiv :: Bool=false, + transfer_to_cg :: Bool=true, λ :: T=zero(T), + λest :: T=zero(T), etol :: T=√eps(T), + conlim :: T=1/√eps(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) m == n || error("System must be square") length(b) == m || error("Inconsistent problem size") - (verbose > 0) && @printf("SYMMLQ: system of size %d\n", n) + (verbose > 0) && @printf(iostream, "SYMMLQ: system of size %d\n", n) # Tests M = Iₙ MisI = (M === I) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") # Set up workspace. allocate_if(!MisI, solver, :v, S, n) @@ -213,8 +239,8 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; iter = 0 itmax == 0 && (itmax = 2 * n) - (verbose > 0) && @printf("%5s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, β, cold, sold, ANorm, Acond) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, β, cold, sold, ANorm, Acond) tol = atol + rtol * β₁ status = "unknown" @@ -301,8 +327,11 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; zetabark = zlist[jx] / clist[jx] if γbar ≠ 0 - theta = abs(sum(clist[i] * sprod[i] * zlist[i] for i = 1 : window)) - theta = zetabark * theta + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2 + theta = zero(T) + for i = 1 : window + theta += clist[i] * sprod[i] * zlist[i] + end + theta = zetabark * abs(theta) + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2 history && (errorscg[iter-window+1] = sqrt(abs(errorscg[iter-window+1]^2 - 2*theta))) else history && (errorscg[iter-window+1] = missing) @@ -345,7 +374,7 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; ANorm = sqrt(ANorm²) test1 = rNorm / (ANorm * xNorm) - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, β, c, s, ANorm, Acond, test1) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, β, c, s, ANorm, Acond, test1) # Reset variables ϵold = ϵ @@ -372,7 +401,7 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}; ill_cond = ill_cond_mach || ill_cond_lim solved = solved_mach || zero_resid || zero_resid_mach || zero_resid_lim || fwd_err || resid_decrease_mach end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") # Compute CG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * w̅ₖ diff --git a/src/tricg.jl b/src/tricg.jl index 5acff2d52..4096a9ffe 100644 --- 
a/src/tricg.jl +++ b/src/tricg.jl @@ -13,30 +13,32 @@ export tricg, tricg! """ (x, y, stats) = tricg(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - spd::Bool=false, snd::Bool=false, flip::Bool=false, - τ::T=one(T), ν::T=-one(T), itmax::Int=0, + M=I, N=I, ldiv::Bool=false, + spd::Bool=false, snd::Bool=false, + flip::Bool=false, τ::T=one(T), + ν::T=-one(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -TriCG solves the symmetric linear system + (x, y, stats) = tricg(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +TriCG can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + +Given a matrix `A` of dimension m × n, TriCG solves the Hermitian linear system [ τE A ] [ x ] = [ b ] - [ Aᵀ νF ] [ y ] [ c ], + [ Aᴴ νF ] [ y ] [ c ], -where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0. +of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0. `b` and `c` must both be nonzero. TriCG could break down if `τ = 0` or `ν = 0`. It's recommended to use TriMR in these cases. -By default, TriCG solves symmetric and quasi-definite linear systems with τ = 1 and ν = -1. -If `flip = true`, TriCG solves another known variant of SQD systems where τ = -1 and ν = 1. -If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved. -If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved. -`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems. +By default, TriCG solves Hermitian and quasi-definite linear systems with τ = 1 and ν = -1. TriCG is based on the preconditioned orthogonal tridiagonalization process and its relation with the preconditioned block-Lanczos process. @@ -50,17 +52,39 @@ It's the Euclidean norm when `M` and `N` are identity operators. TriCG stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`. `atol` is an absolute tolerance and `rtol` is a relative tolerance. -Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. + +#### Optional arguments + +* `x0`: a vector of length m that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. -TriCG can be warm-started from initial guesses `x0` and `y0` with the method +#### Keyword arguments - (x, y, stats) = tricg(A, b, c, x0, y0; kwargs...) 
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear systems; +* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems; +* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems; +* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length m; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### Reference @@ -98,16 +122,18 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: end function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, - τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, + M=I, N=I, ldiv :: Bool=false, + spd :: Bool=false, snd :: Bool=false, + flip :: Bool=false, τ :: T=one(T), + ν :: T=-one(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("TriCG: system of %d equations in %d variables\n", m+n, m+n) + (verbose > 0) && @printf(iostream, "TriCG: system of %d equations in %d variables\n", m+n, m+n) # Check flip, spd and snd parameters spd && flip && error("The matrix cannot be SPD and SQD") @@ -120,8 +146,8 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Determine τ and ν associated to SQD, SPD or SND systems. 
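+ # flip sets τ = -1 and ν = 1; spd sets τ = ν = 1; snd sets τ = ν = -1
+ # (see the keyword arguments above); values passed through τ and ν are kept otherwise.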
flip && (τ = -one(T) ; ν = one(T)) @@ -133,7 +159,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. allocate_if(!MisI, solver, :vₖ, S, m) @@ -164,12 +190,12 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] - # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ] + # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ] if warm_start mul!(b₀, A, Δy) (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) @kaxpby!(m, one(FC), b, -one(FC), b₀) - mul!(c₀, Aᵀ, Δx) + mul!(c₀, Aᴴ, Δx) (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) @kaxpby!(n, one(FC), c, -one(FC), c₀) end @@ -196,7 +222,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: error("c must be nonzero") end - # Initialize directions Gₖ such that Lₖ(Gₖ)ᵀ = (Wₖ)ᵀ + # Initialize directions Gₖ such that L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ gx₂ₖ₋₁ .= zero(FC) gy₂ₖ₋₁ .= zero(FC) gx₂ₖ .= zero(FC) @@ -207,8 +233,8 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: history && push!(rNorms, rNorm) ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ) # Set up workspace. d₂ₖ₋₃ = d₂ₖ₋₂ = zero(T) @@ -231,10 +257,10 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: # Continue the orthogonal tridiagonalization process. # AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ + # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ + mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ if iter ≥ 2 @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ @@ -254,14 +280,14 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: # [0 u₁ ••• 0 uₖ] # # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ - # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ] + # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ] # # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ - # [ Aᵀ νF ] [ 0 F ] + # [ Aᴴ νF ] [ 0 F ] # - # TriCG subproblem : (Wₖ)ᵀ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂ + # TriCG subproblem : (Wₖ)ᴴ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂ # - # Update the LDLᵀ factorization of Sₖ.ₖ. + # Update the LDLᴴ factorization of Sₖ.ₖ. # # [ τ α₁ γ₂ 0 • • • • 0 ] # [ ᾱ₁ ν β₂ • • ] @@ -306,7 +332,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: π₂ₖ = -(δₖ * d₂ₖ₋₁ * π₂ₖ₋₁ + λₖ * d₂ₖ₋₂ * π₂ₖ₋₂ + ηₖ * d₂ₖ₋₃ * π₂ₖ₋₃) / d₂ₖ end - # Solve Gₖ = Wₖ(Lₖ)⁻ᵀ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ. + # Solve Gₖ = Wₖ(Lₖ)⁻ᴴ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ. 
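+ # L̄ₖ is unit lower triangular (see the 2×2 block below for iter == 1), so each
+ # new pair of directions is obtained by forward substitution from the previous ones.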
if iter == 1 # [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ] # [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ] @@ -342,7 +368,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: # Compute vₖ₊₁ and uₖ₊₁ MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ - NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ + NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F @@ -388,9 +414,9 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol solved = resid_decrease_lim || resid_decrease_mach tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") breakdown && (status = "inconsistent linear system") diff --git a/src/trilqr.jl b/src/trilqr.jl index edcb4c9b9..e11a8a6c6 100644 --- a/src/trilqr.jl +++ b/src/trilqr.jl @@ -1,5 +1,5 @@ # An implementation of TRILQR for the solution of square or -# rectangular consistent linear adjoint systems Ax = b and Aᵀy = c. +# rectangular consistent linear adjoint systems Ax = b and Aᴴy = c. # # This method is described in # @@ -14,32 +14,53 @@ export trilqr, trilqr! """ (x, y, stats) = trilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + transfer_to_usymcg::Bool=true, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. + (x, y, stats) = trilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +TriLQR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + Combine USYMLQ and USYMQR to solve adjoint systems. [0 A] [y] = [b] - [Aᵀ 0] [x] [c] + [Aᴴ 0] [x] [c] + +USYMLQ is used for solving primal system `Ax = b` of size m × n. +USYMQR is used for solving dual system `Aᴴy = c` of size n × m. + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. -USYMLQ is used for solving primal system `Ax = b`. -USYMQR is used for solving dual system `Aᵀy = c`. +#### Optional arguments -An option gives the possibility of transferring from the USYMLQ point to the -USYMCG point, when it exists. The transfer is based on the residual norm. +* `x0`: a vector of length n that represents an initial guess of the solution x; +* `y0`: a vector of length m that represents an initial guess of the solution y. -TriLQR can be warm-started from initial guesses `x0` and `y0` with the method +#### Keyword arguments - (x, y, stats) = trilqr(A, b, c, x0, y0; kwargs...) +* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. 
If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `y`: a dense vector of length m; +* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure. #### Reference @@ -77,23 +98,24 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : end function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + transfer_to_usymcg :: Bool=true, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("TRILQR: primal system of %d equations in %d variables\n", m, n) - (verbose > 0) && @printf("TRILQR: dual system of %d equations in %d variables\n", n, m) + (verbose > 0) && @printf(iostream, "TRILQR: primal system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "TRILQR: dual system of %d equations in %d variables\n", n, m) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. uₖ₋₁, uₖ, p, d̅, x, stats = solver.uₖ₋₁, solver.uₖ, solver.p, solver.d̅, solver.x, solver.stats @@ -107,7 +129,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : if warm_start mul!(r₀, A, Δx) @kaxpby!(n, one(FC), b, -one(FC), r₀) - mul!(s₀, Aᵀ, Δy) + mul!(s₀, Aᴴ, Δy) @kaxpby!(n, one(FC), c, -one(FC), s₀) end @@ -115,7 +137,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : x .= zero(FC) # x₀ bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖ - # Initial solution y₀ and residual s₀ = c - Aᵀy₀. + # Initial solution y₀ and residual s₀ = c - Aᴴy₀. 
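+ # t accumulates the dual iterate yₖ; its residual norm ‖sₖ‖ is monitored
+ # independently of the primal residual norm ‖rₖ‖.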
t .= zero(FC) # t₀ cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖ @@ -127,8 +149,8 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : εL = atol + rtol * bNorm εQ = atol + rtol * cNorm ξ = zero(T) - (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm) + (verbose > 0) && @printf(iostream, "%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e\n", iter, bNorm, cNorm) # Set up workspace. βₖ = @knrm2(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖ @@ -136,17 +158,17 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : vₖ₋₁ .= zero(FC) # v₀ = 0 uₖ₋₁ .= zero(FC) # u₀ = 0 vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁ - uₖ .= s₀ ./ γₖ # u₁ = (c - Aᵀy₀) / γ₁ + uₖ .= s₀ ./ γₖ # u₁ = (c - Aᴴy₀) / γ₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ + d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁ ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁ - wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ - wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ + wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᴴ + wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᴴ # Stopping criterion. inconsistent = false @@ -166,10 +188,10 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : # Continue the SSY tridiagonalization process. # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ + # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ + mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ @@ -236,7 +258,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ. + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ. # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ if iter ≥ 2 @@ -295,7 +317,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : ψbarₖ = sₖ * ψbarₖ₋₁ end - # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ. + # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ. 
# w₁ = v₁ / δ̄₁ if iter == 2 wₖ₋₁ = wₖ₋₂ @@ -374,11 +396,11 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : user_requested_exit = callback(solver) :: Bool tired = iter ≥ itmax - kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm) - kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "") - kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm) + kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e\n", iter, "", sNorm) + kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s\n", iter, rNorm_lq, "") + kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") # Compute USYMCG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ diff --git a/src/trimr.jl b/src/trimr.jl index bc53633c2..9da4dfa92 100644 --- a/src/trimr.jl +++ b/src/trimr.jl @@ -13,30 +13,31 @@ export trimr, trimr! """ (x, y, stats) = trimr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T), - spd::Bool=false, snd::Bool=false, flip::Bool=false, sp::Bool=false, - τ::T=one(T), ν::T=-one(T), itmax::Int=0, + M=I, N=I, ldiv::Bool=false, + spd::Bool=false, snd::Bool=false, + flip::Bool=false, sp::Bool=false, + τ::T=one(T), ν::T=-one(T), atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false, - ldiv::Bool=false, callback=solver->false) + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -TriMR solves the symmetric linear system + (x, y, stats) = trimr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...) + +TriMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above. + +Given a matrix `A` of dimension m × n, TriMR solves the symmetric linear system [ τE A ] [ x ] = [ b ] - [ Aᵀ νF ] [ y ] [ c ], + [ Aᴴ νF ] [ y ] [ c ], -where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0. +of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0. `b` and `c` must both be nonzero. TriMR handles saddle-point systems (`τ = 0` or `ν = 0`) and adjoint systems (`τ = 0` and `ν = 0`) without any risk of breakdown. By default, TriMR solves symmetric and quasi-definite linear systems with τ = 1 and ν = -1. -If `flip = true`, TriMR solves another known variant of SQD systems where τ = -1 and ν = 1. -If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved. -If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved. -If `sp = true`, τ = 1, ν = 0 and the associated saddle-point linear system is solved. -`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems. TriMR is based on the preconditioned orthogonal tridiagonalization process and its relation with the preconditioned block-Lanczos process. @@ -50,17 +51,40 @@ It's the Euclidean norm when `M` and `N` are identity operators. TriMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`. `atol` is an absolute tolerance and `rtol` is a relative tolerance. 
-Additional details can be displayed if verbose mode is enabled (verbose > 0). -Information will be displayed every `verbose` iterations. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. + +#### Optional arguments + +* `x0`: a vector of length m that represents an initial guess of the solution x; +* `y0`: a vector of length n that represents an initial guess of the solution y. -TriMR can be warm-started from initial guesses `x0` and `y0` with the method +#### Keyword arguments - (x, y, stats) = trimr(A, b, c, x0, y0; kwargs...) +* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system; +* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system; +* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`; +* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear systems; +* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems; +* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems; +* `sp`: if `true`, set `τ = 1` and `ν = 0` for saddle-point systems; +* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length m; +* `y`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. 
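+
+#### Example
+
+A minimal sketch of a saddle-point solve; the sizes and data are illustrative, and `sp = true` selects τ = 1 and ν = 0 as documented above:
+
+    using Krylov
+    m, n = 60, 40
+    A = rand(m, n)  # full column rank with probability one
+    b = rand(m)
+    c = rand(n)
+    x, y, stats = trimr(A, b, c, sp=true)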
#### Reference @@ -98,16 +122,18 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: end function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T), - spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, sp :: Bool=false, - τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0, + M=I, N=I, ldiv :: Bool=false, + spd :: Bool=false, snd :: Bool=false, + flip :: Bool=false, sp :: Bool=false, + τ :: T=one(T), ν :: T=-one(T), atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("TriMR: system of %d equations in %d variables\n", m+n, m+n) + (verbose > 0) && @printf(iostream, "TriMR: system of %d equations in %d variables\n", m+n, m+n) # Check flip, sp, spd and snd parameters spd && flip && error("The matrix cannot be symmetric positive definite and symmetric quasi-definite !") @@ -123,8 +149,8 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Determine τ and ν associated to SQD, SPD or SND systems. flip && (τ = -one(T) ; ν = one(T)) @@ -137,7 +163,7 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. allocate_if(!MisI, solver, :vₖ, S, m) @@ -169,12 +195,12 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0 # [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ] - # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ] + # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ] if warm_start mul!(b₀, A, Δy) (τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀) @kaxpby!(m, one(FC), b, -one(FC), b₀) - mul!(c₀, Aᵀ, Δx) + mul!(c₀, Aᴴ, Δx) (ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀) @kaxpby!(n, one(FC), c, -one(FC), c₀) end @@ -216,8 +242,8 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: history && push!(rNorms, rNorm) ε = atol + rtol * rNorm - (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ) + (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ) # Set up workspace. old_c₁ₖ = old_c₂ₖ = old_c₃ₖ = old_c₄ₖ = zero(T) @@ -244,10 +270,10 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: # Continue the orthogonal tridiagonalization process. 
# AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ + # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ + mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ if iter ≥ 2 @kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁ @@ -261,7 +287,7 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: # Compute vₖ₊₁ and uₖ₊₁ MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ - NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ + NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F @@ -282,10 +308,10 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: # [0 u₁ ••• 0 uₖ] # # rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ - # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ] + # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ] # # block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ - # [ Aᵀ νF ] [ 0 F ] + # [ Aᴴ νF ] [ 0 F ] # # TriMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - β₁e₁ - γ₁e₂ ‖ # @@ -419,7 +445,7 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: @kswap(gy₂ₖ₋₂, gy₂ₖ) end - # Update p̅ₖ = (Qₖ)ᵀ * (β₁e₁ + γ₁e₂) + # Update p̅ₖ = (Qₖ)ᴴ * (β₁e₁ + γ₁e₂) πbis₂ₖ = c₁ₖ * πbar₂ₖ πbis₂ₖ₊₂ = conj(s₁ₖ) * πbar₂ₖ # @@ -490,9 +516,9 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol solved = resid_decrease_lim || resid_decrease_mach tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") breakdown && (status = "inconsistent linear system") diff --git a/src/usymlq.jl b/src/usymlq.jl index 71670c80f..53aef51a3 100644 --- a/src/usymlq.jl +++ b/src/usymlq.jl @@ -21,34 +21,53 @@ export usymlq, usymlq! """ (x, stats) = usymlq(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true, - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + transfer_to_usymcg::Bool=true, atol::T=√eps(T), + rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using the USYMLQ method. + (x, stats) = usymlq(A, b, c, x0::AbstractVector; kwargs...) + +USYMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +USYMLQ determines the least-norm solution of the consistent linear system Ax = b of size m × n. USYMLQ is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`. -The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`. +The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`. The error norm ‖x - x*‖ monotonically decreases in USYMLQ. It's considered a generalization of SYMMLQ. 
It can also be applied to under-determined and over-determined problems. In all cases, problems must be consistent. -An option gives the possibility of transferring to the USYMCG point, -when it exists. The transfer is based on the residual norm. +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. -USYMLQ can be warm-started from an initial guess `x0` with the method +#### Keyword arguments - (x, stats) = usymlq(A, b, c, x0; kwargs...) +* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. The transfer is based on the residual norm; +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -88,22 +107,23 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : end function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true, - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + transfer_to_usymcg :: Bool=true, atol :: T=√eps(T), + rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("USYMLQ: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "USYMLQ: system of %d equations in %d variables\n", m, n) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. 
uₖ₋₁, uₖ, p, Δx, x = solver.uₖ₋₁, solver.uₖ, solver.p, solver.Δx, solver.x @@ -135,8 +155,8 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : itmax == 0 && (itmax = m+n) ε = atol + rtol * bNorm - (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm) + (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, bNorm) βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ @@ -146,7 +166,7 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : uₖ .= c ./ γₖ # u₁ = c / γ₁ cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ - d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ + d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁ ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations @@ -164,10 +184,10 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : # Continue the SSY tridiagonalization process. # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ + # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ + mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ @@ -233,7 +253,7 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁ end - # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ. + # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ. # [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ # [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ if iter ≥ 2 @@ -294,9 +314,9 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : solved_lq = rNorm_lq ≤ ε solved_cg = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε) tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm_lq) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") # Compute USYMCG point # (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ diff --git a/src/usymqr.jl b/src/usymqr.jl index 863390c3f..3876499b5 100644 --- a/src/usymqr.jl +++ b/src/usymqr.jl @@ -21,31 +21,52 @@ export usymqr, usymqr! """ (x, stats) = usymqr(A, b::AbstractVector{FC}, c::AbstractVector{FC}; - atol::T=√eps(T), rtol::T=√eps(T), - itmax::Int=0, verbose::Int=0, history::Bool=false, - callback=solver->false) + atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0, + verbose::Int=0, history::Bool=false, + callback=solver->false, iostream::IO=kstdout) `T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`. `FC` is `T` or `Complex{T}`. -Solve the linear system Ax = b using the USYMQR method. + (x, stats) = usymqr(A, b, c, x0::AbstractVector; kwargs...) + +USYMQR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above. + +USYMQR solves the linear least-squares problem min ‖b - Ax‖² of size m × n. 
+USYMQR solves Ax = b if it is consistent. USYMQR is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`. -The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`. +The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`. The residual norm ‖b - Ax‖ monotonically decreases in USYMQR. It's considered a generalization of MINRES. It can also be applied to under-determined and over-determined problems. USYMQR finds the minimum-norm solution if problems are inconsistent. -USYMQR can be warm-started from an initial guess `x0` with the method +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `b`: a vector of length m; +* `c`: a vector of length n. + +#### Optional argument + +* `x0`: a vector of length n that represents an initial guess of the solution x. + +#### Keyword arguments - (x, stats) = usymqr(A, b, c, x0; kwargs...) +* `atol`: absolute stopping tolerance based on the residual norm; +* `rtol`: relative stopping tolerance based on the residual norm; +* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`; +* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations; +* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms; +* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise; +* `iostream`: stream to which output is logged. -where `kwargs` are the same keyword arguments as above. +#### Output arguments -The callback is called as `callback(solver)` and should return `true` if the main loop should terminate, -and `false` otherwise. +* `x`: a dense vector of length n; +* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure. #### References @@ -85,28 +106,28 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : end function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC}; - atol :: T=√eps(T), rtol :: T=√eps(T), - itmax :: Int=0, verbose :: Int=0, history :: Bool=false, - callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} + atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0, + verbose :: Int=0, history :: Bool=false, + callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}} m, n = size(A) length(b) == m || error("Inconsistent problem size") length(c) == n || error("Inconsistent problem size") - (verbose > 0) && @printf("USYMQR: system of %d equations in %d variables\n", m, n) + (verbose > 0) && @printf(iostream, "USYMQR: system of %d equations in %d variables\n", m, n) # Check type consistency eltype(A) == FC || error("eltype(A) ≠ $FC") - ktypeof(b) == S || error("ktypeof(b) ≠ $S") - ktypeof(c) == S || error("ktypeof(c) ≠ $S") + ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S") + ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S") # Compute the adjoint of A - Aᵀ = A' + Aᴴ = A' # Set up workspace. 
vₖ₋₁, vₖ, q, Δx, x, p = solver.vₖ₋₁, solver.vₖ, solver.q, solver.Δx, solver.x, solver.p wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, stats = solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ, solver.stats warm_start = solver.warm_start - rNorms, AᵀrNorms = stats.residuals, stats.Aresiduals + rNorms, AᴴrNorms = stats.residuals, stats.Aresiduals reset!(stats) r₀ = warm_start ? q : b @@ -133,8 +154,8 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : ε = atol + rtol * rNorm κ = zero(T) - (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᵀrₖ₋₁‖") - kdisplay(iter, verbose) && @printf("%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗") + (verbose > 0) && @printf(iostream, "%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᴴrₖ₋₁‖") + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗") βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖ γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖ @@ -146,7 +167,7 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹ wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹ - ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁ + ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁ # Stopping criterion. solved = rNorm ≤ ε @@ -161,10 +182,10 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : # Continue the SSY tridiagonalization process. # AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ - # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ + # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ - mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ + mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ @kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁ @kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁ @@ -254,9 +275,9 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : rNorm = abs(ζbarₖ₊₁) history && push!(rNorms, rNorm) - # Compute ‖Aᵀrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²). - AᵀrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁)) - history && push!(AᵀrNorms, AᵀrNorm) + # Compute ‖Aᴴrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²). + AᴴrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁)) + history && push!(AᴴrNorms, AᴴrNorm) # Compute vₖ₊₁ and uₖ₊₁. @. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ @@ -286,14 +307,14 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c : βₖ = βₖ₊₁ # Update stopping criterion. - iter == 1 && (κ = atol + rtol * AᵀrNorm) + iter == 1 && (κ = atol + rtol * AᴴrNorm) user_requested_exit = callback(solver) :: Bool solved = rNorm ≤ ε - inconsistent = !solved && AᵀrNorm ≤ κ + inconsistent = !solved && AᴴrNorm ≤ κ tired = iter ≥ itmax - kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, rNorm, AᵀrNorm) + kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e\n", iter, rNorm, AᴴrNorm) end - (verbose > 0) && @printf("\n") + (verbose > 0) && @printf(iostream, "\n") tired && (status = "maximum number of iterations exceeded") solved && (status = "solution good enough given atol and rtol") user_requested_exit && (status = "user-requested exit") diff --git a/test/callback_utils.jl b/test/callback_utils.jl new file mode 100644 index 000000000..f88f01848 --- /dev/null +++ b/test/callback_utils.jl @@ -0,0 +1,152 @@ +mutable struct StorageGetxRestartedGmres{S} + x::S + y::S + p::S +end +StorageGetxRestartedGmres(solver::GmresSolver; N = I) = + StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? 
similar(solver.p) : similar(solver.x)) + +function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A, + stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S} + NisI = (N === I) + x2, y2, p2 = stor.x, stor.y, stor.p + n = size(A, 2) + # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution. + nr = sum(1:solver.inner_iter) + y = solver.z # yᵢ = zᵢ + y2 .= y + R = solver.R + V = solver.V + x2 .= solver.Δx + for i = solver.inner_iter : -1 : 1 + pos = nr + i - solver.inner_iter # position of rᵢ.ₖ + for j = solver.inner_iter : -1 : i+1 + y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ + pos = pos - j + 1 # position of rᵢ.ⱼ₋₁ + end + # Rₖ can be singular if the system is inconsistent + if abs(R[pos]) ≤ eps(T)^(3/4) + y2[i] = zero(FC) + inconsistent = true + else + y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ + end + end + + # Form xₖ = N⁻¹Vₖyₖ + for i = 1 : solver.inner_iter + Krylov.@kaxpy!(n, y2[i], V[i], x2) + end + if !NisI + p2 .= solver.p + p2 .= x2 + mul!(x2, N, p2) + end + x2 .+= solver.x +end + +mutable struct TestCallbackN2{T, S, M} + A::M + b::S + storage_vec::S + tol::T +end +TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol) + +function (cb_n2::TestCallbackN2)(solver) + mul!(cb_n2.storage_vec, cb_n2.A, solver.x) + cb_n2.storage_vec .-= cb_n2.b + return norm(cb_n2.storage_vec) ≤ cb_n2.tol +end + +mutable struct TestCallbackN2Adjoint{T, S, M} + A::M + b::S + c::S + storage_vec1::S + storage_vec2::S + tol::T +end +TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol) + +function (cb_n2::TestCallbackN2Adjoint)(solver) + mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) + cb_n2.storage_vec1 .-= cb_n2.b + mul!(cb_n2.storage_vec2, cb_n2.A', solver.y) + cb_n2.storage_vec2 .-= cb_n2.c + return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) +end + +mutable struct TestCallbackN2Shifts{T, S, M} + A::M + b::S + shifts::Vector{T} + tol::T +end +TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol) + +function (cb_n2::TestCallbackN2Shifts)(solver) + r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x) + return all(map(norm, r) .≤ cb_n2.tol) +end + +mutable struct TestCallbackN2LS{T, S, M} + A::M + b::S + λ::T + storage_vec1::S + storage_vec2::S + tol::T +end +TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol) + +function (cb_n2::TestCallbackN2LS)(solver) + mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) + cb_n2.storage_vec1 .-= cb_n2.b + mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1) + cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x + return norm(cb_n2.storage_vec2) ≤ cb_n2.tol +end + +mutable struct TestCallbackN2LN{T, S, M} + A::M + b::S + λ::T + storage_vec::S + tol::T +end +TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol) + +function (cb_n2::TestCallbackN2LN)(solver) + mul!(cb_n2.storage_vec, cb_n2.A, solver.x) + cb_n2.storage_vec .-= cb_n2.b + cb_n2.λ != 0 && (cb_n2.storage_vec .+= cb_n2.λ .* solver.x) + return norm(cb_n2.storage_vec) ≤ cb_n2.tol +end + +mutable struct TestCallbackN2SaddlePts{T, S, M} + A::M + b::S + c::S + storage_vec1::S + storage_vec2::S + tol::T +end +TestCallbackN2SaddlePts(A, b, c; tol = 0.1) = + TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol) + +function (cb_n2::TestCallbackN2SaddlePts)(solver) + mul!(cb_n2.storage_vec1, cb_n2.A, solver.y) + cb_n2.storage_vec1 .+= solver.x .- cb_n2.b + mul!(cb_n2.storage_vec2, cb_n2.A', solver.x) + 
cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c + return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) +end + +function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol) + get_x_restarted_gmres!(solver, A, stor, N) + x = stor.x + mul!(storage_vec, A, x) + storage_vec .-= b + return (norm(storage_vec) ≤ tol) +end diff --git a/test/get_div_grad.jl b/test/get_div_grad.jl index 6d6bf012e..ae27e5061 100644 --- a/test/get_div_grad.jl +++ b/test/get_div_grad.jl @@ -1,8 +1,8 @@ # Identity matrix. eye(n::Int; FC=Float64) = sparse(one(FC) * I, n, n) -# Compute the energy norm ‖r‖ₚ = √(rᵀPr) where P is a symmetric and positive definite matrix. -metric(r, P) = sqrt(dot(r, P * r)) +# Compute the energy norm ‖r‖ₚ = √(rᴴPr) where P is a symmetric and positive definite matrix. +metric(r, P) = sqrt(real(dot(r, P * r))) # Based on Lars Ruthotto's initial implementation. function get_div_grad(n1 :: Int, n2 :: Int, n3 :: Int) diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl new file mode 100644 index 000000000..9fb6cdffd --- /dev/null +++ b/test/gpu/amd.jl @@ -0,0 +1,111 @@ +using AMDGPU + +include("gpu.jl") + +@testset "AMD -- AMDGPU.jl" begin + + @test AMDGPU.functional() + AMDGPU.allowscalar(false) + + @testset "documentation" begin + A_cpu = rand(ComplexF64, 20, 20) + A_cpu = A_cpu + A_cpu' + b_cpu = rand(ComplexF64, 20) + A_gpu = ROCMatrix(A_cpu) + b_gpu = ROCVector(b_cpu) + x, stats = minres(A_gpu, b_gpu) + end + + for FC in (Float32, Float64, ComplexF32, ComplexF64) + S = ROCVector{FC} + M = ROCMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! -- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! 
-- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + # @testset "processes -- $FC" begin + # test_processes(S, M) + # end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + end +end diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl new file mode 100644 index 000000000..09036ecac --- /dev/null +++ b/test/gpu/gpu.jl @@ -0,0 +1,52 @@ +using LinearAlgebra, SparseArrays, Test +using Krylov + +include("../test_utils.jl") + +function test_processes(S, M) + m = 250 + n = 500 + k = 20 + FC = eltype(S) + + cpu_A, cpu_b = symmetric_indefinite(n, FC=FC) + gpu_A, gpu_b = M(cpu_A), S(cpu_b) + V, T = hermitian_lanczos(gpu_A, gpu_b, k) + + cpu_A, cpu_b = nonsymmetric_definite(n, FC=FC) + cpu_c = -cpu_b + gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c) + V, T, U, Tᴴ = nonhermitian_lanczos(gpu_A, gpu_b, gpu_c, k) + + cpu_A, cpu_b = nonsymmetric_indefinite(n, FC=FC) + gpu_A, gpu_b = M(cpu_A), S(cpu_b) + V, H = arnoldi(gpu_A, gpu_b, k) + + cpu_A, cpu_b = under_consistent(m, n, FC=FC) + gpu_A, gpu_b = M(cpu_A), S(cpu_b) + V, U, L = golub_kahan(gpu_A, gpu_b, k) + + cpu_A, cpu_b = under_consistent(m, n, FC=FC) + _, cpu_c = over_consistent(n, m, FC=FC) + gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c) + V, T, U, Tᴴ = saunders_simon_yip(gpu_A, gpu_b, gpu_c, k) + + cpu_A, cpu_b = under_consistent(m, n, FC=FC) + cpu_B, cpu_c = over_consistent(n, m, FC=FC) + gpu_A, gpu_B, gpu_b, gpu_c = M(cpu_A), M(cpu_B), S(cpu_b), S(cpu_c) + V, H, U, F = montoison_orban(gpu_A, gpu_B, gpu_b, gpu_c, k) +end + +function test_solver(S, M) + n = 10 + memory = 5 + A = M(undef, n, n) + b = S(undef, n) + solver = GmresSolver(n, n, memory, S) + solve!(solver, A, b) # Test that we don't have errors +end + +function test_conversion(S, M) + @test Krylov.vector_to_matrix(S) == M + @test Krylov.matrix_to_vector(M) == S +end diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl new file mode 100644 index 000000000..f03176199 --- /dev/null +++ b/test/gpu/intel.jl @@ -0,0 +1,113 @@ +using oneAPI + +include("gpu.jl") + +@testset "Intel -- oneAPI.jl" begin + + @test oneAPI.functional() + oneAPI.allowscalar(false) + + @testset "documentation" begin + T = Float32 + m = 20 + n = 10 + A_cpu = rand(T, m, n) + b_cpu = rand(T, m) + A_gpu = oneMatrix(A_cpu) + b_gpu = oneVector(b_cpu) + x, stats = lsqr(A_gpu, b_gpu) + end + + for FC ∈ (Float32, ComplexF32) + S = oneVector{FC} + M = oneMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! 
-- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! -- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + # @testset "processes -- $FC" begin + # test_processes(S, M) + # end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + end +end diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl new file mode 100644 index 000000000..2e684e21f --- /dev/null +++ b/test/gpu/metal.jl @@ -0,0 +1,113 @@ +using Metal + +include("gpu.jl") + +@testset "Apple M1 GPUs -- Metal.jl" begin + + # @test Metal.functional() + Metal.allowscalar(false) + + @testset "documentation" begin + T = Float32 + n = 10 + m = 20 + A_cpu = rand(T, n, m) + b_cpu = rand(T, n) + A_gpu = MtlMatrix(A_cpu) + b_gpu = MtlVector(b_cpu) + x, stats = craig(A_gpu, b_gpu) + end + + for FC in (Float32, ComplexF32) + S = MtlVector{FC} + M = MtlMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y = S(y) + a = rand(FC) + b = rand(FC) + s = rand(FC) + a2 = rand(T) + b2 = rand(T) + c = rand(T) + + @testset "kdot -- $FC" begin + Krylov.@kdot(n, x, y) + end + + @testset "kdotr -- $FC" begin + Krylov.@kdotr(n, x, y) + end + + @testset "knrm2 -- $FC" begin + Krylov.@knrm2(n, x) + end + + @testset "kaxpy! -- $FC" begin + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + end + + @testset "kaxpby! -- $FC" begin + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + end + + @testset "kcopy! -- $FC" begin + Krylov.@kcopy!(n, x, y) + end + + @testset "kswap -- $FC" begin + Krylov.@kswap(x, y) + end + + @testset "kref! 
-- $FC" begin + Krylov.@kref!(n, x, y, c, s) + end + + @testset "conversion -- $FC" begin + test_conversion(S, M) + end + + ε = eps(T) + atol = √ε + rtol = √ε + + @testset "GMRES -- $FC" begin + A, b = nonsymmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = gmres(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "CG -- $FC" begin + A, b = symmetric_definite(FC=FC) + A = M(A) + b = S(b) + x, stats = cg(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + @testset "MINRES-QLP -- $FC" begin + A, b = symmetric_indefinite(FC=FC) + A = M(A) + b = S(b) + x, stats = minres_qlp(A, b) + @test norm(b - A * x) ≤ atol + rtol * norm(b) + end + + # @testset "processes -- $FC" begin + # test_processes(S, M) + # end + + @testset "solver -- $FC" begin + test_solver(S, M) + end + end +end diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl new file mode 100644 index 000000000..908a2819c --- /dev/null +++ b/test/gpu/nvidia.jl @@ -0,0 +1,204 @@ +using LinearOperators, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER + +include("gpu.jl") + +@testset "Nvidia -- CUDA.jl" begin + + @test CUDA.functional() + CUDA.allowscalar(false) + + @testset "documentation" begin + A_cpu = rand(20, 20) + b_cpu = rand(20) + A_gpu = CuMatrix(A_cpu) + b_gpu = CuVector(b_cpu) + x, stats = bilq(A_gpu, b_gpu) + + A_cpu = sprand(200, 100, 0.3) + b_cpu = rand(200) + A_gpu = CuSparseMatrixCSC(A_cpu) + b_gpu = CuVector(b_cpu) + x, stats = lsmr(A_gpu, b_gpu) + + @testset "ic0" begin + A_cpu, b_cpu = sparse_laplacian() + + b_gpu = CuVector(b_cpu) + n = length(b_gpu) + T = eltype(b_gpu) + symmetric = hermitian = true + + A_gpu = CuSparseMatrixCSC(A_cpu) + P = ic02(A_gpu, 'O') + function ldiv_csc_ic0!(y, P, x) + copyto!(y, x) + sv2!('T', 'U', 'N', 1.0, P, y, 'O') + sv2!('N', 'U', 'N', 1.0, P, y, 'O') + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csc_ic0!(y, P, x)) + x, stats = cg(A_gpu, b_gpu, M=opM) + @test norm(b_gpu - A_gpu * x) ≤ 1e-6 + + A_gpu = CuSparseMatrixCSR(A_cpu) + P = ic02(A_gpu, 'O') + function ldiv_csr_ic0!(y, P, x) + copyto!(y, x) + sv2!('N', 'L', 'N', 1.0, P, y, 'O') + sv2!('T', 'L', 'N', 1.0, P, y, 'O') + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csr_ic0!(y, P, x)) + x, stats = cg(A_gpu, b_gpu, M=opM) + @test norm(b_gpu - A_gpu * x) ≤ 1e-6 + end + + @testset "ilu0" begin + A_cpu, b_cpu = polar_poisson() + + p = zfd(A_cpu, 'O') + p .+= 1 + A_cpu = A_cpu[p,:] + b_cpu = b_cpu[p] + + b_gpu = CuVector(b_cpu) + n = length(b_gpu) + T = eltype(b_gpu) + symmetric = hermitian = false + + A_gpu = CuSparseMatrixCSC(A_cpu) + P = ilu02(A_gpu, 'O') + function ldiv_csc_ilu0!(y, P, x) + copyto!(y, x) + sv2!('N', 'L', 'N', 1.0, P, y, 'O') + sv2!('N', 'U', 'U', 1.0, P, y, 'O') + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csc_ilu0!(y, P, x)) + x, stats = bicgstab(A_gpu, b_gpu, M=opM) + @test norm(b_gpu - A_gpu * x) ≤ 1e-6 + + A_gpu = CuSparseMatrixCSR(A_cpu) + P = ilu02(A_gpu, 'O') + function ldiv_csr_ilu0!(y, P, x) + copyto!(y, x) + sv2!('N', 'L', 'U', 1.0, P, y, 'O') + sv2!('N', 'U', 'N', 1.0, P, y, 'O') + return y + end + opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csr_ilu0!(y, P, x)) + x, stats = bicgstab(A_gpu, b_gpu, M=opM) + @test norm(b_gpu - A_gpu * x) ≤ 1e-6 + end + end + + for FC in (Float32, Float64, ComplexF32, ComplexF64) + S = CuVector{FC} + V = CuSparseVector{FC} + M = CuMatrix{FC} + T = real(FC) + n = 10 + x = rand(FC, n) + x = S(x) + y = rand(FC, n) + y 
= S(y)
+    a = rand(FC)
+    b = rand(FC)
+    s = rand(FC)
+    a2 = rand(T)
+    b2 = rand(T)
+    c = rand(T)
+
+    @testset "kdot -- $FC" begin
+      Krylov.@kdot(n, x, y)
+    end
+
+    @testset "kdotr -- $FC" begin
+      Krylov.@kdotr(n, x, y)
+    end
+
+    @testset "knrm2 -- $FC" begin
+      Krylov.@knrm2(n, x)
+    end
+
+    @testset "kaxpy! -- $FC" begin
+      Krylov.@kaxpy!(n, a, x, y)
+      Krylov.@kaxpy!(n, a2, x, y)
+    end
+
+    @testset "kaxpby! -- $FC" begin
+      Krylov.@kaxpby!(n, a, x, b, y)
+      Krylov.@kaxpby!(n, a2, x, b, y)
+      Krylov.@kaxpby!(n, a, x, b2, y)
+      Krylov.@kaxpby!(n, a2, x, b2, y)
+    end
+
+    @testset "kcopy! -- $FC" begin
+      Krylov.@kcopy!(n, x, y)
+    end
+
+    @testset "kswap -- $FC" begin
+      Krylov.@kswap(x, y)
+    end
+
+    @testset "kref! -- $FC" begin
+      Krylov.@kref!(n, x, y, c, s)
+    end
+
+    @testset "conversion -- $FC" begin
+      test_conversion(S, M)
+    end
+
+    ε = eps(T)
+    atol = √ε
+    rtol = √ε
+
+    @testset "GMRES -- $FC" begin
+      A, b = nonsymmetric_indefinite(FC=FC)
+      A = M(A)
+      b = S(b)
+      x, stats = gmres(A, b)
+      @test norm(b - A * x) ≤ atol + rtol * norm(b)
+    end
+
+    @testset "CG -- $FC" begin
+      A, b = symmetric_definite(FC=FC)
+      A = M(A)
+      b = S(b)
+      x, stats = cg(A, b)
+      @test norm(b - A * x) ≤ atol + rtol * norm(b)
+    end
+
+    @testset "MINRES-QLP -- $FC" begin
+      A, b = symmetric_indefinite(FC=FC)
+      A = M(A)
+      b = S(b)
+      x, stats = minres_qlp(A, b)
+      @test norm(b - A * x) ≤ atol + rtol * norm(b)
+    end
+
+    @testset "processes -- $FC" begin
+      test_processes(S, M)
+    end
+
+    @testset "solver -- $FC" begin
+      test_solver(S, M)
+    end
+
+    @testset "ktypeof -- $FC" begin
+      dv = S(rand(FC, 10))
+      b = view(dv, 4:8)
+      @test Krylov.ktypeof(dv) <: S
+      @test Krylov.ktypeof(b) <: S
+
+      dm = M(rand(FC, 10, 10))
+      b = view(dm, :, 3)
+      @test Krylov.ktypeof(b) <: S
+
+      sv = V(sprand(FC, 10, 0.5))
+      b = view(sv, 4:8)
+      @test Krylov.ktypeof(sv) <: S
+      @test Krylov.ktypeof(b) <: S
+    end
+  end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 99ab25fda..b69865f61 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,7 +4,9 @@ import Krylov.KRYLOV_SOLVERS
 include("test_utils.jl")
 include("test_aux.jl")
 include("test_stats.jl")
+include("test_processes.jl")
+include("test_fgmres.jl")
 include("test_gpmr.jl")
 include("test_fom.jl")
 include("test_gmres.jl")
diff --git a/test/test_allocations.jl b/test/test_allocations.jl
index 4c6817499..174d0ae55 100644
--- a/test/test_allocations.jl
+++ b/test/test_allocations.jl
@@ -1,26 +1,27 @@
 @testset "allocations" begin
-  for FC in (Float64, ComplexF64)
+  for FC in (Float32, Float64, ComplexF32, ComplexF64)
     @testset "Data Type: $FC" begin
-      A = FC.(get_div_grad(16, 16, 16)) # Dimension n x n
-      n = size(A, 1)
-      m = div(n, 2)
-      Au = A[1:m,:] # Dimension m x n
-      Ao = A[:,1:m] # Dimension n x m
-      b = Ao * ones(FC, m) # Dimension n
-      c = Au * ones(FC, n) # Dimension m
+      A = FC.(get_div_grad(18, 18, 18)) # Dimension m x n
+      m,n = size(A)
+      k = div(n, 2)
+      Au = A[1:k,:] # Dimension k x n
+      Ao = A[:,1:k] # Dimension m x k
+      b = Ao * ones(FC, k) # Dimension m
+      c = Au * ones(FC, n) # Dimension k
       mem = 200
-      shifts = [1.0; 2.0; 3.0; 4.0; 5.0]
+      T = real(FC)
+      shifts = T[1; 2; 3; 4; 5]
       nshifts = 5
-      nbits = sizeof(FC) # 8 bits for Float64 and 16 bits for ComplexF64
+      nbits_FC = sizeof(FC) # 8 bytes for ComplexF32 and 16 bytes for ComplexF64
+      nbits_T = sizeof(T)   # 4 bytes for Float32 and 8 bytes for Float64

       @testset "SYMMLQ" begin
         # SYMMLQ needs:
         # 5 n-vectors: x, Mvold, Mv, Mv_next, w̅
-        storage_symmlq(n) = 5 * n
-        storage_symmlq_bytes(n) = nbits * storage_symmlq(n)
+        storage_symmlq_bytes(n) = nbits_FC * 5
* n expected_symmlq_bytes = storage_symmlq_bytes(n) symmlq(A, b) # warmup @@ -36,8 +37,7 @@ @testset "CG" begin # CG needs: # 4 n-vectors: x, r, p, Ap - storage_cg(n) = 4 * n - storage_cg_bytes(n) = nbits * storage_cg(n) + storage_cg_bytes(n) = nbits_FC * 4 * n expected_cg_bytes = storage_cg_bytes(n) cg(A, b) # warmup @@ -53,8 +53,7 @@ @testset "CG-LANCZOS" begin # CG-LANCZOS needs: # 5 n-vectors: x, Mv, Mv_prev, p, Mv_next - storage_cg_lanczos(n) = 5 * n - storage_cg_lanczos_bytes(n) = nbits * storage_cg_lanczos(n) + storage_cg_lanczos_bytes(n) = nbits_FC * 5 * n expected_cg_lanczos_bytes = storage_cg_lanczos_bytes(n) cg_lanczos(A, b) # warmup @@ -73,9 +72,7 @@ # - 2 (n*nshifts)-matrices: x, p # - 5 nshifts-vectors: σ, δhat, ω, γ, rNorms # - 3 nshifts-bitVector: indefinite, converged, not_cv - storage_cg_lanczos_shift(n, nshifts) = (3 * n) + (2 * n * nshifts) + (5 * nshifts) + (3 * nshifts / 64) - storage_cg_lanczos_shift_bytes(n, nshifts) = nbits * storage_cg_lanczos_shift(n, nshifts) - + storage_cg_lanczos_shift_bytes(n, nshifts) = nbits_FC * ((3 * n) + (2 * n * nshifts)) + nbits_T * (5 * nshifts) + (3 * nshifts) expected_cg_lanczos_shift_bytes = storage_cg_lanczos_shift_bytes(n, nshifts) cg_lanczos_shift(A, b, shifts) # warmup actual_cg_lanczos_shift_bytes = @allocated cg_lanczos_shift(A, b, shifts) @@ -90,8 +87,7 @@ @testset "CR" begin # CR needs: # 5 n-vectors: x, r, p, q, Ar - storage_cr(n) = 5 * n - storage_cr_bytes(n) = nbits * storage_cr(n) + storage_cr_bytes(n) = nbits_FC * 5 * n expected_cr_bytes = storage_cr_bytes(n) cr(A, b) # warmup @@ -107,8 +103,7 @@ @testset "MINRES" begin # MINRES needs: # 6 n-vectors: x, r1, r2, w1, w2, y - storage_minres(n) = 6 * n - storage_minres_bytes(n) = nbits * storage_minres(n) + storage_minres_bytes(n) = nbits_FC * 6 * n expected_minres_bytes = storage_minres_bytes(n) minres(A, b) # warmup @@ -124,8 +119,7 @@ @testset "MINRES-QLP" begin # MINRES-QLP needs: # - 6 n-vectors: wₖ₋₁, wₖ, vₖ₋₁, vₖ, x, p - storage_minres_qlp(n) = 6 * n - storage_minres_qlp_bytes(n) = nbits * storage_minres_qlp(n) + storage_minres_qlp_bytes(n) = nbits_FC * 6 * n expected_minres_qlp_bytes = storage_minres_qlp_bytes(n) minres_qlp(A, b) # warmup @@ -141,11 +135,11 @@ @testset "DIOM" begin # DIOM needs: # - 2 n-vectors: x, t - # - 2 (n*mem)-matrices: P, V - # - 1 mem-vector: L - # - 1 (mem+2)-vector: H - storage_diom(mem, n) = (2 * n) + (2 * n * mem) + (mem) + (mem + 2) - storage_diom_bytes(mem, n) = nbits * storage_diom(mem, n) + # - 1 (n*mem)-matrix: V + # - 1 n*(mem-1)-matrix: P + # - 1 (mem-1)-vector: L + # - 1 mem-vector: H + storage_diom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (n * (mem-1)) + (mem-1) + (mem)) expected_diom_bytes = storage_diom_bytes(mem, n) diom(A, b, memory=mem) # warmup @@ -164,8 +158,7 @@ # - 1 (n*mem)-matrix: V # - 2 mem-vectors: l, z # - 1 (mem*(mem+1)/2)-vector: U - storage_fom(mem, n) = (2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2) - storage_fom_bytes(mem, n) = nbits * storage_fom(mem, n) + storage_fom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)) expected_fom_bytes = storage_fom_bytes(mem, n) fom(A, b, memory=mem) # warmup @@ -183,9 +176,8 @@ # - 2 n-vectors: x, t # - 2 (n*mem)-matrices: P, V # - 2 mem-vectors: c, s - # - 1 (mem+2)-vector: H - storage_dqgmres(mem, n) = (2 * n) + (2 * n * mem) + (2 * mem) + (mem + 2) - storage_dqgmres_bytes(mem, n) = nbits * storage_dqgmres(mem, n) + # - 1 (mem+1)-vector: H + storage_dqgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + mem + (mem 
+ 1)) + nbits_T * mem expected_dqgmres_bytes = storage_dqgmres_bytes(mem, n) dqgmres(A, b, memory=mem) # warmup @@ -204,8 +196,7 @@ # - 1 n*(mem)-matrix: V # - 3 mem-vectors: c, s, z # - 1 (mem*(mem+1)/2)-vector: R - storage_gmres(mem, n) = (2 * n) + (n * mem) + (3 * mem) + (mem * (mem+1) / 2) - storage_gmres_bytes(mem, n) = nbits * storage_gmres(mem, n) + storage_gmres_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem expected_gmres_bytes = storage_gmres_bytes(mem, n) gmres(A, b, memory=mem) # warmup @@ -218,11 +209,29 @@ @test inplace_gmres_bytes == 0 end + @testset "FGMRES" begin + # FGMRES needs: + # - 2 n-vectors: x, w + # - 2 n*(mem)-matrix: V, Z + # - 3 mem-vectors: c, s, z + # - 1 (mem*(mem+1)/2)-vector: R + storage_fgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem + + expected_fgmres_bytes = storage_fgmres_bytes(mem, n) + fgmres(A, b, memory=mem) # warmup + actual_fgmres_bytes = @allocated fgmres(A, b, memory=mem) + @test expected_fgmres_bytes ≤ actual_fgmres_bytes ≤ 1.02 * expected_fgmres_bytes + + solver = FgmresSolver(A, b, mem) + fgmres!(solver, A, b) # warmup + inplace_fgmres_bytes = @allocated fgmres!(solver, A, b) + @test inplace_fgmres_bytes == 0 + end + @testset "CGS" begin # CGS needs: # 6 n-vectors: x, r, u, p, q, ts - storage_cgs(n) = 6 * n - storage_cgs_bytes(n) = nbits * storage_cgs(n) + storage_cgs_bytes(n) = nbits_FC * 6 * n expected_cgs_bytes = storage_cgs_bytes(n) cgs(A, b) # warmup @@ -238,8 +247,7 @@ @testset "BICGSTAB" begin # BICGSTAB needs: # 6 n-vectors: x, r, p, v, s, qd - storage_bicgstab(n) = 6 * n - storage_bicgstab_bytes(n) = nbits * storage_bicgstab(n) + storage_bicgstab_bytes(n) = nbits_FC * 6 * n expected_bicgstab_bytes = storage_bicgstab_bytes(n) bicgstab(A, b) # warmup @@ -254,12 +262,11 @@ @testset "CGNE" begin # CGNE needs: - # - 3 n-vectors: x, p, Aᵀz + # - 3 n-vectors: x, p, Aᴴz # - 2 m-vectors: r, q - storage_cgne(n, m) = 3 * n + 2 * m - storage_cgne_bytes(n, m) = nbits * storage_cgne(n, m) + storage_cgne_bytes(m, n) = nbits_FC * (3 * n + 2 * m) - expected_cgne_bytes = storage_cgne_bytes(n, m) + expected_cgne_bytes = storage_cgne_bytes(k, n) (x, stats) = cgne(Au, c) # warmup actual_cgne_bytes = @allocated cgne(Au, c) @test expected_cgne_bytes ≤ actual_cgne_bytes ≤ 1.02 * expected_cgne_bytes @@ -272,12 +279,11 @@ @testset "CRMR" begin # CRMR needs: - # - 3 n-vectors: x, p, Aᵀr + # - 3 n-vectors: x, p, Aᴴr # - 2 m-vectors: r, q - storage_crmr(n, m) = 3 * n + 2 * m - storage_crmr_bytes(n, m) = nbits * storage_crmr(n, m) + storage_crmr_bytes(m, n) = nbits_FC * (3 * n + 2 * m) - expected_crmr_bytes = storage_crmr_bytes(n, m) + expected_crmr_bytes = storage_crmr_bytes(k, n) (x, stats) = crmr(Au, c) # warmup actual_crmr_bytes = @allocated crmr(Au, c) @test expected_crmr_bytes ≤ actual_crmr_bytes ≤ 1.02 * expected_crmr_bytes @@ -290,12 +296,11 @@ @testset "LNLQ" begin # LNLQ needs: - # - 3 n-vectors: x, v, Aᵀu + # - 3 n-vectors: x, v, Aᴴu # - 4 m-vectors: y, w̄, u, Av - storage_lnlq(n, m) = 3 * n + 4 * m - storage_lnlq_bytes(n, m) = nbits * storage_lnlq(n, m) + storage_lnlq_bytes(m, n) = nbits_FC * (3 * n + 4 * m) - expected_lnlq_bytes = storage_lnlq_bytes(n, m) + expected_lnlq_bytes = storage_lnlq_bytes(k, n) lnlq(Au, c) # warmup actual_lnlq_bytes = @allocated lnlq(Au, c) @test expected_lnlq_bytes ≤ actual_lnlq_bytes ≤ 1.02 * expected_lnlq_bytes @@ -308,12 +313,11 @@ @testset "CRAIG" begin # CRAIG needs: - # - 3 n-vectors: x, v, Aᵀu + # - 3 
n-vectors: x, v, Aᴴu # - 4 m-vectors: y, w, u, Av - storage_craig(n, m) = 3 * n + 4 * m - storage_craig_bytes(n, m) = nbits * storage_craig(n, m) + storage_craig_bytes(m, n) = nbits_FC * (3 * n + 4 * m) - expected_craig_bytes = storage_craig_bytes(n, m) + expected_craig_bytes = storage_craig_bytes(k, n) craig(Au, c) # warmup actual_craig_bytes = @allocated craig(Au, c) @test expected_craig_bytes ≤ actual_craig_bytes ≤ 1.02 * expected_craig_bytes @@ -326,12 +330,11 @@ @testset "CRAIGMR" begin # CRAIGMR needs: - # - 4 n-vectors: x, v, Aᵀu, d + # - 4 n-vectors: x, v, Aᴴu, d # - 5 m-vectors: y, u, w, wbar, Av - storage_craigmr(n, m) = 4 * n + 5 * m - storage_craigmr_bytes(n, m) = nbits * storage_craigmr(n, m) + storage_craigmr_bytes(m, n) = nbits_FC * (4 * n + 5 * m) - expected_craigmr_bytes = storage_craigmr_bytes(n, m) + expected_craigmr_bytes = storage_craigmr_bytes(k, n) craigmr(Au, c) # warmup actual_craigmr_bytes = @allocated craigmr(Au, c) @test expected_craigmr_bytes ≤ actual_craigmr_bytes ≤ 1.02 * expected_craigmr_bytes @@ -344,12 +347,11 @@ @testset "CGLS" begin # CGLS needs: - # - 3 m-vectors: x, p, s - # - 2 n-vectors: r, q - storage_cgls(n, m) = 3 * m + 2 * n - storage_cgls_bytes(n, m) = nbits * storage_cgls(n, m) + # - 3 n-vectors: x, p, s + # - 2 m-vectors: r, q + storage_cgls_bytes(m, n) = nbits_FC * (3 * n + 2 * m) - expected_cgls_bytes = storage_cgls_bytes(n, m) + expected_cgls_bytes = storage_cgls_bytes(m, k) (x, stats) = cgls(Ao, b) # warmup actual_cgls_bytes = @allocated cgls(Ao, b) @test expected_cgls_bytes ≤ actual_cgls_bytes ≤ 1.02 * expected_cgls_bytes @@ -362,12 +364,11 @@ @testset "LSLQ" begin # LSLQ needs: - # - 4 m-vectors: x_lq, v, Aᵀu, w̄ (= x_cg) - # - 2 n-vectors: u, Av - storage_lslq(n, m) = 4 * m + 2 * n - storage_lslq_bytes(n, m) = nbits * storage_lslq(n, m) + # - 4 n-vectors: x_lq, v, Aᴴu, w̄ (= x_cg) + # - 2 m-vectors: u, Av + storage_lslq_bytes(m, n) = nbits_FC * (4 * n + 2 * m) - expected_lslq_bytes = storage_lslq_bytes(n, m) + expected_lslq_bytes = storage_lslq_bytes(m, k) (x, stats) = lslq(Ao, b) # warmup actual_lslq_bytes = @allocated lslq(Ao, b) @test expected_lslq_bytes ≤ actual_lslq_bytes ≤ 1.02 * expected_lslq_bytes @@ -380,12 +381,11 @@ @testset "CRLS" begin # CRLS needs: - # - 4 m-vectors: x, p, Ar, q - # - 3 n-vectors: r, Ap, s - storage_crls(n, m) = 4 * m + 3 * n - storage_crls_bytes(n, m) = nbits * storage_crls(n, m) + # - 4 n-vectors: x, p, Ar, q + # - 3 m-vectors: r, Ap, s + storage_crls_bytes(m, n) = nbits_FC * (4 * n + 3 * m) - expected_crls_bytes = storage_crls_bytes(n, m) + expected_crls_bytes = storage_crls_bytes(m, k) (x, stats) = crls(Ao, b) # warmup actual_crls_bytes = @allocated crls(Ao, b) @test expected_crls_bytes ≤ actual_crls_bytes ≤ 1.02 * expected_crls_bytes @@ -398,12 +398,11 @@ @testset "LSQR" begin # LSQR needs: - # - 4 m-vectors: x, v, w, Aᵀu - # - 2 n-vectors: u, Av - storage_lsqr(n, m) = 4 * m + 2 * n - storage_lsqr_bytes(n, m) = nbits * storage_lsqr(n, m) + # - 4 n-vectors: x, v, w, Aᴴu + # - 2 m-vectors: u, Av + storage_lsqr_bytes(m, n) = nbits_FC * (4 * n + 2 * m) - expected_lsqr_bytes = storage_lsqr_bytes(n, m) + expected_lsqr_bytes = storage_lsqr_bytes(m, k) (x, stats) = lsqr(Ao, b) # warmup actual_lsqr_bytes = @allocated lsqr(Ao, b) @test expected_lsqr_bytes ≤ actual_lsqr_bytes ≤ 1.02 * expected_lsqr_bytes @@ -416,12 +415,11 @@ @testset "LSMR" begin # LSMR needs: - # - 5 m-vectors: x, v, h, hbar, Aᵀu - # - 2 n-vectors: u, Av - storage_lsmr(n, m) = 5 * m + 2 * n - storage_lsmr_bytes(n, m) = nbits * storage_lsmr(n, 
m) + # - 5 n-vectors: x, v, h, hbar, Aᴴu + # - 2 m-vectors: u, Av + storage_lsmr_bytes(m, n) = nbits_FC * (5 * n + 2 * m) - expected_lsmr_bytes = storage_lsmr_bytes(n, m) + expected_lsmr_bytes = storage_lsmr_bytes(m, k) (x, stats) = lsmr(Ao, b) # warmup actual_lsmr_bytes = @allocated lsmr(Ao, b) @test expected_lsmr_bytes ≤ actual_lsmr_bytes ≤ 1.02 * expected_lsmr_bytes @@ -435,8 +433,7 @@ @testset "BiLQ" begin # BILQ needs: # - 8 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅, p, q - storage_bilq(n) = 8 * n - storage_bilq_bytes(n) = nbits * storage_bilq(n) + storage_bilq_bytes(n) = nbits_FC * 8 * n expected_bilq_bytes = storage_bilq_bytes(n) bilq(A, b) # warmup @@ -452,8 +449,7 @@ @testset "QMR" begin # QMR needs: # - 9 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p, q - storage_qmr(n) = 9 * n - storage_qmr_bytes(n) = nbits * storage_qmr(n) + storage_qmr_bytes(n) = nbits_FC * 9 * n expected_qmr_bytes = storage_qmr_bytes(n) qmr(A, b) # warmup @@ -469,8 +465,7 @@ @testset "BiLQR" begin # BILQR needs: # - 11 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₁, wₖ, p, q - storage_bilqr(n) = 11 * n - storage_bilqr_bytes(n) = nbits * storage_bilqr(n) + storage_bilqr_bytes(n) = nbits_FC * 11 * n expected_bilqr_bytes = storage_bilqr_bytes(n) bilqr(A, b, b) # warmup @@ -487,10 +482,9 @@ # USYMLQ needs: # - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p # - 3 m-vectors: vₖ₋₁, vₖ, q - storage_usymlq(n, m) = 5 * n + 3 * m - storage_usymlq_bytes(n, m) = nbits * storage_usymlq(n, m) + storage_usymlq_bytes(m, n) = nbits_FC * (5 * n + 3 * m) - expected_usymlq_bytes = storage_usymlq_bytes(n, m) + expected_usymlq_bytes = storage_usymlq_bytes(k, n) usymlq(Au, c, b) # warmup actual_usymlq_bytes = @allocated usymlq(Au, c, b) @test expected_usymlq_bytes ≤ actual_usymlq_bytes ≤ 1.02 * expected_usymlq_bytes @@ -503,12 +497,11 @@ @testset "USYMQR" begin # USYMQR needs: - # - 6 m-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p - # - 3 n-vectors: uₖ₋₁, uₖ, q - storage_usymqr(n, m) = 6 * m + 3 * n - storage_usymqr_bytes(n, m) = nbits * storage_usymqr(n, m) + # - 6 n-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p + # - 3 m-vectors: uₖ₋₁, uₖ, q + storage_usymqr_bytes(m, n) = nbits_FC * (6 * n + 3 * m) - expected_usymqr_bytes = storage_usymqr_bytes(n, m) + expected_usymqr_bytes = storage_usymqr_bytes(m, k) (x, stats) = usymqr(Ao, b, c) # warmup actual_usymqr_bytes = @allocated usymqr(Ao, b, c) @test expected_usymqr_bytes ≤ actual_usymqr_bytes ≤ 1.02 * expected_usymqr_bytes @@ -523,8 +516,7 @@ # TRILQR needs: # - 6 m-vectors: vₖ₋₁, vₖ, t, wₖ₋₁, wₖ, q # - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p - storage_trilqr(n, m) = 6 * m + 5 * n - storage_trilqr_bytes(n, m) = nbits * storage_trilqr(n, m) + storage_trilqr_bytes(m, n) = nbits_FC * (6 * m + 5 * n) expected_trilqr_bytes = storage_trilqr_bytes(n, n) trilqr(A, b, b) # warmup @@ -541,10 +533,9 @@ # TriCG needs: # - 6 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₁, gy₂ₖ, p # - 6 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₁, gx₂ₖ, q - storage_tricg(n, m) = 6 * n + 6 * m - storage_tricg_bytes(n, m) = nbits * storage_tricg(n, m) + storage_tricg_bytes(m, n) = nbits_FC * (6 * n + 6 * m) - expected_tricg_bytes = storage_tricg_bytes(n, m) + expected_tricg_bytes = storage_tricg_bytes(k, n) tricg(Au, c, b) # warmup actual_tricg_bytes = @allocated tricg(Au, c, b) @test expected_tricg_bytes ≤ actual_tricg_bytes ≤ 1.02 * expected_tricg_bytes @@ -559,10 +550,9 @@ # TriMR needs: # - 8 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, p # - 8 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, q - storage_trimr(n, m) = 8 * n + 8 * m - storage_trimr_bytes(n, m) = nbits * 
storage_trimr(n, m) + storage_trimr_bytes(m, n) = nbits_FC * (8 * n + 8 * m) - expected_trimr_bytes = storage_trimr_bytes(n, m) + expected_trimr_bytes = storage_trimr_bytes(k, n) trimr(Au, c, b) # warmup actual_trimr_bytes = @allocated trimr(Au, c, b) @test expected_trimr_bytes ≤ actual_trimr_bytes ≤ 1.02 * expected_trimr_bytes @@ -575,17 +565,16 @@ @testset "GPMR" begin # GPMR needs: - # - 2 n-vectors: x, q - # - 2 m-vectors: y, p - # - 1 (n*mem)-matrix: V - # - 1 (m*mem)-matrix: U + # - 2 m-vectors: x, q + # - 2 n-vectors: y, p + # - 1 (m*mem)-matrix: V + # - 1 (n*mem)-matrix: U # - 1 (2*mem)-vector: zt # - 2 (4*mem)-vectors: gc, gs # - 1 (mem*(2mem+1))-vector: R - storage_gpmr(mem, n, m) = (mem + 2) * (n + m) + mem * (2 * mem + 11) - storage_gpmr_bytes(mem, n, m) = nbits * storage_gpmr(mem, n, m) + storage_gpmr_bytes(mem, m, n) = nbits_FC * ((mem + 2) * (n + m) + mem * (2 * mem + 7)) + nbits_T * 4 * mem - expected_gpmr_bytes = storage_gpmr_bytes(mem, n, m) + expected_gpmr_bytes = storage_gpmr_bytes(mem, m, k) gpmr(Ao, Au, b, c, memory=mem, itmax=mem) # warmup actual_gpmr_bytes = @allocated gpmr(Ao, Au, b, c, memory=mem, itmax=mem) @test expected_gpmr_bytes ≤ actual_gpmr_bytes ≤ 1.02 * expected_gpmr_bytes diff --git a/test/test_aux.jl b/test/test_aux.jl index 11bdb7c2d..6c43142c0 100644 --- a/test/test_aux.jl +++ b/test/test_aux.jl @@ -1,119 +1,203 @@ @testset "aux" begin - # test Givens reflector corner cases - (c, s, ρ) = Krylov.sym_givens(0.0, 0.0) - @test (c == 1.0) && (s == 0.0) && (ρ == 0.0) - - a = 3.14 - (c, s, ρ) = Krylov.sym_givens(a, 0.0) - @test (c == 1.0) && (s == 0.0) && (ρ == a) - (c, s, ρ) = Krylov.sym_givens(-a, 0.0) - @test (c == -1.0) && (s == 0.0) && (ρ == a) - - b = 3.14 - (c, s, ρ) = Krylov.sym_givens(0.0, b) - @test (c == 0.0) && (s == 1.0) && (ρ == b) - (c, s, ρ) = Krylov.sym_givens(0.0, -b) - @test (c == 0.0) && (s == -1.0) && (ρ == b) - - (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0)) - @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0)) - - a = Complex(1.0, 1.0) - (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0)) - @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a) - (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0)) - @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a) - - b = Complex(1.0, 1.0) - (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b) - @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b) - (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b) - @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b) - - # test roots of a quadratic - roots = Krylov.roots_quadratic(0.0, 0.0, 0.0) - @test length(roots) == 1 - @test roots[1] == 0.0 - - roots = Krylov.roots_quadratic(0.0, 0.0, 1.0) - @test length(roots) == 0 - - roots = Krylov.roots_quadratic(0.0, 3.14, -1.0) - @test length(roots) == 1 - @test roots[1] == 1.0 / 3.14 - - roots = Krylov.roots_quadratic(1.0, 0.0, 1.0) - @test length(roots) == 0 - - roots = Krylov.roots_quadratic(1.0, 0.0, 0.0) - @test length(roots) == 2 - @test roots[1] == 0.0 - @test roots[2] == 0.0 - - roots = Krylov.roots_quadratic(1.0, 3.0, 2.0) - @test length(roots) == 2 - @test roots[1] ≈ -2.0 - @test roots[2] ≈ -1.0 - - roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0) - @test length(roots) == 0 - - # ill-conditioned quadratic - roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0) - @test length(roots) == 2 - @test roots[1] == 1.0e+13 - @test roots[2] == 0.0 - - # iterative refinement is crucial! 
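# (A worked instance of why, under the numbers used in this test: for
#  q(x) = ax² + bx + c with a = -1.0e-8, b = 1.0e+5, c = 1.0, the naively
#  computed small root is x = 0.0; one Newton refinement step
#  x ← x - q(x)/(2ax + b) = 0.0 - 1.0/1.0e+5 = -1.0e-5 recovers the value
#  that the nitref=1 case below checks for.)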
- roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1) - @test length(roots) == 2 - @test roots[1] == 1.0e+13 - @test roots[2] == -1.0e-05 - - # not ill-conditioned quadratic - roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0) - @test length(roots) == 2 - @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) - @test isapprox(roots[2], -1.0, rtol=1.0e-6) - - roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1) - @test length(roots) == 2 - @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) - @test isapprox(roots[2], -1.0, rtol=1.0e-6) - - # test trust-region boundary - x = ones(5) - d = ones(5); d[1:2:5] .= -1 - @test_throws ErrorException Krylov.to_boundary(x, d, -1.0) - @test_throws ErrorException Krylov.to_boundary(x, d, 0.5) - @test_throws ErrorException Krylov.to_boundary(x, zeros(5), 1.0) - @test maximum(Krylov.to_boundary(x, d, 5.0)) ≈ 2.209975124224178 - @test minimum(Krylov.to_boundary(x, d, 5.0)) ≈ -1.8099751242241782 - @test maximum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ 1.8099751242241782 - @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178 - - # test kzeros and kones - @test Krylov.kzeros(Vector{Float64}, 10) == zeros(10) - @test Krylov.kones(Vector{Float64}, 10) == ones(10) - - # test ktypeof - a = rand(Float32, 10) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float32} - @test Krylov.ktypeof(b) == Vector{Float32} - - a = rand(Float64, 10) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float64} - @test Krylov.ktypeof(b) == Vector{Float64} - - a = sprand(Float32, 10, 0.5) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float32} - @test Krylov.ktypeof(b) == Vector{Float32} - - a = sprand(Float64, 10, 0.5) - b = view(a, 4:8) - @test Krylov.ktypeof(a) == Vector{Float64} - @test Krylov.ktypeof(b) == Vector{Float64} + + @testset "sym_givens" begin + # test Givens reflector corner cases + (c, s, ρ) = Krylov.sym_givens(0.0, 0.0) + @test (c == 1.0) && (s == 0.0) && (ρ == 0.0) + + a = 3.14 + (c, s, ρ) = Krylov.sym_givens(a, 0.0) + @test (c == 1.0) && (s == 0.0) && (ρ == a) + (c, s, ρ) = Krylov.sym_givens(-a, 0.0) + @test (c == -1.0) && (s == 0.0) && (ρ == a) + + b = 3.14 + (c, s, ρ) = Krylov.sym_givens(0.0, b) + @test (c == 0.0) && (s == 1.0) && (ρ == b) + (c, s, ρ) = Krylov.sym_givens(0.0, -b) + @test (c == 0.0) && (s == -1.0) && (ρ == b) + + (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0)) + @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0)) + + a = Complex(1.0, 1.0) + (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0)) + @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a) + (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0)) + @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a) + + b = Complex(1.0, 1.0) + (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b) + @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b) + (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b) + @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b) + end + + @testset "roots_quadratic" begin + # test roots of a quadratic + roots = Krylov.roots_quadratic(0.0, 0.0, 0.0) + @test roots[1] == 0.0 + @test roots[2] == 0.0 + + @test_throws ErrorException Krylov.roots_quadratic(0.0, 0.0, 1.0) + + roots = Krylov.roots_quadratic(0.0, 3.14, -1.0) + @test roots[1] == 1.0 / 3.14 + @test roots[2] == 1.0 / 3.14 + + @test_throws ErrorException Krylov.roots_quadratic(1.0, 0.0, 1.0) + + roots = Krylov.roots_quadratic(1.0, 0.0, 0.0) + @test roots[1] == 0.0 + @test roots[2] == 0.0 + + roots = Krylov.roots_quadratic(1.0, 3.0, 2.0) + @test roots[1] ≈ -2.0 + @test 
roots[2] ≈ -1.0 + + @test_throws ErrorException Krylov.roots_quadratic(1.0e+8, 1.0, 1.0) + + # ill-conditioned quadratic + roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0) + @test roots[1] == 1.0e+13 + @test roots[2] == 0.0 + + # iterative refinement is crucial! + roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1) + @test roots[1] == 1.0e+13 + @test roots[2] == -1.0e-05 + + # not ill-conditioned quadratic + roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0) + @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) + @test isapprox(roots[2], -1.0, rtol=1.0e-6) + + roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1) + @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6) + @test isapprox(roots[2], -1.0, rtol=1.0e-6) + + allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0) + @test allocations == 0 + + allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1) + @test allocations == 0 + end + + @testset "to_boundary" begin + # test trust-region boundary + n = 5 + x = ones(n) + d = ones(n); d[1:2:n] .= -1 + @test_throws ErrorException Krylov.to_boundary(n, x, d, -1.0) + @test_throws ErrorException Krylov.to_boundary(n, x, d, 0.5) + @test_throws ErrorException Krylov.to_boundary(n, x, zeros(n), 1.0) + @test maximum(Krylov.to_boundary(n, x, d, 5.0)) ≈ 2.209975124224178 + @test minimum(Krylov.to_boundary(n, x, d, 5.0)) ≈ -1.8099751242241782 + @test maximum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ 1.8099751242241782 + @test minimum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ -2.209975124224178 + end + + @testset "kzeros" begin + # test kzeros + @test Krylov.kzeros(Vector{Float64}, 10) == zeros(Float64, 10) + @test Krylov.kzeros(Vector{ComplexF32}, 10) == zeros(ComplexF32, 10) + end + + @testset "kones" begin + # test kones + @test Krylov.kones(Vector{Float64}, 10) == ones(Float64, 10) + @test Krylov.kones(Vector{ComplexF32}, 10) == ones(ComplexF32, 10) + end + + @testset "ktypeof" begin + # test ktypeof + for FC in (Float32, Float64, ComplexF32, ComplexF64) + dv = rand(FC, 10) + b = view(dv, 4:8) + @test Krylov.ktypeof(dv) == Vector{FC} + @test Krylov.ktypeof(b) == Vector{FC} + + dm = rand(FC, 10, 10) + b = view(dm, :, 3) + @test Krylov.ktypeof(b) == Vector{FC} + + sv = sprand(FC, 10, 0.5) + b = view(sv, 4:8) + @test Krylov.ktypeof(sv) == Vector{FC} + @test Krylov.ktypeof(b) == Vector{FC} + end + end + + @testset "vector_to_matrix" begin + # test vector_to_matrix + for FC in (Float32, Float64, ComplexF32, ComplexF64) + S = Vector{FC} + M = Krylov.vector_to_matrix(S) + @test M == Matrix{FC} + end + end + + @testset "matrix_to_vector" begin + # test matrix_to_vector + for FC in (Float32, Float64, ComplexF32, ComplexF64) + M = Matrix{FC} + S = Krylov.matrix_to_vector(M) + @test S == Vector{FC} + end + end + + @testset "macros" begin + # test macros + for FC ∈ (Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64) + n = 10 + x = rand(FC, n) + 
y = rand(FC, n) + a = rand(FC) + b = rand(FC) + c = rand(FC) + s = rand(FC) + + T = real(FC) + a2 = rand(T) + b2 = rand(T) + + Krylov.@kdot(n, x, y) + + Krylov.@kdotr(n, x, y) + + Krylov.@knrm2(n, x) + + Krylov.@kaxpy!(n, a, x, y) + Krylov.@kaxpy!(n, a2, x, y) + + Krylov.@kaxpby!(n, a, x, b, y) + Krylov.@kaxpby!(n, a2, x, b, y) + Krylov.@kaxpby!(n, a, x, b2, y) + Krylov.@kaxpby!(n, a2, x, b2, y) + + Krylov.@kcopy!(n, x, y) + + Krylov.@kswap(x, y) + + Krylov.@kref!(n, x, y, c, s) + end + end end diff --git a/test/test_bicgstab.jl b/test/test_bicgstab.jl index ce4e6dcd4..6817acf3d 100644 --- a/test/test_bicgstab.jl +++ b/test/test_bicgstab.jl @@ -82,10 +82,10 @@ @test(resid ≤ bicgstab_tol) @test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = bicgstab(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function solver = BicgstabSolver(A, b) diff --git a/test/test_bilq.jl b/test/test_bilq.jl index 900d1f6e5..40b9872db 100644 --- a/test/test_bilq.jl +++ b/test/test_bilq.jl @@ -66,10 +66,10 @@ @test(resid ≤ bilq_tol) @test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = bilq(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function diff --git a/test/test_bilqr.jl b/test/test_bilqr.jl index 6dab06ec7..fd46aade4 100644 --- a/test/test_bilqr.jl +++ b/test/test_bilqr.jl @@ -46,10 +46,10 @@ @test(resid_dual ≤ bilqr_tol) @test(stats.solved_dual) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, t, stats) = bilqr(A, b, c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function A, b, c = adjoint_pde(FC=FC) diff --git a/test/test_cgne.jl b/test/test_cgne.jl index 64cbc0ea7..c1a3e798b 100644 --- a/test/test_cgne.jl +++ b/test/test_cgne.jl @@ -1,6 +1,6 @@ -function test_cgne(A, b; λ=0.0, M=I) +function test_cgne(A, b; λ=0.0, N=I, history=false) (nrow, ncol) = size(A) - (x, stats) = cgne(A, b, λ=λ, M=M) + (x, stats) = cgne(A, b, λ=λ, N=N, history=history) r = b - A * x if λ > 0 s = r / sqrt(λ) @@ -69,8 +69,8 @@ end @test stats.status == "x = 0 is a zero-residual solution" # Test with Jacobi (or diagonal) preconditioner - A, b, M = square_preconditioned(FC=FC) - (x, stats, resid) = test_cgne(A, b, M=M) + A, b, N = square_preconditioned(FC=FC) + (x, stats, resid) = test_cgne(A, b, N=N) @test(resid ≤ cgne_tol) @test(stats.solved) (xI, xmin, xmin_norm) = check_min_norm(A, b, x) @@ -81,8 +81,8 @@ end A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0; 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0] b = [1.0; 0.0] - M = Diagonal(1 ./ (A * A')) - (x, stats, resid) = test_cgne(A, b, M=M) + N = Diagonal(1 ./ (A * A')) + (x, stats, resid) = test_cgne(A, b, N=N) @test(resid ≤ cgne_tol) @test(stats.solved) (xI, xmin, xmin_norm) = check_min_norm(A, b, x) @@ -92,7 +92,7 @@ end for transpose ∈ (false, true) A, b, c, D = small_sp(transpose, FC=FC) D⁻¹ = inv(D) - (x, stats) = cgne(A, b, M=D⁻¹, λ=1.0) + (x, stats) = cgne(A, b, N=D⁻¹, λ=1.0) end # test callback function diff --git a/test/test_cgs.jl b/test/test_cgs.jl index 5c505bb70..832cd76c3 100644 --- a/test/test_cgs.jl +++ b/test/test_cgs.jl @@ -74,10 +74,10 @@ @test(resid ≤ cgs_tol) @test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = cgs(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" 
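# Background on the status string tested above, as a minimal sketch: Julia's
# dot conjugates its first argument, so the breakdown quantity for these
# Lanczos-biorthogonalization methods is bᴴc = Σᵢ conj(bᵢ) cᵢ rather than bᵀc,
# hence the renamed message. One pair with bᴴc = 0 (the bc_breakdown helper is
# assumed to construct something similar):
using LinearAlgebra
b = [1.0 + im, 0.0im]
c = [0.0im, 1.0 - im]
dot(b, c) == 0  # bᴴc = 0, so the biorthogonalization cannot even start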
# test callback function A, b = sparse_laplacian(FC=FC) diff --git a/test/test_crmr.jl b/test/test_crmr.jl index 6354f329f..d0f902df6 100644 --- a/test/test_crmr.jl +++ b/test/test_crmr.jl @@ -1,6 +1,6 @@ -function test_crmr(A, b; λ=0.0, M=I, history=false) +function test_crmr(A, b; λ=0.0, N=I, history=false) (nrow, ncol) = size(A) - (x, stats) = crmr(A, b, λ=λ, M=M, history=history) + (x, stats) = crmr(A, b, λ=λ, N=N, history=history) r = b - A * x if λ > 0 s = r / sqrt(λ) @@ -76,8 +76,8 @@ end A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0; 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0] b = [1.0; 0.0] - M = Diagonal(1 ./ (A * A')) - (x, stats, resid) = test_crmr(A, b, M=M) + N = Diagonal(1 ./ (A * A')) + (x, stats, resid) = test_crmr(A, b, N=N) @test(resid ≤ crmr_tol) @test(stats.solved) (xI, xmin, xmin_norm) = check_min_norm(A, b, x) @@ -87,7 +87,7 @@ end for transpose ∈ (false, true) A, b, c, D = small_sp(transpose, FC=FC) D⁻¹ = inv(D) - (x, stats) = crmr(A, b, M=D⁻¹, λ=1.0) + (x, stats) = crmr(A, b, N=D⁻¹, λ=1.0) end # test callback function diff --git a/test/test_diom.jl b/test/test_diom.jl index 4f1a8ecea..62a38b198 100644 --- a/test/test_diom.jl +++ b/test/test_diom.jl @@ -60,7 +60,7 @@ # Poisson equation in polar coordinates. A, b = polar_poisson(FC=FC) - (x, stats) = diom(A, b, memory=200) + (x, stats) = diom(A, b, memory=150) r = b - A * x resid = norm(r) / norm(b) @test(resid ≤ diom_tol) diff --git a/test/test_fgmres.jl b/test/test_fgmres.jl new file mode 100644 index 000000000..9bb73d3e4 --- /dev/null +++ b/test/test_fgmres.jl @@ -0,0 +1,154 @@ +import LinearAlgebra.mul! + +mutable struct FlexiblePreconditioner{T,S} + D::Diagonal{T, S} + ω::T +end + +function mul!(y::Vector, P::FlexiblePreconditioner, x::Vector) + P.ω = -P.ω + mul!(y, P.D, x) + y .*= P.ω +end + +@testset "fgmres" begin + fgmres_tol = 1.0e-6 + + for FC in (Float64, ComplexF64) + @testset "Data Type: $FC" begin + + # Symmetric and positive definite system. + A, b = symmetric_definite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Symmetric indefinite variant. + A, b = symmetric_indefinite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Nonsymmetric and positive definite systems. + A, b = nonsymmetric_definite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Nonsymmetric indefinite variant. + A, b = nonsymmetric_indefinite(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Symmetric indefinite variant, almost singular. + A, b = almost_singular(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ 100 * fgmres_tol) + @test(stats.solved) + + # Singular system. + A, b = square_inconsistent(FC=FC) + (x, stats) = fgmres(A, b) + r = b - A * x + Aresid = norm(A' * r) / norm(A' * b) + @test(Aresid ≤ fgmres_tol) + @test(stats.inconsistent) + + # Test b == 0 + A, b = zero_rhs(FC=FC) + (x, stats) = fgmres(A, b) + @test norm(x) == 0 + @test stats.status == "x = 0 is a zero-residual solution" + + # Poisson equation in polar coordinates. 
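# The reorthogonalization=true keyword exercised just below requests a second
# orthogonalization pass while building the Krylov basis. A minimal sketch of
# the idea (two modified Gram-Schmidt sweeps; mgs_reorth! is illustrative only,
# and Krylov.jl's internal implementation may differ):
using LinearAlgebra
function mgs_reorth!(v, V)   # V: matrix whose columns are the accepted basis vectors
  for _ in 1:2               # the second sweep is the "reorthogonalization"
    for q in eachcol(V)
      v .-= (q' * v) .* q    # remove the component of v along q
    end
  end
  v ./= norm(v)
  return v
end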
+ A, b = polar_poisson(FC=FC) + (x, stats) = fgmres(A, b, reorthogonalization=true) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Left preconditioning + A, b, M = square_preconditioned(FC=FC) + (x, stats) = fgmres(A, b, M=M) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Right preconditioning + A, b, N = square_preconditioned(FC=FC) + (x, stats) = fgmres(A, b, N=N) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Split preconditioning + A, b, M, N = two_preconditioners(FC=FC) + (x, stats) = fgmres(A, b, M=M, N=N) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + + # Restart + for restart ∈ (false, true) + memory = 10 + + A, b = sparse_laplacian(FC=FC) + (x, stats) = fgmres(A, b, restart=restart, memory=memory) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + + M = Diagonal(1 ./ diag(A)) + (x, stats) = fgmres(A, b, M=M, restart=restart, memory=memory) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + + N = Diagonal(1 ./ diag(A)) + (x, stats) = fgmres(A, b, N=N, restart=restart, memory=memory) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + + N = Diagonal(1 ./ sqrt.(diag(A))) + N = Diagonal(1 ./ sqrt.(diag(A))) + (x, stats) = fgmres(A, b, M=M, N=N, restart=restart, memory=memory) + r = b - A * x + resid = norm(M * r) / norm(M * b) + @test(resid ≤ fgmres_tol) + @test(stats.niter > memory) + @test(stats.solved) + end + + A, b = polar_poisson(FC=FC) + J = inv(Diagonal(A)) # Jacobi preconditioner + N = FlexiblePreconditioner(J, 1.0) + (x, stats) = fgmres(A, b, N=N) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ fgmres_tol) + @test(stats.solved) + end + end +end diff --git a/test/test_lnlq.jl b/test/test_lnlq.jl index 888119db8..b308609fa 100644 --- a/test/test_lnlq.jl +++ b/test/test_lnlq.jl @@ -1,5 +1,5 @@ function test_lnlq(A, b,transfer_to_craig) - (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0) + (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0) r = b - A * x resid = norm(r) / norm(b) return (x, y, stats, resid) @@ -61,8 +61,8 @@ end # Test regularization A, b, λ = regularization(FC=FC) - (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0) - (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, etolx=1e-10, etoly=1e-10, λ=λ) + (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0) + (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, utolx=1e-10, utoly=1e-10, λ=λ) for (x, y) in ((x, y), (xₛ, yₛ)) s = λ * y r = b - (A * x + λ * s) diff --git a/test/test_minres_qlp.jl b/test/test_minres_qlp.jl index 6e983e49a..0b4d2046d 100644 --- a/test/test_minres_qlp.jl +++ b/test/test_minres_qlp.jl @@ -80,7 +80,7 @@ solver = MinresQlpSolver(A, b) tol = 1.0 cb_n2 = TestCallbackN2(A, b, tol = tol) - minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, ctol = 0.0, callback = cb_n2) + minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, Artol = 0.0, callback = cb_n2) @test solver.stats.status == "user-requested exit" @test cb_n2(solver) diff 
--git a/test/test_mp.jl b/test/test_mp.jl index b7aa43d38..6b6d58450 100644 --- a/test/test_mp.jl +++ b/test/test_mp.jl @@ -3,7 +3,7 @@ for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr, :lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres, :bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom, - :cg_lanczos_shift) + :fgmres, :cg_lanczos_shift) for T in (Float16, Float32, Float64, BigFloat) for FC in (T, Complex{T}) A = spdiagm(-1 => -ones(FC,n-1), 0 => 3*ones(FC,n), 1 => -ones(FC,n-1)) diff --git a/test/test_processes.jl b/test/test_processes.jl new file mode 100644 index 000000000..eb3ad19af --- /dev/null +++ b/test/test_processes.jl @@ -0,0 +1,146 @@ +""" + P = permutation_paige(k) + +Return the sparse (2k) × (2k) matrix + + [e₁ • eₖ ] + [ e₁ • eₖ] +""" +function permutation_paige(k) + P = spzeros(Float64, 2k, 2k) + for i = 1:k + P[i,2i-1] = 1.0 + P[i+k,2i] = 1.0 + end + return P +end + +@testset "processes" begin + m = 250 + n = 500 + k = 20 + + for FC in (Float64, ComplexF64) + R = real(FC) + nbits_FC = sizeof(FC) + nbits_R = sizeof(R) + nbits_I = sizeof(Int) + + @testset "Data Type: $FC" begin + + @testset "Hermitian Lanczos" begin + A, b = symmetric_indefinite(n, FC=FC) + V, T = hermitian_lanczos(A, b, k) + + @test A * V[:,1:k] ≈ V * T + + storage_hermitian_lanczos_bytes(n, k) = 4k * nbits_I + (3k-1) * nbits_R + n*(k+1) * nbits_FC + + expected_hermitian_lanczos_bytes = storage_hermitian_lanczos_bytes(n, k) + actual_hermitian_lanczos_bytes = @allocated hermitian_lanczos(A, b, k) + @test expected_hermitian_lanczos_bytes ≤ actual_hermitian_lanczos_bytes ≤ 1.02 * expected_hermitian_lanczos_bytes + end + + @testset "Non-Hermitian Lanczos" begin + A, b = nonsymmetric_definite(n, FC=FC) + c = -b + V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k) + + @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]' + @test A * V[:,1:k] ≈ V * T + @test A' * U[:,1:k] ≈ U * Tᴴ + + storage_nonhermitian_lanczos_bytes(n, k) = 4k * nbits_I + (6k-2) * nbits_FC + 2*n*(k+1) * nbits_FC + + expected_nonhermitian_lanczos_bytes = storage_nonhermitian_lanczos_bytes(n, k) + actual_nonhermitian_lanczos_bytes = @allocated nonhermitian_lanczos(A, b, c, k) + @test expected_nonhermitian_lanczos_bytes ≤ actual_nonhermitian_lanczos_bytes ≤ 1.02 * expected_nonhermitian_lanczos_bytes + end + + @testset "Arnoldi" begin + A, b = nonsymmetric_indefinite(n, FC=FC) + V, H = arnoldi(A, b, k) + + @test A * V[:,1:k] ≈ V * H + + function storage_arnoldi_bytes(n, k) + return k*(k+1) * nbits_FC + n*(k+1) * nbits_FC + end + + expected_arnoldi_bytes = storage_arnoldi_bytes(n, k) + actual_arnoldi_bytes = @allocated arnoldi(A, b, k) + @test expected_arnoldi_bytes ≤ actual_arnoldi_bytes ≤ 1.02 * expected_arnoldi_bytes + end + + @testset "Golub-Kahan" begin + A, b = under_consistent(m, n, FC=FC) + V, U, L = golub_kahan(A, b, k) + B = L[1:k+1,1:k] + + @test A * V[:,1:k] ≈ U * B + @test A' * U ≈ V * L' + @test A' * A * V[:,1:k] ≈ V * L' * B + @test A * A' * U[:,1:k] ≈ U * B * L[1:k,1:k]' + + storage_golub_kahan_bytes(m, n, k) = 3*(k+1) * nbits_I + (2k+1) * nbits_R + (n+m)*(k+1) * nbits_FC + + expected_golub_kahan_bytes = storage_golub_kahan_bytes(m, n, k) + actual_golub_kahan_bytes = @allocated golub_kahan(A, b, k) + @test expected_golub_kahan_bytes ≤ actual_golub_kahan_bytes ≤ 1.02 * expected_golub_kahan_bytes + end + + @testset "Saunders-Simon-Yip" begin + A, b = under_consistent(m, n, FC=FC) + _, c = over_consistent(n, m, FC=FC) + V, T, U, Tᴴ = saunders_simon_yip(A, b, 
c, k) + + @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]' + @test A * U[:,1:k] ≈ V * T + @test A' * V[:,1:k] ≈ U * Tᴴ + @test A' * A * U[:,1:k-1] ≈ U * Tᴴ * T[1:k,1:k-1] + @test A * A' * V[:,1:k-1] ≈ V * T * Tᴴ[1:k,1:k-1] + + K = [zeros(FC,m,m) A; A' zeros(FC,n,n)] + Pₖ = permutation_paige(k) + Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ + Pₖ₊₁ = permutation_paige(k+1) + Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁ + G = Pₖ₊₁' * [zeros(FC,k+1,k) T; Tᴴ zeros(FC,k+1,k)] * Pₖ + @test K * Wₖ ≈ Wₖ₊₁ * G + + storage_saunders_simon_yip_bytes(m, n, k) = 4k * nbits_I + (6k-2) * nbits_FC + (n+m)*(k+1) * nbits_FC + + expected_saunders_simon_yip_bytes = storage_saunders_simon_yip_bytes(m, n, k) + actual_saunders_simon_yip_bytes = @allocated saunders_simon_yip(A, b, c, k) + @test expected_saunders_simon_yip_bytes ≤ actual_saunders_simon_yip_bytes ≤ 1.02 * expected_saunders_simon_yip_bytes + end + + @testset "Montoison-Orban" begin + A, b = under_consistent(m, n, FC=FC) + B, c = over_consistent(n, m, FC=FC) + V, H, U, F = montoison_orban(A, B, b, c, k) + + @test A * U[:,1:k] ≈ V * H + @test B * V[:,1:k] ≈ U * F + @test B * A * U[:,1:k-1] ≈ U * F * H[1:k,1:k-1] + @test A * B * V[:,1:k-1] ≈ V * H * F[1:k,1:k-1] + + K = [zeros(FC,m,m) A; B zeros(FC,n,n)] + Pₖ = permutation_paige(k) + Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ + Pₖ₊₁ = permutation_paige(k+1) + Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁ + G = Pₖ₊₁' * [zeros(FC,k+1,k) H; F zeros(FC,k+1,k)] * Pₖ + @test K * Wₖ ≈ Wₖ₊₁ * G + + function storage_montoison_orban_bytes(m, n, k) + return 2*k*(k+1) * nbits_FC + (n+m)*(k+1) * nbits_FC + end + + expected_montoison_orban_bytes = storage_montoison_orban_bytes(m, n, k) + actual_montoison_orban_bytes = @allocated montoison_orban(A, B, b, c, k) + @test expected_montoison_orban_bytes ≤ actual_montoison_orban_bytes ≤ 1.02 * expected_montoison_orban_bytes + end + end + end +end diff --git a/test/test_qmr.jl b/test/test_qmr.jl index 184b9877d..4a6b8c1c9 100644 --- a/test/test_qmr.jl +++ b/test/test_qmr.jl @@ -58,10 +58,10 @@ @test(resid ≤ qmr_tol) @test(stats.solved) - # Test bᵀc == 0 + # Test bᴴc == 0 A, b, c = bc_breakdown(FC=FC) (x, stats) = qmr(A, b, c=c) - @test stats.status == "Breakdown bᵀc = 0" + @test stats.status == "Breakdown bᴴc = 0" # test callback function solver = QmrSolver(A, b) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index 468fa5a05..2c98dc795 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -11,1139 +11,142 @@ function test_solvers(FC) nshifts = 5 T = real(FC) S = Vector{FC} + solvers = Dict{Symbol, KrylovSolver}() @eval begin - cg_solver = $(KRYLOV_SOLVERS[:cg])($n, $n, $S) - symmlq_solver = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S) - minres_solver = $(KRYLOV_SOLVERS[:minres])($n, $n, $S) - cg_lanczos_solver = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S) - diom_solver = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S) - fom_solver = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S) - dqgmres_solver = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S) - gmres_solver = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S) - cr_solver = $(KRYLOV_SOLVERS[:cr])($n, $n, $S) - crmr_solver = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S) - cgs_solver = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S) - bicgstab_solver = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S) - craigmr_solver = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S) - cgne_solver = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S) - lnlq_solver = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S) - craig_solver = $(KRYLOV_SOLVERS[:craig])($m, $n, $S) - lslq_solver = 
$(KRYLOV_SOLVERS[:lslq])($n, $m, $S) - cgls_solver = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S) - lsqr_solver = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S) - crls_solver = $(KRYLOV_SOLVERS[:crls])($n, $m, $S) - lsmr_solver = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S) - usymqr_solver = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S) - trilqr_solver = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S) - bilq_solver = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S) - bilqr_solver = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S) - minres_qlp_solver = $(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S) - qmr_solver = $(KRYLOV_SOLVERS[:qmr])($n, $n, $S) - usymlq_solver = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S) - tricg_solver = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S) - trimr_solver = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S) - gpmr_solver = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S) - cg_lanczos_shift_solver = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $m, $nshifts, $S) + $solvers[:cg] = $(KRYLOV_SOLVERS[:cg])($n, $n, $S) + $solvers[:symmlq] = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S) + $solvers[:minres] = $(KRYLOV_SOLVERS[:minres])($n, $n, $S) + $solvers[:cg_lanczos] = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S) + $solvers[:cg_lanczos_shift] = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $n, $nshifts, $S) + $solvers[:diom] = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S) + $solvers[:fom] = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S) + $solvers[:dqgmres] = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S) + $solvers[:gmres] = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S) + $solvers[:fgmres] = $(KRYLOV_SOLVERS[:fgmres])($n, $n, $mem, $S) + $solvers[:cr] = $(KRYLOV_SOLVERS[:cr])($n, $n, $S) + $solvers[:crmr] = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S) + $solvers[:cgs] = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S) + $solvers[:bicgstab] = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S) + $solvers[:craigmr] = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S) + $solvers[:cgne] = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S) + $solvers[:lnlq] = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S) + $solvers[:craig] = $(KRYLOV_SOLVERS[:craig])($m, $n, $S) + $solvers[:lslq] = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S) + $solvers[:cgls] = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S) + $solvers[:lsqr] = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S) + $solvers[:crls] = $(KRYLOV_SOLVERS[:crls])($n, $m, $S) + $solvers[:lsmr] = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S) + $solvers[:usymqr] = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S) + $solvers[:trilqr] = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S) + $solvers[:bilq] = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S) + $solvers[:bilqr] = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S) + $solvers[:minres_qlp] = $(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S) + $solvers[:qmr] = $(KRYLOV_SOLVERS[:qmr])($n, $n, $S) + $solvers[:usymlq] = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S) + $solvers[:tricg] = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S) + $solvers[:trimr] = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S) + $solvers[:gpmr] = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S) + $solvers[:cg_lanczos_shift] = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $n, $nshifts, $S) end - for i = 1 : 3 - A = i * A - Au = i * Au - Ao = i * Ao - b = 5 * b - c = 3 * c - - solver = solve!(cg_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(symmlq_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === 
solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(minres_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cg_lanczos_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cg_lanczos_shift_solver, A, b, shifts) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(diom_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(fom_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(dqgmres_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(gmres_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cr_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(crmr_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cgs_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == 2 * niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(bicgstab_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == 2 * niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(craigmr_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - 
@test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(cgne_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(lnlq_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(craig_solver, Au, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(lslq_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(cgls_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(lsqr_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(crls_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(lsmr_solver, Ao, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(usymqr_solver, Ao, b, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(trilqr_solver, A, b, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved_primal(solver) - @test issolved_dual(solver) - @test issolved(solver) - - solver = solve!(bilq_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(bilqr_solver, A, b, b) - niter = niterations(solver) - @test niter > 0 - 
@test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved_primal(solver) - @test issolved_dual(solver) - @test issolved(solver) - - solver = solve!(minres_qlp_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(qmr_solver, A, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(usymlq_solver, Au, c, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test nsolution(solver) == 1 - @test issolved(solver) - - solver = solve!(tricg_solver, Au, c, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(trimr_solver, Au, c, b) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) - - solver = solve!(gpmr_solver, Ao, Au, b, c) - niter = niterations(solver) - @test niter > 0 - @test Aprod(solver) == niter - @test Atprod(solver) == 0 - @test Bprod(solver) == niter - @test statistics(solver) === solver.stats - @test solution(solver, 1) === solver.x - @test solution(solver, 2) === solver.y - @test nsolution(solver) == 2 - @test issolved(solver) + for (method, solver) in solvers + @testset "$(method)" begin + for i = 1 : 3 + A = i * A + Au = i * Au + Ao = i * Ao + b = 5 * b + c = 3 * c + + if method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom, + :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr, :cg_lanczos_shift) + method == :cg_lanczos_shift ? solve!(solver, A, b, shifts) : solve!(solver, A, b) + niter = niterations(solver) + @test Aprod(solver) == (method ∈ (:cgs, :bicgstab) ? 2 * niter : niter) + @test Atprod(solver) == (method ∈ (:bilq, :qmr) ? niter : 0) + @test solution(solver) === solver.x + @test nsolution(solver) == 1 + end + + if method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr) + solve!(solver, Au, c) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver, 1) === solver.x + @test nsolution(solver) == (method ∈ (:cgne, :crmr) ? 
1 : 2) + (nsolution(solver) == 2) && (@test solution(solver, 2) === solver.y) + end + + if method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr) + solve!(solver, Ao, b) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver) === solver.x + @test nsolution(solver) == 1 + end + + if method ∈ (:bilqr, :trilqr) + solve!(solver, A, b, b) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver, 1) === solver.x + @test solution(solver, 2) === solver.y + @test nsolution(solver) == 2 + @test issolved_primal(solver) + @test issolved_dual(solver) + end + + if method ∈ (:tricg, :trimr, :gpmr) + method == :gpmr ? solve!(solver, Ao, Au, b, c) : solve!(solver, Au, c, b) + niter = niterations(solver) + @test Aprod(solver) == niter + method != :gpmr && (@test Atprod(solver) == niter) + method == :gpmr && (@test Bprod(solver) == niter) + @test solution(solver, 1) === solver.x + @test solution(solver, 2) === solver.y + @test nsolution(solver) == 2 + end + + if method ∈ (:usymlq, :usymqr) + method == :usymlq ? solve!(solver, Au, c, b) : solve!(solver, Ao, b, c) + niter = niterations(solver) + @test Aprod(solver) == niter + @test Atprod(solver) == niter + @test solution(solver) === solver.x + @test nsolution(solver) == 1 + end + + @test niter > 0 + @test statistics(solver) === solver.stats + @test issolved(solver) + end + + io = IOBuffer() + show(io, solver, show_stats=false) + showed = String(take!(io)) + + # Test that the lines have the same length + str = split(showed, "\n", keepempty=false) + len_row = length(str[1]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_row, &, str) + + # Test that the columns have the same length + str2 = split(showed, ['│','┌','┬','┐','├','┼','┤','└','┴','┴','┘','\n'], keepempty=false) + len_col1 = length(str2[1]) + len_col2 = length(str2[2]) + len_col3 = length(str2[3]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col1, &, str2[1:3:end-2]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col2, &, str2[2:3:end-1]) + @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col3, &, str2[3:3:end]) + + # Code coverage + show(io, solver, show_stats=true) + end end - - io = IOBuffer() - show(io, cg_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ CgSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Ap│ Vector{$FC}│ 64│ - │ z│ Vector{$FC}│ 0│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, symmlq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │SymmlqSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ Mvold│ Vector{$FC}│ 64│ - │ Mv│ Vector{$FC}│ 64│ - │ Mv_next│ Vector{$FC}│
64│ - │ w̅│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ clist│ Vector{$T}│ 5│ - │ zlist│ Vector{$T}│ 5│ - │ sprod│ Vector{$T}│ 5│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, minres_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │MinresSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r1│ Vector{$FC}│ 64│ - │ r2│ Vector{$FC}│ 64│ - │ w1│ Vector{$FC}│ 64│ - │ w2│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cg_lanczos_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────────┬───────────────┬─────────────────┐ - │CgLanczosSolver│Precision: $FC │Architecture: CPU│ - ├───────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ Mv│ Vector{$FC}│ 64│ - │ Mv_prev│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Mv_next│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cg_lanczos_shift_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────────────┬───────────────────┬─────────────────┐ - │CgLanczosShiftSolver│ Precision: $FC │Architecture: CPU│ - ├────────────────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────────────┼───────────────────┼─────────────────┤ - │ Mv│ Vector{$FC}│ 64│ - │ Mv_prev│ Vector{$FC}│ 64│ - │ Mv_next│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 0│ - │ x│Vector{Vector{$FC}}│ 5 x 64│ - │ p│Vector{Vector{$FC}}│ 5 x 64│ - │ σ│ Vector{$T}│ 5│ - │ δhat│ Vector{$T}│ 5│ - │ ω│ Vector{$T}│ 5│ - │ γ│ Vector{$T}│ 5│ - │ rNorms│ Vector{$T}│ 5│ - │ converged│ BitVector│ 5│ - │ not_cv│ BitVector│ 5│ - └────────────────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, diom_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────────┬─────────────────┐ - │DiomSolver│ Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ t│ Vector{$FC}│ 64│ - │ z│ Vector{$FC}│ 0│ - │ w│ Vector{$FC}│ 0│ - │ P│Vector{Vector{$FC}}│ 10 x 64│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ L│ Vector{$FC}│ 10│ - │ H│ Vector{$FC}│ 12│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────────┴─────────────────┘ - """ - @test 
reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, fom_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────────┬─────────────────┐ - │ FomSolver│ Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ w│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ l│ Vector{$FC}│ 10│ - │ z│ Vector{$FC}│ 10│ - │ U│ Vector{$FC}│ 55│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, dqgmres_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌─────────────┬───────────────────┬─────────────────┐ - │DqgmresSolver│ Precision: $FC │Architecture: CPU│ - ├─────────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├─────────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ t│ Vector{$FC}│ 64│ - │ z│ Vector{$FC}│ 0│ - │ w│ Vector{$FC}│ 0│ - │ P│Vector{Vector{$FC}}│ 10 x 64│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ c│ Vector{$T}│ 10│ - │ s│ Vector{$FC}│ 10│ - │ H│ Vector{$FC}│ 12│ - │ warm_start│ Bool│ 0│ - └─────────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, gmres_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────────┬─────────────────┐ - │GmresSolver│ Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ w│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ c│ Vector{$T}│ 10│ - │ s│ Vector{$FC}│ 10│ - │ z│ Vector{$FC}│ 10│ - │ R│ Vector{$FC}│ 55│ - │ warm_start│ Bool│ 0│ - │ inner_iter│ Int64│ 0│ - └───────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ CrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Ar│ Vector{$FC}│ 64│ - │ Mq│ Vector{$FC}│ 0│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, crmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CrmrSolver│Precision: $FC │Architecture: 
CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Aᵀr│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ Mq│ Vector{$FC}│ 0│ - │ s│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cgs_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ CgsSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │Attribute │ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ ts│ Vector{$FC}│ 64│ - │ yz│ Vector{$FC}│ 0│ - │ vw│ Vector{$FC}│ 0│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, bicgstab_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────────┬───────────────┬─────────────────┐ - │BicgstabSolver│Precision: $FC │Architecture: CPU│ - ├──────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ v│ Vector{$FC}│ 64│ - │ s│ Vector{$FC}│ 64│ - │ qd│ Vector{$FC}│ 64│ - │ yz│ Vector{$FC}│ 0│ - │ t│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └──────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, craigmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌─────────────┬───────────────┬─────────────────┐ - │CraigmrSolver│Precision: $FC │Architecture: CPU│ - ├─────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├─────────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ Nv│ Vector{$FC}│ 64│ - │ Aᵀu│ Vector{$FC}│ 64│ - │ d│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 32│ - │ w│ Vector{$FC}│ 32│ - │ wbar│ Vector{$FC}│ 32│ - │ Av│ Vector{$FC}│ 32│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - └─────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cgne_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CgneSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Aᵀz│ Vector{$FC}│ 64│ - │ r│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ s│ Vector{$FC}│ 0│ - │ z│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => 
"", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lnlq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LnlqSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ Nv│ Vector{$FC}│ 64│ - │ Aᵀu│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ w̄│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 32│ - │ Av│ Vector{$FC}│ 32│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ q│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, craig_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │CraigSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 64│ - │ Nv│ Vector{$FC}│ 64│ - │ Aᵀu│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ w│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 32│ - │ Av│ Vector{$FC}│ 32│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ w2│ Vector{$FC}│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lslq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LslqSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ Nv│ Vector{$FC}│ 32│ - │ Aᵀu│ Vector{$FC}│ 32│ - │ w̄│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 64│ - │ Av│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, cgls_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CglsSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ p│ Vector{$FC}│ 32│ - │ s│ Vector{$FC}│ 32│ - │ r│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Mr│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lsqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LsqrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ Nv│ Vector{$FC}│ 32│ - │ Aᵀu│ Vector{$FC}│ 32│ - │ w│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 64│ - │ Av│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ 
err_vec│ Vector{$T}│ 5│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, crls_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │CrlsSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ p│ Vector{$FC}│ 32│ - │ Ar│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ r│ Vector{$FC}│ 64│ - │ Ap│ Vector{$FC}│ 64│ - │ s│ Vector{$FC}│ 64│ - │ Ms│ Vector{$FC}│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, lsmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │LsmrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ x│ Vector{$FC}│ 32│ - │ Nv│ Vector{$FC}│ 32│ - │ Aᵀu│ Vector{$FC}│ 32│ - │ h│ Vector{$FC}│ 32│ - │ hbar│ Vector{$FC}│ 32│ - │ Mu│ Vector{$FC}│ 64│ - │ Av│ Vector{$FC}│ 64│ - │ u│ Vector{$FC}│ 0│ - │ v│ Vector{$FC}│ 0│ - │ err_vec│ Vector{$T}│ 5│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, usymqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │UsymqrSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 32│ - │ wₖ₋₂│ Vector{$FC}│ 32│ - │ wₖ₋₁│ Vector{$FC}│ 32│ - │ uₖ₋₁│ Vector{$FC}│ 32│ - │ uₖ│ Vector{$FC}│ 32│ - │ p│ Vector{$FC}│ 32│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, trilqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │TrilqrSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ Δy│ Vector{$FC}│ 0│ - │ y│ Vector{$FC}│ 64│ - │ wₖ₋₃│ Vector{$FC}│ 64│ - │ wₖ₋₂│ Vector{$FC}│ 64│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, bilq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - 
┌──────────┬───────────────┬─────────────────┐ - │BilqSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, bilqr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │BilqrSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ Δy│ Vector{$FC}│ 0│ - │ y│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │ wₖ₋₃│ Vector{$FC}│ 64│ - │ wₖ₋₂│ Vector{$FC}│ 64│ - │ warm_start│ Bool│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, minres_qlp_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────────┬───────────────┬─────────────────┐ - │MinresQlpSolver│Precision: $FC │Architecture: CPU│ - ├───────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────────┼───────────────┼─────────────────┤ - │ Δx│ Vector{$FC}│ 0│ - │ wₖ₋₁│ Vector{$FC}│ 64│ - │ wₖ│ Vector{$FC}│ 64│ - │ M⁻¹vₖ₋₁│ Vector{$FC}│ 64│ - │ M⁻¹vₖ│ Vector{$FC}│ 64│ - │ x│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, qmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────┬─────────────────┐ - │ QmrSolver│Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ q│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 64│ - │ vₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ wₖ₋₂│ Vector{$FC}│ 64│ - │ wₖ₋₁│ Vector{$FC}│ 64│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, usymlq_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌────────────┬───────────────┬─────────────────┐ - │UsymlqSolver│Precision: $FC │Architecture: CPU│ - ├────────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├────────────┼───────────────┼─────────────────┤ - │ uₖ₋₁│ Vector{$FC}│ 64│ - │ uₖ│ Vector{$FC}│ 64│ - │ p│ 
Vector{$FC}│ 64│ - │ Δx│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ d̅│ Vector{$FC}│ 64│ - │ vₖ₋₁│ Vector{$FC}│ 32│ - │ vₖ│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ warm_start│ Bool│ 0│ - └────────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, tricg_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │TricgSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ y│ Vector{$FC}│ 64│ - │ N⁻¹uₖ₋₁│ Vector{$FC}│ 64│ - │ N⁻¹uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₁│ Vector{$FC}│ 64│ - │ gy₂ₖ│ Vector{$FC}│ 64│ - │ x│ Vector{$FC}│ 32│ - │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│ - │ M⁻¹vₖ│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₁│ Vector{$FC}│ 32│ - │ gx₂ₖ│ Vector{$FC}│ 32│ - │ Δx│ Vector{$FC}│ 0│ - │ Δy│ Vector{$FC}│ 0│ - │ uₖ│ Vector{$FC}│ 0│ - │ vₖ│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, trimr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌───────────┬───────────────┬─────────────────┐ - │TrimrSolver│Precision: $FC │Architecture: CPU│ - ├───────────┼───────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├───────────┼───────────────┼─────────────────┤ - │ y│ Vector{$FC}│ 64│ - │ N⁻¹uₖ₋₁│ Vector{$FC}│ 64│ - │ N⁻¹uₖ│ Vector{$FC}│ 64│ - │ p│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₃│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₂│ Vector{$FC}│ 64│ - │ gy₂ₖ₋₁│ Vector{$FC}│ 64│ - │ gy₂ₖ│ Vector{$FC}│ 64│ - │ x│ Vector{$FC}│ 32│ - │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│ - │ M⁻¹vₖ│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₃│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₂│ Vector{$FC}│ 32│ - │ gx₂ₖ₋₁│ Vector{$FC}│ 32│ - │ gx₂ₖ│ Vector{$FC}│ 32│ - │ Δx│ Vector{$FC}│ 0│ - │ Δy│ Vector{$FC}│ 0│ - │ uₖ│ Vector{$FC}│ 0│ - │ vₖ│ Vector{$FC}│ 0│ - │ warm_start│ Bool│ 0│ - └───────────┴───────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) - - io = IOBuffer() - show(io, gpmr_solver, show_stats=false) - showed = String(take!(io)) - expected = """ - ┌──────────┬───────────────────┬─────────────────┐ - │GpmrSolver│ Precision: $FC │Architecture: CPU│ - ├──────────┼───────────────────┼─────────────────┤ - │ Attribute│ Type│ Size│ - ├──────────┼───────────────────┼─────────────────┤ - │ wA│ Vector{$FC}│ 0│ - │ wB│ Vector{$FC}│ 0│ - │ dA│ Vector{$FC}│ 64│ - │ dB│ Vector{$FC}│ 32│ - │ Δx│ Vector{$FC}│ 0│ - │ Δy│ Vector{$FC}│ 0│ - │ x│ Vector{$FC}│ 64│ - │ y│ Vector{$FC}│ 32│ - │ q│ Vector{$FC}│ 0│ - │ p│ Vector{$FC}│ 0│ - │ V│Vector{Vector{$FC}}│ 10 x 64│ - │ U│Vector{Vector{$FC}}│ 10 x 32│ - │ gs│ Vector{$FC}│ 40│ - │ gc│ Vector{$T}│ 40│ - │ zt│ Vector{$FC}│ 20│ - │ R│ Vector{$FC}│ 210│ - │warm_start│ Bool│ 0│ - └──────────┴───────────────────┴─────────────────┘ - """ - @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected) end @testset "solvers" begin diff --git a/test/test_stats.jl b/test/test_stats.jl index 
4289a78a3..186c56c20 100644 --- a/test/test_stats.jl +++ b/test/test_stats.jl @@ -4,7 +4,7 @@ show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Simple stats + expected = """SimpleStats niter: 0 solved: true inconsistent: true @@ -15,14 +15,15 @@ @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 stats = Krylov.LsmrStats(0, true, true, Float64[1.0], Float64[2.0], Float64(3.0), Float64(4.0), Float64(5.0), Float64(6.0), Float64(7.0), "t") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Lsmr stats + expected = """LsmrStats niter: 0 solved: true inconsistent: true @@ -37,14 +38,15 @@ @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 stats = Krylov.LanczosStats(0, true, Float64[3.0], true, NaN, NaN, "t") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Lanczos stats + expected = """LanczosStats niter: 0 solved: true residuals: [ 3.0e+00 ] @@ -55,14 +57,15 @@ @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 stats = Krylov.LanczosShiftStats(0, true, [Float64[0.9, 0.5], Float64[0.6, 0.4, 0.1]], BitVector([false, true]), NaN, NaN, "t") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """LanczosShift stats + expected = """LanczosShiftStats niter: 0 solved: true residuals: [[0.9, 0.5], [0.6, 0.4, 0.1]] @@ -70,16 +73,17 @@ ‖A‖F: NaN κ₂(A): NaN status: t""" - @test (VERSION < v"1.5") || strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) + @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 stats = Krylov.SymmlqStats(0, true, Float64[4.0], Union{Float64,Missing}[5.0, missing], Float64[6.0], Union{Float64,Missing}[7.0, missing], NaN, NaN, "t") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Symmlq stats + expected = """SymmlqStats niter: 0 solved: true residuals: [ 4.0e+00 ] @@ -92,14 +96,15 @@ @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 stats = Krylov.AdjointStats(0, true, true, Float64[8.0], Float64[9.0], "t") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """Adjoint stats + expected = """AdjointStats niter: 0 solved primal: true solved dual: true @@ -109,14 +114,15 @@ @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test 
(VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 stats = Krylov.LNLQStats(0, true, Float64[10.0], false, Float64[11.0], Float64[12.0], "t") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """LNLQ stats + expected = """LNLQStats niter: 0 solved: true residuals: [ 1.0e+01 ] @@ -127,14 +133,15 @@ @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 stats = Krylov.LSLQStats(0, true, false, Float64[13.0], Float64[14.0], Float64[15.0], false, Float64[16.0], Float64[17.0], "t") io = IOBuffer() show(io, stats) showed = String(take!(io)) storage_type = typeof(stats) - expected = """LSLQ stats + expected = """LSLQStats niter: 0 solved: true inconsistent: false @@ -148,5 +155,6 @@ @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n")) Krylov.reset!(stats) check_reset(stats) - @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0 + nbytes_allocated = @allocated Krylov.reset!(stats) + @test nbytes_allocated == 0 end diff --git a/test/test_trilqr.jl b/test/test_trilqr.jl index 7d7927372..baf8a597e 100644 --- a/test/test_trilqr.jl +++ b/test/test_trilqr.jl @@ -74,7 +74,7 @@ @test(resid_dual ≤ trilqr_tol) @test(stats.solved_dual) - # Test consistent Ax = b and inconsistent Aᵀt = c. + # Test consistent Ax = b and inconsistent Aᴴt = c. A, b, c = rectangular_adjoint(FC=FC) (x, t, stats) = trilqr(A, b, c) diff --git a/test/test_utils.jl b/test/test_utils.jl index ed72056b6..f1c3ca44e 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -1,50 +1,51 @@ include("get_div_grad.jl") include("gen_lsq.jl") include("check_min_norm.jl") +include("callback_utils.jl") # Symmetric and positive definite systems. function symmetric_definite(n :: Int=10; FC=Float64) - α = FC <: Complex ? im : 1 + α = FC <: Complex ? FC(im) : one(FC) A = spdiagm(-1 => α * ones(FC, n-1), 0 => 4 * ones(FC, n), 1 => conj(α) * ones(FC, n-1)) - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Symmetric and indefinite systems. function symmetric_indefinite(n :: Int=10; FC=Float64) - α = FC <: Complex ? im : 1 + α = FC <: Complex ? FC(im) : one(FC) A = spdiagm(-1 => α * ones(FC, n-1), 0 => ones(FC, n), 1 => conj(α) * ones(FC, n-1)) - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Nonsymmetric and positive definite systems. function nonsymmetric_definite(n :: Int=10; FC=Float64) if FC <: Complex - A = [i == j ? n * one(FC) : im * one(FC) for i=1:n, j=1:n] + A = [i == j ? n * one(FC) : FC(im) * one(FC) for i=1:n, j=1:n] else A = [i == j ? n * one(FC) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n] end - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Nonsymmetric and indefinite systems. function nonsymmetric_indefinite(n :: Int=10; FC=Float64) if FC <: Complex - A = [i == j ? n * (-one(FC))^(i*j) : im * one(FC) for i=1:n, j=1:n] + A = [i == j ? n * (-one(FC))^(i*j) : FC(im) * one(FC) for i=1:n, j=1:n] else A = [i == j ? n * (-one(FC))^(i*j) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n] end - b = A * [1:n;] + b = A * FC[1:n;] return A, b end # Underdetermined and consistent systems. function under_consistent(n :: Int=10, m :: Int=25; FC=Float64) n < m || error("Square or overdetermined system!") - α = FC <: Complex ? 
im : 1 - A = [i/j - α * j/i for i=1:n, j=1:m] + α = FC <: Complex ? FC(im) : one(FC) + A = FC[i/j - α * j/i for i=1:n, j=1:m] b = A * ones(FC, m) return A, b end @@ -52,7 +53,7 @@ end # Underdetermined and inconsistent systems. function under_inconsistent(n :: Int=10, m :: Int=25; FC=Float64) n < m || error("Square or overdetermined system!") - α = FC <: Complex ? 1 + im : 1 + α = FC <: Complex ? FC(1 + im) : one(FC) A = α * ones(FC, n, m) b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n] return A, b end @@ -84,8 +85,8 @@ end # Overdetermined and consistent systems. function over_consistent(n :: Int=25, m :: Int=10; FC=Float64) n > m || error("Underdetermined or square system!") - α = FC <: Complex ? im : 1 - A = [i/j - α * j/i for i=1:n, j=1:m] + α = FC <: Complex ? FC(im) : one(FC) + A = FC[i/j - α * j/i for i=1:n, j=1:m] b = A * ones(FC, m) return A, b end @@ -93,7 +94,7 @@ end # Overdetermined and inconsistent systems. function over_inconsistent(n :: Int=25, m :: Int=10; FC=Float64) n > m || error("Underdetermined or square system!") - α = FC <: Complex ? 1 + im : 1 + α = FC <: Complex ? FC(1 + im) : one(FC) A = α * ones(FC, n, m) b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n] return A, b end @@ -162,23 +163,23 @@ end function underdetermined_adjoint(n :: Int=100, m :: Int=200; FC=Float64) n < m || error("Square or overdetermined system!") A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m] - b = A * [1:m;] - c = A' * [-n:-1;] + b = A * FC[1:m;] + c = A' * FC[-n:-1;] return A, b, c end # Square consistent adjoint systems. function square_adjoint(n :: Int=100; FC=Float64) A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n] - b = A * [1:n;] - c = A' * [-n:-1;] + b = A * FC[1:n;] + c = A' * FC[-n:-1;] return A, b, c end -# Adjoint systems with Ax = b underdetermined consistent and Aᵀt = c overdetermined insconsistent. +# Adjoint systems with Ax = b underdetermined consistent and Aᴴt = c overdetermined inconsistent. function rectangular_adjoint(n :: Int=10, m :: Int=25; FC=Float64) - Aᵀ, c = over_inconsistent(m, n; FC=FC) - A = adjoint(Aᵀ) + Aᴴ, c = over_inconsistent(m, n; FC=FC) + A = adjoint(Aᴴ) b = A * ones(FC, m) return A, b, c end @@ -187,8 +188,8 @@ end function overdetermined_adjoint(n :: Int=200, m :: Int=100; FC=Float64) n > m || error("Underdetermined or square system!") A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m] - b = A * [1:m;] - c = A' * [-n:-1;] + b = A * FC[1:m;] + c = A' * FC[-n:-1;] return A, b, c end @@ -251,7 +252,7 @@ end # Square and preconditioned problems.
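# Why the FC(im) / one(FC) and FC[1:n;] conversions above help (a minimal
# illustration, assuming a REPL session; these snippets are not part of the suite):
# the bare literals im and 1 have types Complex{Bool} and Int, so the old ternary
# FC <: Complex ? im : 1 mixed eltypes into α; converting both branches keeps α,
# and every array built from it, at the precision FC under test, and FC[1:n;]
# likewise builds the right-hand sides directly in FC instead of relying on
# promotion from a Vector{Int}.
#
#   julia> typeof(true ? im : 1)   # the two branches disagree: Complex{Bool} vs Int
#   Complex{Bool}
#
#   julia> FC = ComplexF32; typeof(FC <: Complex ? FC(im) : one(FC))
#   ComplexF32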
function square_preconditioned(n :: Int=10; FC=Float64) A = ones(FC, n, n) + (n-1) * eye(n) - b = FC(10.0) * [1:n;] + b = 10 * FC[1:n;] M⁻¹ = FC(1/n) * eye(n) return A, b, M⁻¹ end @@ -363,110 +364,3 @@ function check_reset(stats :: KS) where KS <: Krylov.KrylovStats end end end - -# Test callback -mutable struct TestCallbackN2{T, S, M} - A::M - b::S - storage_vec::S - tol::T -end -TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol) - -function (cb_n2::TestCallbackN2)(solver) - mul!(cb_n2.storage_vec, cb_n2.A, solver.x) - cb_n2.storage_vec .-= cb_n2.b - return norm(cb_n2.storage_vec) ≤ cb_n2.tol -end - -mutable struct TestCallbackN2Adjoint{T, S, M} - A::M - b::S - c::S - storage_vec1::S - storage_vec2::S - tol::T -end -TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol) - -function (cb_n2::TestCallbackN2Adjoint)(solver) - mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) - cb_n2.storage_vec1 .-= cb_n2.b - mul!(cb_n2.storage_vec2, cb_n2.A', solver.y) - cb_n2.storage_vec2 .-= cb_n2.c - return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) -end - -mutable struct TestCallbackN2Shifts{T, S, M} - A::M - b::S - shifts::Vector{T} - tol::T -end -TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol) - -function (cb_n2::TestCallbackN2Shifts)(solver) - r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x) - return all(map(norm, r) .≤ cb_n2.tol) -end - -mutable struct TestCallbackN2LS{T, S, M} - A::M - b::S - λ::T - storage_vec1::S - storage_vec2::S - tol::T -end -TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol) - -function (cb_n2::TestCallbackN2LS)(solver) - mul!(cb_n2.storage_vec1, cb_n2.A, solver.x) - cb_n2.storage_vec1 .-= cb_n2.b - mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1) - cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x - return norm(cb_n2.storage_vec2) ≤ cb_n2.tol -end - -mutable struct TestCallbackN2LN{T, S, M} - A::M - b::S - λ::T - storage_vec::S - tol::T -end -TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol) - -function (cb_n2::TestCallbackN2LN)(solver) - mul!(cb_n2.storage_vec, cb_n2.A, solver.x) - cb_n2.storage_vec .-= cb_n2.b - cb_n2.λ != 0 && (cb_n2.storage_vec .+= sqrt(cb_n2.λ) .* solver.s) - return norm(cb_n2.storage_vec) ≤ cb_n2.tol -end - -mutable struct TestCallbackN2SaddlePts{T, S, M} - A::M - b::S - c::S - storage_vec1::S - storage_vec2::S - tol::T -end -TestCallbackN2SaddlePts(A, b, c; tol = 0.1) = - TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol) - -function (cb_n2::TestCallbackN2SaddlePts)(solver) - mul!(cb_n2.storage_vec1, cb_n2.A, solver.y) - cb_n2.storage_vec1 .+= solver.x .- cb_n2.b - mul!(cb_n2.storage_vec2, cb_n2.A', solver.x) - cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c - return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol) -end - -function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol) - get_x_restarted_gmres!(solver, A, stor, N) - x = stor.x - mul!(storage_vec, A, x) - storage_vec .-= b - return (norm(storage_vec) ≤ tol) -end diff --git a/test/test_warm_start.jl b/test/test_warm_start.jl index 66a1cbea7..232a5a9cf 100644 --- a/test/test_warm_start.jl +++ b/test/test_warm_start.jl @@ -70,6 +70,11 @@ function test_warm_start(FC) resid = norm(r) / norm(b) @test(resid ≤ tol) + x, stats = fgmres(A, b, x0) + r = b - A * x + resid = norm(r) / norm(b) + @test(resid ≤ 
tol) + x, stats = bicgstab(A, b, x0) r = b - A * x resid = norm(r) / norm(b)
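For context, the warm-start convention exercised by the new fgmres test above (and by the neighbouring solver tests in the same file) passes the initial guess x0 as a trailing positional argument and accepts the solve once the relative residual norm(b - A * x) / norm(b) drops below tol. A minimal self-contained sketch of that pattern follows; the system and tolerance below are illustrative values chosen here, not data from the test suite.

    using Krylov, LinearAlgebra

    n  = 10
    A  = Matrix(10.0I, n, n) + rand(n, n)  # diagonally dominant, hence nonsingular
    b  = A * ones(n)                       # right-hand side with known solution
    x0 = 0.9 * ones(n)                     # initial guess, passed positionally

    x, stats = fgmres(A, b, x0)            # warm-started FGMRES, as in the test above
    resid = norm(b - A * x) / norm(b)      # same acceptance check as the test
    @assert resid ≤ 1e-6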