diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
new file mode 100644
index 000000000..d2cbb0258
--- /dev/null
+++ b/.buildkite/pipeline.yml
@@ -0,0 +1,67 @@
+steps:
+ - label: "Nvidia GPUs -- CUDA.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.8
+ agents:
+ queue: "juliagpu"
+ cuda: "*"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("CUDA")
+ Pkg.add("LinearOperators")
+ Pkg.instantiate()
+ include("test/gpu/nvidia.jl")'
+ timeout_in_minutes: 30
+
+ - label: "AMD GPUs -- AMDGPU.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.9-nightly
+ agents:
+ queue: "juliagpu"
+ rocm: "*"
+ rocmgpu: "*"
+ env:
+ JULIA_AMDGPU_CORE_MUST_LOAD: "1"
+ JULIA_AMDGPU_HIP_MUST_LOAD: "1"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ # Pkg.add("AMDGPU")
+ Pkg.add(url="https://github.com/JuliaGPU/AMDGPU.jl", rev="master")
+ Pkg.instantiate()
+ include("test/gpu/amd.jl")'
+ timeout_in_minutes: 30
+
+ - label: "Intel GPUs -- oneAPI.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.8
+ agents:
+ queue: "juliagpu"
+ intel: "*"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("oneAPI")
+ Pkg.instantiate()
+ include("test/gpu/intel.jl")'
+ timeout_in_minutes: 30
+
+ - label: "Apple M1 GPUs -- Metal.jl"
+ plugins:
+ - JuliaCI/julia#v1:
+ version: 1.8
+ agents:
+ queue: "juliaecosystem"
+ os: "macos"
+ arch: "aarch64"
+ command: |
+ julia --color=yes --project -e '
+ using Pkg
+ Pkg.add("Metal")
+ Pkg.instantiate()
+ include("test/gpu/metal.jl")'
+ timeout_in_minutes: 30
diff --git a/.cirrus.yml b/.cirrus.yml
index d559cf609..f51d815a3 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -1,15 +1,41 @@
-freebsd_instance:
- image: freebsd-13-0-release-amd64
task:
- name: FreeBSD
- env:
- matrix:
- - JULIA_VERSION: 1.6
- - JULIA_VERSION: 1
- - JULIA_VERSION: nightly
- allow_failures: $JULIA_VERSION == 'nightly'
- install_script:
- - sh -c "$(fetch https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh -o -)"
+ matrix:
+ - name: FreeBSD
+ freebsd_instance:
+ image_family: freebsd-13-1
+ env:
+ matrix:
+ - JULIA_VERSION: 1.6
+ - JULIA_VERSION: 1
+ - name: Linux ARMv8
+ arm_container:
+ image: ubuntu:latest
+ env:
+ - JULIA_VERSION: 1
+ - name: musl Linux
+ container:
+ image: alpine:3.14
+ env:
+ - JULIA_VERSION: 1
+ - name: MacOS M1
+ macos_instance:
+ image: ghcr.io/cirruslabs/macos-monterey-base:latest
+ env:
+ - JULIA_VERSION: 1
+ install_script: |
+ URL="https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh"
+ set -x
+ if [ "$(uname -s)" = "Linux" ] && command -v apt; then
+ apt update
+ apt install -y curl
+ fi
+ if command -v curl; then
+ sh -c "$(curl ${URL})"
+ elif command -v wget; then
+ sh -c "$(wget ${URL} -q -O-)"
+ elif command -v fetch; then
+ sh -c "$(fetch ${URL} -o -)"
+ fi
build_script:
- cirrusjl build
test_script:
diff --git a/.github/workflows/Aqua.yml b/.github/workflows/Aqua.yml
new file mode 100644
index 000000000..da872e225
--- /dev/null
+++ b/.github/workflows/Aqua.yml
@@ -0,0 +1,17 @@
+name: Aqua
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ types: [opened, synchronize, reopened]
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - uses: julia-actions/setup-julia@latest
+ with:
+ version: '1'
+ - name: Aqua.jl
+ run: julia --color=yes -e 'using Pkg; Pkg.add("Aqua"); Pkg.develop(path="."); using Aqua, Krylov; Aqua.test_all(Krylov)'
diff --git a/.github/workflows/Breakage.yml b/.github/workflows/Breakage.yml
index 266eed3cc..8fd92afdd 100644
--- a/.github/workflows/Breakage.yml
+++ b/.github/workflows/Breakage.yml
@@ -24,14 +24,14 @@ jobs:
pkgversion: [latest, stable]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
# Install Julia
- uses: julia-actions/setup-julia@v1
with:
version: '1'
arch: x64
- - uses: actions/cache@v1
+ - uses: actions/cache@v3
env:
cache-name: cache-artifacts
with:
@@ -85,7 +85,7 @@ jobs:
end;
end'
- - uses: actions/upload-artifact@v2
+ - uses: actions/upload-artifact@v3
with:
name: pr
path: pr/
@@ -94,9 +94,9 @@ jobs:
needs: break
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- - uses: actions/download-artifact@v2
+ - uses: actions/download-artifact@v3
with:
name: pr
path: pr/
@@ -127,7 +127,7 @@ jobs:
fi
done >> MSG
- - uses: actions/upload-artifact@v2
+ - uses: actions/upload-artifact@v3
with:
name: pr
path: pr/
diff --git a/.github/workflows/CI_M1.yml b/.github/workflows/CI_M1.yml
deleted file mode 100644
index 6f9aa720b..000000000
--- a/.github/workflows/CI_M1.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-name: CI_M1
-on:
- push:
- branches:
- - main
- pull_request:
- types: [opened, synchronize, reopened]
-jobs:
- test:
- name: Julia ${{ matrix.version }} - macOS - ${{ matrix.arch }} - ${{ github.event_name }}
- runs-on: self-hosted
- strategy:
- fail-fast: false
- matrix:
- version:
- - '1'
- arch:
- - aarch64
- steps:
- - uses: actions/checkout@v3
- - uses: julia-actions/setup-julia@v1
- with:
- version: ${{ matrix.version }}
- arch: ${{ matrix.arch }}
- - name: Version Info
- shell: julia --color=yes {0}
- run: |
- using InteractiveUtils
- versioninfo()
- - uses: julia-actions/julia-buildpkg@v1
- - uses: julia-actions/julia-runtest@v1
diff --git a/.github/workflows/CommentPR.yml b/.github/workflows/CommentPR.yml
index 14f6dcd47..043113f74 100644
--- a/.github/workflows/CommentPR.yml
+++ b/.github/workflows/CommentPR.yml
@@ -39,16 +39,36 @@ jobs:
- run: unzip pr.zip
- name: 'Comment on PR'
- uses: actions/github-script@v3
+ uses: actions/github-script@v6
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
- var fs = require('fs');
- var issue_number = Number(fs.readFileSync('./NR'));
- var msg = fs.readFileSync('./MSG', 'utf8');
- await github.issues.createComment({
+ var fs = require('fs')
+ var issue_number = Number(fs.readFileSync('./NR'))
+ var msg = fs.readFileSync('./MSG', 'utf8')
+
+ // Get the existing comments.
+ const {data: comments} = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
- issue_number: issue_number,
- body: msg
- });
+ issue_number: issue_number
+ })
+
+ // Find any comment already made by the bot.
+ const botComment = comments.find(comment => comment.user.id === 41898282)
+
+ if (botComment) {
+ await github.rest.issues.updateComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ comment_id: botComment.id,
+ body: msg
+ })
+ } else {
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: issue_number,
+ body: msg
+ })
+ }
diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index b546a8082..7a9c79fd4 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -1,19 +1,44 @@
name: CompatHelper
-
on:
schedule:
- - cron: '00 00 * * *'
-
+ - cron: 0 0 * * *
+ workflow_dispatch:
+permissions:
+ contents: write
+ pull-requests: write
jobs:
CompatHelper:
runs-on: ubuntu-latest
steps:
- - uses: julia-actions/setup-julia@latest
+ - name: Check if Julia is already available in the PATH
+ id: julia_in_path
+ run: which julia
+ continue-on-error: true
+ - name: Install Julia, but only if it is not already available in the PATH
+ uses: julia-actions/setup-julia@v1
with:
version: '1'
- - name: CompatHelper
- run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- - name: CompatHelper.main()
+ arch: ${{ runner.arch }}
+ if: steps.julia_in_path.outcome != 'success'
+ - name: "Add the General registry via Git"
+ run: |
+ import Pkg
+ ENV["JULIA_PKG_SERVER"] = ""
+ Pkg.Registry.add("General")
+ shell: julia --color=yes {0}
+ - name: "Install CompatHelper"
+ run: |
+ import Pkg
+ name = "CompatHelper"
+ uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
+ version = "3"
+ Pkg.add(; name, uuid, version)
+ shell: julia --color=yes {0}
+ - name: "Run CompatHelper"
+ run: |
+ import CompatHelper
+ CompatHelper.main()
+ shell: julia --color=yes {0}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: julia -e 'using CompatHelper; CompatHelper.main()'
+ COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml
index be0b86584..406f15e0d 100644
--- a/.github/workflows/Documentation.yml
+++ b/.github/workflows/Documentation.yml
@@ -10,12 +10,12 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- uses: julia-actions/setup-julia@latest
with:
version: '1'
- name: Install dependencies
- run: julia --project=docs -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
+ run: julia --project=docs --color=yes -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
- name: Build and deploy
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/Invalidations.yml b/.github/workflows/Invalidations.yml
new file mode 100644
index 000000000..b0c37e05f
--- /dev/null
+++ b/.github/workflows/Invalidations.yml
@@ -0,0 +1,43 @@
+name: Invalidations
+# Uses SnoopCompile to evaluate number of invalidations caused by `using` the package
+# using https://github.com/julia-actions/julia-invalidations
+# Based on https://github.com/julia-actions/julia-invalidations
+
+on:
+ pull_request:
+
+concurrency:
+ # Skip intermediate builds: always.
+ # Cancel intermediate builds: always.
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ evaluate:
+ # Only run on PRs to the default branch.
+ # In the PR trigger above branches can be specified only explicitly whereas this check should work for master, main, or any other default branch
+ if: github.base_ref == github.event.repository.default_branch
+ runs-on: ubuntu-latest
+ steps:
+ - uses: julia-actions/setup-julia@v1
+ with:
+ version: '1'
+ - uses: actions/checkout@v3
+ - uses: julia-actions/julia-buildpkg@v1
+ - uses: julia-actions/julia-invalidations@v1
+ id: invs_pr
+
+ - uses: actions/checkout@v3
+ with:
+ ref: ${{ github.event.repository.default_branch }}
+ - uses: julia-actions/julia-buildpkg@v1
+ - uses: julia-actions/julia-invalidations@v1
+ id: invs_default
+
+ - name: Report invalidation counts
+ run: |
+ echo "Invalidations on default branch: ${{ steps.invs_default.outputs.total }} (${{ steps.invs_default.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY
+ echo "This branch: ${{ steps.invs_pr.outputs.total }} (${{ steps.invs_pr.outputs.deps }} via deps)" >> $GITHUB_STEP_SUMMARY
+ - name: Check if the PR does increase number of invalidations
+ if: steps.invs_pr.outputs.total > steps.invs_default.outputs.total
+ run: exit 1
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 409e0d146..9e1791f48 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -31,12 +31,12 @@ jobs:
arch: x64
allow_failure: true
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- - uses: actions/cache@v1
+ - uses: actions/cache@v3
env:
cache-name: cache-artifacts
with:
@@ -49,6 +49,6 @@ jobs:
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- - uses: codecov/codecov-action@v1
+ - uses: codecov/codecov-action@v3
with:
- file: lcov.info
+ files: lcov.info
diff --git a/Project.toml b/Project.toml
index a91e07b8a..6249e13f4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
name = "Krylov"
uuid = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
-version = "0.8.3"
+version = "0.9.0"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/README.md b/README.md
index a4664e187..55476e684 100644
--- a/README.md
+++ b/README.md
@@ -71,22 +71,22 @@ Overdetermined systems are less common but also occur.
4. Adjoint systems
- Ax = b and Aᵀy = c
+ Ax = b and Aᴴy = c
where **_A_** can have any shape.
-5. Saddle-point and symmetric quasi-definite (SQD) systems
+5. Saddle-point and Hermitian quasi-definite systems
[M A] [x] = [b]
- [Aᵀ -N] [y] [c]
+ [Aᴴ -N] [y] [c]
where **_A_** can have any shape.
-6. Generalized saddle-point and unsymmetric partitioned systems
+6. Generalized saddle-point and non-Hermitian partitioned systems
[M A] [x] = [b]
@@ -94,7 +94,7 @@ where **_A_** can have any shape.
[B N] [y] [c]
-where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**.
+where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**.
**_A_**, **_B_**, **_b_** and **_c_** must be all nonzero.
Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because:
@@ -121,3 +121,10 @@ julia> ]
pkg> add Krylov
pkg> test Krylov
```
+
+## Bug reports and discussions
+
+If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues).
+Focused suggestions and requests can also be opened as issues. Please start an issue or a discussion on the topic before opening a pull request.
+
+If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome.
diff --git a/docs/make.jl b/docs/make.jl
index 57ad87cd2..441ddb3ee 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -6,23 +6,26 @@ makedocs(
linkcheck = true,
strict = true,
format = Documenter.HTML(assets = ["assets/style.css"],
- ansicolor=true,
+ ansicolor = true,
prettyurls = get(ENV, "CI", nothing) == "true",
collapselevel = 1),
sitename = "Krylov.jl",
pages = ["Home" => "index.md",
"API" => "api.md",
- "Krylov methods" => ["Symmetric positive definite linear systems" => "solvers/spd.md",
- "Symmetric indefinite linear systems" => "solvers/sid.md",
- "Unsymmetric linear systems" => "solvers/unsymmetric.md",
+ "Krylov processes" => "processes.md",
+ "Krylov methods" => ["Hermitian positive definite linear systems" => "solvers/spd.md",
+ "Hermitian indefinite linear systems" => "solvers/sid.md",
+ "Non-Hermitian square linear systems" => "solvers/unsymmetric.md",
"Least-norm problems" => "solvers/ln.md",
"Least-squares problems" => "solvers/ls.md",
"Adjoint systems" => "solvers/as.md",
- "Saddle-point and symmetric quasi-definite systems" => "solvers/sp_sqd.md",
- "Generalized saddle-point and unsymmetric partitioned systems" => "solvers/gsp.md"],
+ "Saddle-point and Hermitian quasi-definite systems" => "solvers/sp_sqd.md",
+ "Generalized saddle-point and non-Hermitian partitioned systems" => "solvers/gsp.md"],
"In-place methods" => "inplace.md",
+ "Preconditioners" => "preconditioners.md",
+ "Storage requirements" => "storage.md",
"GPU support" => "gpu.md",
- "Warm start" => "warm_start.md",
+ "Warm-start" => "warm-start.md",
"Factorization-free operators" => "factorization-free.md",
"Callbacks" => "callbacks.md",
"Performance tips" => "tips.md",
diff --git a/docs/src/api.md b/docs/src/api.md
index 7f2f4dff7..238c86f1a 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -48,6 +48,7 @@ LnlqSolver
CraigSolver
CraigmrSolver
GpmrSolver
+FgmresSolver
```
## Utilities
@@ -60,4 +61,6 @@ Krylov.vec2str
Krylov.ktypeof
Krylov.kzeros
Krylov.kones
+Krylov.vector_to_matrix
+Krylov.matrix_to_vector
```
diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md
index f44018687..91e0b521c 100644
--- a/docs/src/callbacks.md
+++ b/docs/src/callbacks.md
@@ -1,43 +1,80 @@
-## Callbacks
+# [Callbacks](@id callbacks)
-Each Krylov method is able to call a callback function as `callback(solver)` at each iteration. The callback should return `true` if the main loop should terminate, and `false` otherwise.
+Each Krylov method is able to call a callback function as `callback(solver)` at each iteration.
+The callback should return `true` if the main loop should terminate, and `false` otherwise.
If the method terminated because of the callback, the output status will be `"user-requested exit"`.
-For example, if the user defines `my_callback(solver::MinresSolver)`, it can be passed to the solver using
+For example, if the user defines `minres_callback(solver::MinresSolver)`, it can be passed to the solver using
```julia
-(x, stats) = minres(A, b, callback = my_callback)
+(x, stats) = minres(A, b, callback = minres_callback)
```
-If you need to write a callback that uses variables that are not in the `MinresSolver`, use a closure:
+If you need to write a callback that uses variables that are not in a `KrylovSolver`, use a closure:
```julia
-function my_callback2(solver::MinresSolver, A, b, storage_vec, tol::Float64)
- mul!(storage_vec, A, solver.x)
- storage_vec .-= b
- return norm(storage_vec) ≤ tol # tolerance based on the 2-norm of the residual
+function custom_stopping_condition(solver::KrylovSolver, A, b, r, tol)
+ mul!(r, A, solver.x)
+ r .-= b # r := b - Ax
+ bool = norm(r) ≤ tol # tolerance based on the 2-norm of the residual
+ return bool
end
-storage_vec = similar(b)
-(x, stats) = minres(A, b, callback = solver -> my_callback2(solver, A, b, storage_vec, 0.1))
+r = similar(b)  # residual vector
+tol = 1e-6
+cg_callback(solver) = custom_stopping_condition(solver, A, b, r, tol)
+(x, stats) = cg(A, b, callback = cg_callback)
```
Alternatively, use a structure and make it callable:
```julia
-mutable struct MyCallback3{S, M}
- A::M
- b::S
- storage_vec::S
- tol::Float64
+mutable struct CallbackWorkspace{T}
+ A::Matrix{T}
+ b::Vector{T}
+ r::Vector{T}
+ tol::T
end
-MyCallback3(A, b; tol = 0.1) = MyCallback3(A, b, similar(b), tol)
-function (my_cb::MyCallback3)(solver)
- mul!(my_cb.storage_vec, my_cb.A, solver.x)
- my_cb.storage_vec .-= my_cb.b
- return norm(my_cb.storage_vec) ≤ my_cb.tol # tolerance based on the 2-norm of the residual
+function (workspace::CallbackWorkspace)(solver::KrylovSolver)
+ mul!(workspace.r, workspace.A, solver.x)
+ workspace.r .-= workspace.b
+ bool = norm(workspace.r) ≤ workspace.tol
+ return bool
end
-my_cb = MyCallback3(A, b; tol = 0.1)
-(x, stats) = minres(A, b, callback = my_cb)
+bicgstab_callback = CallbackWorkspace(A, b, similar(b), 1e-6)
+(x, stats) = bicgstab(A, b, callback = bicgstab_callback)
+```
+
+Although the main goal of a callback is to add new stopping conditions, it can also retrieve information from the workspace of a Krylov method during the iterations.
+We now illustrate how to store all iterates $x_k$ of the GMRES method.
+
+```julia
+S = Krylov.ktypeof(b)
+global X = S[] # Storage for GMRES iterates
+
+function gmres_callback(solver)
+ z = solver.z
+ k = solver.inner_iter
+ nr = sum(1:k)
+ V = solver.V
+ R = solver.R
+ y = copy(z)
+
+ # Solve Rk * yk = zk
+ for i = k : -1 : 1
+ pos = nr + i - k
+ for j = k : -1 : i+1
+ y[i] = y[i] - R[pos] * y[j]
+ pos = pos - j + 1
+ end
+ y[i] = y[i] / R[pos]
+ end
+
+ # xk = Vk * yk
+ xk = sum(V[i] * y[i] for i = 1:k)
+ push!(X, xk)
+
+ return false # We don't want to add new stopping conditions
+end
+
+(x, stats) = gmres(A, b, callback = gmres_callback)
```
diff --git a/docs/src/examples/tricg.md b/docs/src/examples/tricg.md
index e981c2f7e..61750de5f 100644
--- a/docs/src/examples/tricg.md
+++ b/docs/src/examples/tricg.md
@@ -14,7 +14,7 @@ N = diagm(0 => [5.0 * i for i = 1:n])
c = -b
# [I A] [x] = [b]
-# [Aᵀ -I] [y] [c]
+# [Aᴴ -I] [y] [c]
(x, y, stats) = tricg(A, b, c)
K = [eye(m) A; A' -eye(n)]
B = [b; c]
@@ -23,7 +23,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [-I A] [x] = [b]
-# [ Aᵀ I] [y] [c]
+# [ Aᴴ I] [y] [c]
(x, y, stats) = tricg(A, b, c, flip=true)
K = [-eye(m) A; A' eye(n)]
B = [b; c]
@@ -32,7 +32,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [I A] [x] = [b]
-# [Aᵀ I] [y] [c]
+# [Aᴴ I] [y] [c]
(x, y, stats) = tricg(A, b, c, spd=true)
K = [eye(m) A; A' eye(n)]
B = [b; c]
@@ -41,7 +41,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [-I A] [x] = [b]
-# [ Aᵀ -I] [y] [c]
+# [ Aᴴ -I] [y] [c]
(x, y, stats) = tricg(A, b, c, snd=true)
K = [-eye(m) A; A' -eye(n)]
B = [b; c]
@@ -50,7 +50,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [τI A] [x] = [b]
-# [ Aᵀ νI] [y] [c]
+# [ Aᴴ νI] [y] [c]
(τ, ν) = (1e-4, 1e2)
(x, y, stats) = tricg(A, b, c, τ=τ, ν=ν)
K = [τ*eye(m) A; A' ν*eye(n)]
@@ -60,7 +60,7 @@ resid = norm(r)
@printf("TriCG: Relative residual: %8.1e\n", resid)
# [M⁻¹ A ] [x] = [b]
-# [Aᵀ -N⁻¹] [y] [c]
+# [Aᴴ -N⁻¹] [y] [c]
(x, y, stats) = tricg(A, b, c, M=M, N=N, verbose=1)
K = [inv(M) A; A' -inv(N)]
H = BlockDiagonalOperator(M, N)
diff --git a/docs/src/examples/trimr.md b/docs/src/examples/trimr.md
index 2aa48be1e..adc4e82e5 100644
--- a/docs/src/examples/trimr.md
+++ b/docs/src/examples/trimr.md
@@ -14,7 +14,7 @@ m, n = size(A)
c = -b
# [D A] [x] = [b]
-# [Aᵀ 0] [y] [c]
+# [Aᴴ 0] [y] [c]
llt_D = cholesky(D)
opD⁻¹ = LinearOperator(Float64, 5, 5, true, true, (y, v) -> ldiv!(y, llt_D, v))
opH⁻¹ = BlockDiagonalOperator(opD⁻¹, eye(n))
@@ -34,7 +34,7 @@ N = diagm(0 => [5.0 * i for i = 1:n])
c = -b
# [I A] [x] = [b]
-# [Aᵀ -I] [y] [c]
+# [Aᴴ -I] [y] [c]
(x, y, stats) = trimr(A, b, c)
K = [eye(m) A; A' -eye(n)]
B = [b; c]
@@ -43,7 +43,7 @@ resid = norm(r)
@printf("TriMR: Relative residual: %8.1e\n", resid)
# [M A] [x] = [b]
-# [Aᵀ -N] [y] [c]
+# [Aᴴ -N] [y] [c]
ldlt_M = ldl(M)
ldlt_N = ldl(N)
opM⁻¹ = LinearOperator(Float64, size(M,1), size(M,2), true, true, (y, v) -> ldiv!(y, ldlt_M, v))
diff --git a/docs/src/factorization-free.md b/docs/src/factorization-free.md
index aa0f51f07..b97108b99 100644
--- a/docs/src/factorization-free.md
+++ b/docs/src/factorization-free.md
@@ -1,3 +1,32 @@
+```@raw html
+
+```
+
## [Factorization-free operators](@id factorization-free)
All methods are factorization-free, which means that you only need to provide operator-vector products.
@@ -10,8 +39,11 @@ Some methods only require `A * v` products, whereas other ones also require `A'
|:--------------------------------------:|:----------------------------------------:|
| CG, CR | CGLS, CRLS, CGNE, CRMR |
| SYMMLQ, CG-LANCZOS, MINRES, MINRES-QLP | LSLQ, LSQR, LSMR, LNLQ, CRAIG, CRAIGMR |
-| DIOM, FOM, DQGMRES, GMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR |
-| CGS, BICGSTAB | TriCG, TriMR, USYMLQR |
+| DIOM, FOM, DQGMRES, GMRES, FGMRES | BiLQ, QMR, BiLQR, USYMLQ, USYMQR, TriLQR |
+| CGS, BICGSTAB | TriCG, TriMR |
+
+!!! info
+ GPMR is the only method that requires `A * v` and `B * w` products.
Preconditioners `M`, `N`, `C`, `D`, `E` or `F` can be also linear operators and must implement `mul!` or `ldiv!`.
diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 4c9887f24..378f4f5d3 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -1,6 +1,15 @@
-## GPU support
+# [GPU support](@id gpu)
-All solvers in Krylov.jl can be used with `CuArrays` and allow computations with Nvidia GPU. Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to GPU format (`CuMatrix` and `CuVector`).
+Krylov methods are well suited for GPU computations because they only require matrix-vector products ($u \leftarrow Av$, $u \leftarrow A^{H}w$) and vector operations ($\|v\|$, $u^H v$, $v \leftarrow \alpha u + \beta v$), which are highly parallelizable.
+
+The implementations in Krylov.jl are generic so as to take advantage of the multiple dispatch and broadcast features of Julia.
+These features allow the implementations to be specialized automatically by the compiler for both CPU and GPU.
+Thus, Krylov.jl works with GPU backends that build on [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl), such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) or [Metal.jl](https://github.com/JuliaGPU/Metal.jl).
+
+## Nvidia GPUs
+
+All solvers in Krylov.jl can be used with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) and allow computations on Nvidia GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`CuMatrix` and `CuVector`).
```julia
using CUDA, Krylov
@@ -13,11 +22,11 @@ b_cpu = rand(20)
A_gpu = CuMatrix(A_cpu)
b_gpu = CuVector(b_cpu)
-# Solve a square and dense system on GPU
+# Solve a square and dense system on an Nvidia GPU
x, stats = bilq(A_gpu, b_gpu)
```
-Sparse matrices have a specific storage on GPU (`CuSparseMatrixCSC` or `CuSparseMatrixCSR`):
+Sparse matrices have a specific storage on Nvidia GPUs (`CuSparseMatrixCSC`, `CuSparseMatrixCSR` or `CuSparseMatrixCOO`):
```julia
using CUDA, Krylov
@@ -31,7 +40,7 @@ b_cpu = rand(200)
A_gpu = CuSparseMatrixCSC(A_cpu)
b_gpu = CuVector(b_cpu)
-# Solve a rectangular and sparse system on GPU
+# Solve a rectangular and sparse system on an Nvidia GPU
x, stats = lsmr(A_gpu, b_gpu)
```
@@ -47,14 +56,14 @@ using SparseArrays, Krylov, LinearOperators
using CUDA, CUDA.CUSPARSE
# Transfer the linear system from the CPU to the GPU
-A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu)
+A_gpu = CuSparseMatrixCSC(A_cpu) # A_gpu = CuSparseMatrixCSR(A_cpu)
b_gpu = CuVector(b_cpu)
-# LLᵀ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
+# LLᴴ ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
P = ic02(A_gpu, 'O')
# Solve Py = x
-function ldiv!(y, P, x)
+function ldiv_ic0!(y, P, x)
copyto!(y, x) # Variant for CuSparseMatrixCSR
sv2!('T', 'U', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'N', 1.0, P, y, 'O')
sv2!('N', 'U', 'N', 1.0, P, y, 'O') # sv2!('T', 'L', 'N', 1.0, P, y, 'O')
@@ -65,12 +74,15 @@ end
n = length(b_gpu)
T = eltype(b_gpu)
symmetric = hermitian = true
-opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x))
+opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ic0!(y, P, x))
# Solve a symmetric positive definite system with an incomplete Cholesky preconditioner on GPU
-(x, stats) = cg(A_gpu, b_gpu, M=opM)
+x, stats = cg(A_gpu, b_gpu, M=opM)
```
+!!! note
+ You need to replace `'T'` by `'C'` in `ldiv_ic0!` if `A_gpu` is a complex matrix.
+
### Example with a general square system
```julia
@@ -84,14 +96,14 @@ A_cpu = A_cpu[p,:]
b_cpu = b_cpu[p]
# Transfer the linear system from the CPU to the GPU
-A_gpu = CuSparseMatrixCSC(A_cpu) # A = CuSparseMatrixCSR(A_cpu)
+A_gpu = CuSparseMatrixCSC(A_cpu) # A_gpu = CuSparseMatrixCSR(A_cpu)
b_gpu = CuVector(b_cpu)
# LU ≈ A for CuSparseMatrixCSC or CuSparseMatrixCSR matrices
P = ilu02(A_gpu, 'O')
# Solve Py = x
-function ldiv!(y, P, x)
+function ldiv_ilu0!(y, P, x)
copyto!(y, x) # Variant for CuSparseMatrixCSR
sv2!('N', 'L', 'N', 1.0, P, y, 'O') # sv2!('N', 'L', 'U', 1.0, P, y, 'O')
sv2!('N', 'U', 'U', 1.0, P, y, 'O') # sv2!('N', 'U', 'N', 1.0, P, y, 'O')
@@ -102,8 +114,85 @@ end
n = length(b_gpu)
T = eltype(b_gpu)
symmetric = hermitian = false
-opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv!(y, P, x))
+opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_ilu0!(y, P, x))
# Solve an unsymmetric system with an incomplete LU preconditioner on GPU
-(x, stats) = bicgstab(A_gpu, b_gpu, M=opM)
+x, stats = bicgstab(A_gpu, b_gpu, M=opM)
+```
+
+## AMD GPUs
+
+All solvers in Krylov.jl can be used with [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) and allow computations on AMD GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`ROCMatrix` and `ROCVector`).
+
+```julia
+using Krylov, AMDGPU
+
+# CPU Arrays
+A_cpu = rand(ComplexF64, 20, 20)
+A_cpu = A_cpu + A_cpu'
+b_cpu = rand(ComplexF64, 20)
+
+A_gpu = ROCMatrix(A_cpu)
+b_gpu = ROCVector(b_cpu)
+
+# Solve a dense Hermitian system on an AMD GPU
+x, stats = minres(A_gpu, b_gpu)
+```
+
+!!! info
+ The library `rocSPARSE` is not yet interfaced in AMDGPU.jl, so only dense linear systems are supported.
+
+## Intel GPUs
+
+All solvers in Krylov.jl can be used with [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) and allow computations on Intel GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`oneMatrix` and `oneVector`).
+
+```julia
+using Krylov, oneAPI
+
+T = Float32 # oneAPI.jl also works with ComplexF32
+m = 20
+n = 10
+
+# CPU Arrays
+A_cpu = rand(T, m, n)
+b_cpu = rand(T, m)
+
+# GPU Arrays
+A_gpu = oneMatrix(A_cpu)
+b_gpu = oneVector(b_cpu)
+
+# Solve a dense least-squares problem on an Intel GPU
+x, stats = lsqr(A_gpu, b_gpu)
```
+
+!!! warning
+ The library `oneMKL` is not yet interfaced in oneAPI.jl, so all BLAS routines (`dot`, `norm`, `mul!`, etc.) dispatch to generic fallbacks.
+
+## Apple M1 GPUs
+
+All solvers in Krylov.jl can be used with [Metal.jl](https://github.com/JuliaGPU/Metal.jl) and allow computations on Apple M1 GPUs.
+Problems stored in CPU format (`Matrix` and `Vector`) must first be converted to the related GPU format (`MtlMatrix` and `MtlVector`).
+
+```julia
+using Krylov, Metal
+
+T = Float32 # Metal.jl also works with ComplexF32
+n = 10
+m = 20
+
+# CPU Arrays
+A_cpu = rand(T, n, m)
+b_cpu = rand(T, n)
+
+# GPU Arrays
+A_gpu = MtlMatrix(A_cpu)
+b_gpu = MtlVector(b_cpu)
+
+# Solve a dense least-norm problem on an Apple M1 GPU
+x, stats = craig(A_gpu, b_gpu)
+```
+
+!!! warning
+ Metal.jl is under heavy development and is considered experimental for now.
diff --git a/docs/src/graphics/arnoldi.png b/docs/src/graphics/arnoldi.png
new file mode 100644
index 000000000..9ef8bd3a3
Binary files /dev/null and b/docs/src/graphics/arnoldi.png differ
diff --git a/docs/src/graphics/golub_kahan.png b/docs/src/graphics/golub_kahan.png
new file mode 100644
index 000000000..32fc3d7b8
Binary files /dev/null and b/docs/src/graphics/golub_kahan.png differ
diff --git a/docs/src/graphics/hermitian_lanczos.png b/docs/src/graphics/hermitian_lanczos.png
new file mode 100644
index 000000000..c70082e72
Binary files /dev/null and b/docs/src/graphics/hermitian_lanczos.png differ
diff --git a/docs/src/graphics/montoison_orban.png b/docs/src/graphics/montoison_orban.png
new file mode 100644
index 000000000..5a14eda04
Binary files /dev/null and b/docs/src/graphics/montoison_orban.png differ
diff --git a/docs/src/graphics/nonhermitian_lanczos.png b/docs/src/graphics/nonhermitian_lanczos.png
new file mode 100644
index 000000000..b8d83961c
Binary files /dev/null and b/docs/src/graphics/nonhermitian_lanczos.png differ
diff --git a/docs/src/graphics/saunders_simon_yip.png b/docs/src/graphics/saunders_simon_yip.png
new file mode 100644
index 000000000..c3acfd181
Binary files /dev/null and b/docs/src/graphics/saunders_simon_yip.png differ
diff --git a/docs/src/index.md b/docs/src/index.md
index ce657436d..1a18e2315 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -46,26 +46,26 @@ Overdetermined systems are less common but also occur.
4 - Adjoint systems
```math
- Ax = b \quad \text{and} \quad A^T y = c
+ Ax = b \quad \text{and} \quad A^H y = c
```
where **_A_** can have any shape.
-5 - Saddle-point and symmetric quasi-definite (SQD) systems
+5 - Saddle-point and Hermitian quasi-definite systems
```math
- \begin{bmatrix} M & \phantom{-}A \\ A^T & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right)
+ \begin{bmatrix} M & \phantom{-}A \\ A^H & -N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \left(\begin{bmatrix} b \\ 0 \end{bmatrix},\begin{bmatrix} 0 \\ c \end{bmatrix},\begin{bmatrix} b \\ c \end{bmatrix}\right)
```
where **_A_** can have any shape.
-6 - Generalized saddle-point and unsymmetric partitioned systems
+6 - Generalized saddle-point and non-Hermitian partitioned systems
```math
\begin{bmatrix} M & A \\ B & N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix}
```
-where **_A_** can have any shape and **_B_** has the shape of **_Aᵀ_**.
+where **_A_** can have any shape and **_B_** has the shape of **_Aᴴ_**.
**_A_**, **_B_**, **_b_** and **_c_** must be all nonzero.
Krylov solvers are particularly appropriate in situations where such problems must be solved but a factorization is not possible, either because:
@@ -92,3 +92,10 @@ julia> ]
pkg> add Krylov
pkg> test Krylov
```
+
+# Bug reports and discussions
+
+If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/Krylov.jl/issues).
+Focused suggestions and requests can also be opened as issues. Please start an issue or a discussion on the topic before opening a pull request.
+
+If you want to ask a question not suited for a bug report, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, so questions about any of our packages are welcome.
diff --git a/docs/src/inplace.md b/docs/src/inplace.md
index 71a4e25de..9950575fe 100644
--- a/docs/src/inplace.md
+++ b/docs/src/inplace.md
@@ -15,7 +15,7 @@ Given an operator `A` and a right-hand side `b`, you can create a `KrylovSolver`
For example, use `S = Vector{Float64}` if you want to solve linear systems in double precision on the CPU and `S = CuVector{Float32}` if you want to solve linear systems in single precision on an Nvidia GPU.
!!! note
- `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`).
+ `DiomSolver`, `FomSolver`, `DqgmresSolver`, `GmresSolver`, `FgmresSolver`, `GpmrSolver` and `CgLanczosShiftSolver` require an additional argument (`memory` or `nshifts`).
The workspace is always the first argument of the in-place methods:
diff --git a/docs/src/preconditioners.md b/docs/src/preconditioners.md
new file mode 100644
index 000000000..fd203dddb
--- /dev/null
+++ b/docs/src/preconditioners.md
@@ -0,0 +1,237 @@
+# [Preconditioners](@id preconditioners)
+
+The solvers in Krylov.jl support preconditioners, i.e., transformations that modify a linear system $Ax = b$ into an equivalent form that may yield faster convergence in finite-precision arithmetic.
+Preconditioning can be used, for instance, to reduce the condition number of the problem or to cluster its eigenvalues or singular values.
+
+The design of preconditioners is highly dependent on the origin of the problem and most preconditioners need to take application-dependent information and structure into account.
+Specialized preconditioners generally outperform generic preconditioners such as incomplete factorizations.
+
+The construction of a preconditioner necessitates trade-offs because we need to apply it at least once per iteration within a Krylov method.
+Hence, a preconditioner must be constructed such that it is cheap to apply, while also capturing the characteristics of the original system in some sense.
+
+There exist three variants of preconditioning:
+
+| Left preconditioning | Two-sided preconditioning | Right preconditioning |
+|:----------------------------------:|:----------------------------------------------------------------------:|:--------------------------------------------:|
+| $P_{\ell}^{-1}Ax = P_{\ell}^{-1}b$ | $P_{\ell}^{-1}AP_r^{-1}y = P_{\ell}^{-1}b~~\text{with}~~x = P_r^{-1}y$ | $AP_r^{-1}y = b~~\text{with}~~x = P_r^{-1}y$ |
+
+where $P_{\ell}$ and $P_r$ are square and nonsingular.
+
+In Krylov.jl, we call $P_{\ell}^{-1}$ and $P_r^{-1}$ the preconditioners and we assume that we can apply them with the operation $y \leftarrow P^{-1} * x$.
+It is also common to call $P_{\ell}$ and $P_r$ the preconditioners if the equivalent operation $y \leftarrow P~\backslash~x$ is available.
+Krylov.jl supports both approaches thanks to the argument `ldiv` of the Krylov solvers.
+
+## How to use preconditioners in Krylov.jl?
+
+!!! info
+ - A preconditioner only needs to support the operation `mul!(y, P⁻¹, x)` when `ldiv=false`, or `ldiv!(y, P, x)` when `ldiv=true`, to be used in Krylov.jl.
+ - The default value of a preconditioner in Krylov.jl is the identity operator `I`.
+
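+For example, here is a minimal sketch of both conventions; the Jacobi-style diagonal preconditioner and the LU factorization below are illustrative choices only:
+
+```julia
+using LinearAlgebra, SparseArrays, Krylov
+
+A = sprandn(100, 100, 0.05) + 10I
+b = rand(100)
+
+# ldiv=false (default): M represents P⁻¹ and is applied with mul!
+P⁻¹ = Diagonal(1 ./ abs.(Vector(diag(A))))
+x, stats = gmres(A, b, M=P⁻¹)
+
+# ldiv=true: M represents P and is applied with ldiv!
+P = lu(A)  # any factorization that supports ldiv!
+x, stats = gmres(A, b, M=P, ldiv=true)
+```
+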
+### Square non-Hermitian linear systems
+
+Methods concerned: [`CGS`](@ref cgs), [`BiCGSTAB`](@ref bicgstab), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres), [`FGMRES`](@ref fgmres), [`DIOM`](@ref diom) and [`FOM`](@ref fom).
+
+A Krylov method dedicated to non-Hermitian linear systems supports all three variants of preconditioning.
+
+| Preconditioners | $P_{\ell}^{-1}$ | $P_{\ell}$ | $P_r^{-1}$ | $P_r$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+### Hermitian linear systems
+
+Methods concerned: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CG-LANCZOS`](@ref cg_lanczos), [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift), [`CR`](@ref cr), [`MINRES`](@ref minres) and [`MINRES-QLP`](@ref minres_qlp).
+
+When $A$ is Hermitian, we can only use centered preconditioning $L^{-1}AL^{-H}y = L^{-1}b$ with $x = L^{-H}y$.
+Centered preconditioning is a special case of two-sided preconditioning with $P_{\ell} = L = P_r^H$ that maintains hermiticity.
+However, there is no need to specify $L$ and one may specify $P_c = LL^H$ or its inverse directly.
+
+| Preconditioners | $P_c^{-1}$ | $P_c$ |
+|:---------------:|:-------------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` |
+
+!!! warning
+ The preconditioner `M` must be Hermitian and positive definite.
+
+### Linear least-squares problems
+
+Methods concerned: [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
+
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:------------------------------------:|:-------------------------------------------:|
+| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}}$ |
+| Normal equation | $A^HAx = A^Hb$ | $A^HE^{-1}Ax = A^HE^{-1}b$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & 0 \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+
+[`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr) also handle regularized least-squares problems.
+
+| Formulation | Without preconditioning | With preconditioning |
+|:---------------------:|:-------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:|
+| least-squares problem | $\min \tfrac{1}{2} \\|b - Ax\\|^2_2 + \tfrac{1}{2} \lambda^2 \\|x\\|^2_2$ | $\min \tfrac{1}{2} \\|b - Ax\\|^2_{E^{-1}} + \tfrac{1}{2} \lambda^2 \\|x\\|^2_F$ |
+| Normal equation | $(A^HA + \lambda^2 I)x = A^Hb$ | $(A^HE^{-1}A + \lambda^2 F)x = A^HE^{-1}b$ |
+| Augmented system | $\begin{bmatrix} I & A \\ A^H & -\lambda^2 I \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ | $\begin{bmatrix} E & A \\ A^H & -\lambda^2 F \end{bmatrix} \begin{bmatrix} r \\ x \end{bmatrix} = \begin{bmatrix} b \\ 0 \end{bmatrix}$ |
+
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+ The preconditioners `M` and `N` must be Hermitian and positive definite.
+
+### Linear least-norm problems
+
+Methods concerned: [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr).
+
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:----------------------------------------------------:|:----------------------------------------------------:|
+| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2~~\text{s.t.}~~Ax = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F~~\text{s.t.}~~Ax = b$ |
+| Normal equation | $AA^Hy = b~~\text{with}~~x = A^Hy$ | $AF^{-1}A^Hy = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+
+[`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig) and [`CRAIGMR`](@ref craigmr) also handle penalized minimum-norm problems.
+
+| Formulation | Without preconditioning | With preconditioning |
+|:--------------------:|:---------------------------------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------:|
+| minimum-norm problem | $\min \tfrac{1}{2} \\|x\\|^2_2 + \tfrac{1}{2} \\|y\\|^2_2~~\text{s.t.}~~Ax + \lambda^2 y = b$ | $\min \tfrac{1}{2} \\|x\\|^2_F + \tfrac{1}{2} \\|y\\|^2_E~~\text{s.t.}~~Ax + \lambda^2 Ey = b$ |
+| Normal equation | $(AA^H + \lambda^2 I)y = b~~\text{with}~~x = A^Hy$ | $(AF^{-1}A^H + \lambda^2 E)y = b~~\text{with}~~x = F^{-1}A^Hy$ |
+| Augmented system | $\begin{bmatrix} -I & A^H \\ \phantom{-}A & \lambda^2 I \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ | $\begin{bmatrix} -F & A^H \\ \phantom{-}A & \lambda^2 E \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} 0 \\ b \end{bmatrix}$ |
+
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:-----------------------:|:--------------------:|:-----------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+ The preconditioners `M` and `N` must be Hermitian and positive definite.
+
+### Saddle-point and Hermitian quasi-definite systems
+
+[`TriCG`](@ref tricg) and [`TriMR`](@ref trimr) can take advantage of the structure of Hermitian systems $Kz = d$ with the 2x2 block structure
+```math
+ \begin{bmatrix} \tau E & \phantom{-}A \\ A^H & \nu F \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix}.
+```
+| Preconditioners | $E^{-1}$ | $E$ | $F^{-1}$ | $F$ |
+|:---------------:|:---------------------:|:--------------------:|:---------------------:|:--------------------:|
+| Arguments | `M` with `ldiv=false` | `M` with `ldiv=true` | `N` with `ldiv=false` | `N` with `ldiv=true` |
+
+!!! warning
+ The preconditioners `M` and `N` must be Hermitian and positive definite.
+
+### Generalized saddle-point and non-Hermitian partitioned systems
+
+[`GPMR`](@ref gpmr) can take advantage of the structure of general square systems $Kz = d$ with the 2x2 block structure
+```math
+ \begin{bmatrix} \lambda M & A \\ B & \mu N \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} = \begin{bmatrix} b \\ c \end{bmatrix}.
+```
+| Relations | $CE = M^{-1}$ | $EC = M$ | $DF = N^{-1}$ | $FD = N$ |
+|:---------------:|:-----------------------------:|:----------------------------:|:-----------------------------:|:----------------------------:|
+| Arguments | `C` and `E` with `ldiv=false` | `C` and `E` with `ldiv=true` | `D` and `F` with `ldiv=false` | `D` and `F` with `ldiv=true` |
+
+!!! note
+ Our implementations of [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr) and [`TriLQR`](@ref trilqr) don't support preconditioning.
+
+## Packages that provide preconditioners
+
+- [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) implements the left-looking and Crout versions of ILU decompositions.
+- [ILUZero.jl](https://github.com/mcovalt/ILUZero.jl) is a Julia implementation of incomplete LU factorization with zero level of fill-in.
+- [LimitedLDLFactorizations.jl](https://github.com/JuliaSmoothOptimizers/LimitedLDLFactorizations.jl) provides a limited-memory LDLᵀ factorization of symmetric matrices.
+- [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) provides two algebraic multigrid (AMG) preconditioners.
+- [RandomizedPreconditioners.jl](https://github.com/tjdiamandis/RandomizedPreconditioners.jl) uses randomized numerical linear algebra to construct approximate inverses of matrices.
+- [BasicLU.jl](https://github.com/JuliaSmoothOptimizers/BasicLU.jl) uses a sparse LU factorization to compute a maximum volume basis that can be used as a preconditioner for least-norm and least-squares problems.
+
+## Examples
+
+```julia
+using LinearAlgebra, Krylov
+n, m = size(A)
+d = [A[i,i] ≠ 0 ? 1 / abs(A[i,i]) : 1 for i=1:n] # Jacobi preconditioner
+P⁻¹ = diagm(d)
+x, stats = symmlq(A, b, M=P⁻¹)
+```
+
+```julia
+using LinearAlgebra, Krylov
+n, m = size(A)
+d = [1 / norm(A[:,i]) for i=1:m] # diagonal preconditioner
+P⁻¹ = diagm(d)
+x, stats = minres(A, b, M=P⁻¹)
+```
+
+```julia
+using IncompleteLU, Krylov
+Pℓ = ilu(A)
+x, stats = gmres(A, b, M=Pℓ, ldiv=true) # left preconditioning
+```
+
+```julia
+using LimitedLDLFactorizations, Krylov
+P = lldl(A)
+P.D .= abs.(P.D)
+x, stats = cg(A, b, M=P, ldiv=true) # centered preconditioning
+```
+
+```julia
+using ILUZero, Krylov
+Pᵣ = ilu0(A)
+x, stats = bicgstab(A, b, N=Pᵣ, ldiv=true) # right preconditioning
+```
+
+```julia
+using LDLFactorizations, Krylov
+
+M = ldl(E)
+N = ldl(F)
+
+# [E A] [x] = [b]
+# [Aᴴ -F] [y] [c]
+x, y, stats = tricg(A, b, c, M=M, N=N, ldiv=true)
+```
+
+```julia
+using SuiteSparse, Krylov
+import LinearAlgebra.ldiv!
+
+M = cholesky(E)
+
+# ldiv! is not implemented for the sparse Cholesky factorization (SuiteSparse.CHOLMOD)
+ldiv!(y::Vector{T}, F::SuiteSparse.CHOLMOD.Factor{T}, x::Vector{T}) where T = (y .= F \ x)
+
+# [E A] [x] = [b]
+# [Aᴴ 0] [y] [c]
+x, y, stats = trimr(A, b, c, M=M, sp=true, ldiv=true)
+```
+
+```julia
+using LinearAlgebra, Krylov
+
+C = lu(M)
+
+# [M A] [x] = [b]
+# [B 0] [y] [c]
+x, y, stats = gpmr(A, B, b, c, C=C, gsp=true, ldiv=true)
+```
+
+```julia
+import BasicLU
+using SparseArrays, LinearOperators, Krylov
+
+# Least-squares problem
+m, n = size(A)
+Aᴴ = sparse(A')
+basis, B = BasicLU.maxvolbasis(Aᴴ)
+opA = LinearOperator(A)
+B⁻ᴴ = LinearOperator(Float64, n, n, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')),
+ (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')),
+ (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')))
+
+d, stats = lsmr(opA * B⁻ᴴ, b) # min ‖AB⁻ᴴd - b‖₂
+x = B⁻ᴴ * d # recover the solution of min ‖Ax - b‖₂
+
+# Least-norm problem
+m, n = size(A)
+basis, B = BasicLU.maxvolbasis(A)
+opA = LinearOperator(A)
+B⁻¹ = LinearOperator(Float64, m, m, false, false, (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'N')),
+ (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')),
+ (y, v) -> (y .= v ; BasicLU.solve!(B, y, 'T')))
+
+x, y, stats = craigmr(B⁻¹ * opA, B⁻¹ * b) # min ‖x‖₂ s.t. B⁻¹Ax = B⁻¹b
+```
diff --git a/docs/src/processes.md b/docs/src/processes.md
new file mode 100644
index 000000000..e9d4066d2
--- /dev/null
+++ b/docs/src/processes.md
@@ -0,0 +1,334 @@
+```@raw html
+
+```
+
+# [Krylov processes](@id krylov-processes)
+
+Krylov processes are the foundation of Krylov methods: they generate bases of Krylov subspaces.
+Depending on the Krylov subspaces they generate, Krylov processes are more or less specialized for a given class of linear problems.
+The following table summarizes the most relevant processes for each linear problem.
+
+| Linear problems | Processes |
+|:--------------------------------------------------------------:|:---------------------------------:|
+| Hermitian linear systems | Hermitian Lanczos |
+| Square non-Hermitian linear systems | Non-Hermitian Lanczos -- Arnoldi |
+| Least-squares problems | Golub-Kahan -- Saunders-Simon-Yip |
+| Least-norm problems | Golub-Kahan -- Saunders-Simon-Yip |
+| Saddle-point and Hermitian quasi-definite systems | Golub-Kahan -- Saunders-Simon-Yip |
+| Generalized saddle-point and non-Hermitian partitioned systems | Montoison-Orban |
+
+### Notation
+
+For a matrix $A$, $A^H$ denotes the conjugate transpose of $A$.
+It coincides with $A^T$, the transpose of $A$, for real matrices.
+Define $V_k := \begin{bmatrix} v_1 & \ldots & v_k \end{bmatrix} \enspace$ and $\enspace U_k := \begin{bmatrix} u_1 & \ldots & u_k \end{bmatrix}$.
+
+For a matrix $C \in \mathbb{C}^{n \times n}$ and a vector $t \in \mathbb{C}^{n}$, the $k$-th Krylov subspace generated by $C$ and $t$ is
+```math
+\mathcal{K}_k(C, t) :=
+\left\{\sum_{i=0}^{k-1} \omega_i C^i t \, \middle \vert \, \omega_i \in \mathbb{C},~0 \le i \le k-1 \right\}.
+```
+
+For matrices $C \in \mathbb{C}^{n \times n} \enspace$ and $\enspace T \in \mathbb{C}^{n \times p}$, the $k$-th block Krylov subspace generated by $C$ and $T$ is
+```math
+\mathcal{K}_k^{\square}(C, T) :=
+\left\{\sum_{i=0}^{k-1} C^i T \, \Omega_i \, \middle \vert \, \Omega_i \in \mathbb{C}^{p \times p},~0 \le i \le k-1 \right\}.
+```
+
+## Hermitian Lanczos
+
+![hermitian_lanczos](./graphics/hermitian_lanczos.png)
+
+After $k$ iterations of the Hermitian Lanczos process, the situation may be summarized as
+```math
+\begin{align*}
+ A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\
+ V_k^H V_k &= I_k,
+\end{align*}
+```
+where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$,
+```math
+T_k =
+\begin{bmatrix}
+ \alpha_1 & \beta_2 & & \\
+ \beta_2 & \alpha_2 & \ddots & \\
+ & \ddots & \ddots & \beta_k \\
+ & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+T_{k+1,k} =
+\begin{bmatrix}
+ T_{k} \\
+ \beta_{k+1} e_{k}^T
+\end{bmatrix}.
+```
+Note that $T_{k+1,k}$ is a real tridiagonal matrix even if $A$ is a complex matrix.
+
+The function [`hermitian_lanczos`](@ref hermitian_lanczos) returns $V_{k+1}$ and $T_{k+1,k}$.
+
+Related methods: [`SYMMLQ`](@ref symmlq), [`CG`](@ref cg), [`CR`](@ref cr), [`MINRES`](@ref minres), [`MINRES-QLP`](@ref minres_qlp), [`CGLS`](@ref cgls), [`CRLS`](@ref crls), [`CGNE`](@ref cgne), [`CRMR`](@ref crmr), [`CG-LANCZOS`](@ref cg_lanczos) and [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift).
+
+```@docs
+hermitian_lanczos
+```
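+
+For illustration, here is a minimal sketch of a call to the process; we assume the signature `hermitian_lanczos(A, b, k)`, where `k` is the number of iterations:
+
+```julia
+using Krylov, LinearAlgebra
+
+n = 100
+A = rand(ComplexF64, n, n)
+A = A + A'  # Hermitian matrix
+b = rand(ComplexF64, n)
+
+V, T = hermitian_lanczos(A, b, 20)  # V is n × 21, T is 21 × 20 tridiagonal
+```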
+
+## Non-Hermitian Lanczos
+
+![nonhermitian_lanczos](./graphics/nonhermitian_lanczos.png)
+
+After $k$ iterations of the non-Hermitian Lanczos process (also named the Lanczos biorthogonalization process), the situation may be summarized as
+```math
+\begin{align*}
+ A V_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\
+ A^H U_k &= U_k T_k^H + \bar{\gamma}_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\
+ V_k^H U_k &= U_k^H V_k = I_k,
+\end{align*}
+```
+where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A,b)$ and $\mathcal{K}_k (A^H,c)$, respectively,
+```math
+T_k =
+\begin{bmatrix}
+ \alpha_1 & \gamma_2 & & \\
+ \beta_2 & \alpha_2 & \ddots & \\
+ & \ddots & \ddots & \gamma_k \\
+ & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+T_{k+1,k} =
+\begin{bmatrix}
+ T_{k} \\
+ \beta_{k+1} e_{k}^T
+\end{bmatrix}
+, \qquad
+T_{k,k+1} =
+\begin{bmatrix}
+ T_{k} & \gamma_{k+1} e_k
+\end{bmatrix}.
+```
+
+The function [`nonhermitian_lanczos`](@ref nonhermitian_lanczos) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$.
+
+Related methods: [`BiLQ`](@ref bilq), [`QMR`](@ref qmr), [`BiLQR`](@ref bilqr), [`CGS`](@ref cgs) and [`BICGSTAB`](@ref bicgstab).
+
+!!! note
+ The scaling factors used in our implementation are $\beta_k = |u_k^H v_k|^{\tfrac{1}{2}}$ and $\gamma_k = (u_k^H v_k) / \beta_k$.
+ With these scaling factors, the non-Hermitian Lanczos process coincides with the Hermitian Lanczos process when $A = A^H$ and $b = c$.
+
+```@docs
+nonhermitian_lanczos
+```
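+
+A minimal sketch, assuming the signature `nonhermitian_lanczos(A, b, c, k)` with `k` the number of iterations:
+
+```julia
+using Krylov
+
+n = 100
+A = rand(n, n)  # square non-Hermitian matrix
+b = rand(n)
+c = rand(n)
+
+V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, 20)  # V and U are n × 21
+```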
+
+## Arnoldi
+
+![arnoldi](./graphics/arnoldi.png)
+
+After $k$ iterations of the Arnoldi process, the situation may be summarized as
+```math
+\begin{align*}
+ A V_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\
+ V_k^H V_k &= I_k,
+\end{align*}
+```
+where $V_k$ is an orthonormal basis of the Krylov subspace $\mathcal{K}_k (A,b)$,
+```math
+H_k =
+\begin{bmatrix}
+ h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\
+ h_{2,1}~ & \ddots~ & \ddots & \vdots \\
+ & \ddots~ & \ddots & h_{k-1,k} \\
+ & & h_{k,k-1} & h_{k,k}
+\end{bmatrix}
+, \qquad
+H_{k+1,k} =
+\begin{bmatrix}
+ H_{k} \\
+ h_{k+1,k} e_{k}^T
+\end{bmatrix}.
+```
+
+The function [`arnoldi`](@ref arnoldi) returns $V_{k+1}$ and $H_{k+1,k}$.
+
+Related methods: [`DIOM`](@ref diom), [`FOM`](@ref fom), [`DQGMRES`](@ref dqgmres), [`GMRES`](@ref gmres) and [`FGMRES`](@ref fgmres).
+
+!!! note
+ The Arnoldi process coincides with the Hermitian Lanczos process when $A$ is Hermitian.
+
+```@docs
+arnoldi
+```
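+
+A minimal sketch, assuming the signature `arnoldi(A, b, k)` with `k` the number of iterations:
+
+```julia
+using Krylov
+
+n = 100
+A = rand(n, n)  # square matrix
+b = rand(n)
+
+V, H = arnoldi(A, b, 20)  # V is n × 21, H is 21 × 20 upper Hessenberg
+```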
+
+## Golub-Kahan
+
+![golub_kahan](./graphics/golub_kahan.png)
+
+After $k$ iterations of the Golub-Kahan bidiagonalization process, the situation may be summarized as
+```math
+\begin{align*}
+ A V_k &= U_{k+1} B_k, \\
+ A^H U_{k+1} &= V_k B_k^H + \alpha_{k+1} v_{k+1} e_{k+1}^T = V_{k+1} L_{k+1}^H, \\
+ V_k^H V_k &= U_k^H U_k = I_k,
+\end{align*}
+```
+where $V_k$ and $U_k$ are bases of the Krylov subspaces $\mathcal{K}_k (A^HA,A^Hb)$ and $\mathcal{K}_k (AA^H,b)$, respectively,
+```math
+L_k =
+\begin{bmatrix}
+ \alpha_1 & & & \\
+ \beta_2 & \alpha_2 & & \\
+ & \ddots & \ddots & \\
+ & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+B_k =
+\begin{bmatrix}
+ \alpha_1 & & & \\
+ \beta_2 & \alpha_2 & & \\
+ & \ddots & \ddots & \\
+ & & \beta_k & \alpha_k \\
+ & & & \beta_{k+1} \\
+\end{bmatrix}
+=
+\begin{bmatrix}
+ L_{k} \\
+ \beta_{k+1} e_{k}^T
+\end{bmatrix}.
+```
+Note that $L_k$ is a real bidiagonal matrix even if $A$ is a complex matrix.
+
+The function [`golub_kahan`](@ref golub_kahan) returns $V_{k+1}$, $U_{k+1}$ and $L_{k+1}$.
+
+Related methods: [`LNLQ`](@ref lnlq), [`CRAIG`](@ref craig), [`CRAIGMR`](@ref craigmr), [`LSLQ`](@ref lslq), [`LSQR`](@ref lsqr) and [`LSMR`](@ref lsmr).
+
+!!! note
+ The Golub-Kahan process coincides with the Hermitian Lanczos process applied to the normal equations $A^HA x = A^Hb$ and $AA^H x = b$.
+ It is also related to the Hermitian Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial vector $\begin{bmatrix} b \\ 0 \end{bmatrix}$.
+
+```@docs
+golub_kahan
+```
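+
+A minimal sketch, assuming the signature `golub_kahan(A, b, k)` with `k` the number of iterations:
+
+```julia
+using Krylov
+
+m, n = 150, 100
+A = rand(m, n)  # rectangular matrix
+b = rand(m)
+
+V, U, L = golub_kahan(A, b, 20)  # V is n × 21, U is m × 21, L is 21 × 21 lower bidiagonal
+```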
+
+## Saunders-Simon-Yip
+
+![saunders_simon_yip](./graphics/saunders_simon_yip.png)
+
+After $k$ iterations of the Saunders-Simon-Yip process (also named the orthogonal tridiagonalization process), the situation may be summarized as
+```math
+\begin{align*}
+ A U_k &= V_k T_k + \beta_{k+1} v_{k+1} e_k^T = V_{k+1} T_{k+1,k}, \\
+ A^H V_k &= U_k T_k^H + \gamma_{k+1} u_{k+1} e_k^T = U_{k+1} T_{k,k+1}^H, \\
+ V_k^H V_k &= U_k^H U_k = I_k,
+\end{align*}
+```
+where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$,
+```math
+T_k =
+\begin{bmatrix}
+ \alpha_1 & \gamma_2 & & \\
+ \beta_2 & \alpha_2 & \ddots & \\
+ & \ddots & \ddots & \gamma_k \\
+ & & \beta_k & \alpha_k
+\end{bmatrix}
+, \qquad
+T_{k+1,k} =
+\begin{bmatrix}
+ T_{k} \\
+ \beta_{k+1} e_{k}^T
+\end{bmatrix}
+, \qquad
+T_{k,k+1} =
+\begin{bmatrix}
+ T_{k} & \gamma_{k+1} e_{k}
+\end{bmatrix}.
+```
+
+The function [`saunders_simon_yip`](@ref saunders_simon_yip) returns $V_{k+1}$, $T_{k+1,k}$, $U_{k+1}$ and $T_{k,k+1}^H$.
+
+Related methods: [`USYMLQ`](@ref usymlq), [`USYMQR`](@ref usymqr), [`TriLQR`](@ref trilqr), [`TriCG`](@ref tricg) and [`TriMR`](@ref trimr).
+
+```@docs
+saunders_simon_yip
+```
+
+!!! note
+ The Saunders-Simon-Yip process is equivalent to the block-Lanczos process applied to $\begin{bmatrix} 0 & A \\ A^H & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$.
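+
+A minimal sketch, assuming the signature `saunders_simon_yip(A, b, c, k)` with `k` the number of iterations:
+
+```julia
+using Krylov
+
+m, n = 150, 100
+A = rand(m, n)
+b = rand(m)
+c = rand(n)
+
+V, T, U, Tᴴ = saunders_simon_yip(A, b, c, 20)  # V is m × 21, U is n × 21
+```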
+
+## Montoison-Orban
+
+![montoison_orban](./graphics/montoison_orban.png)
+
+After $k$ iterations of the Montoison-Orban process (also named the orthogonal Hessenberg reduction process), the situation may be summarized as
+```math
+\begin{align*}
+ A U_k &= V_k H_k + h_{k+1,k} v_{k+1} e_k^T = V_{k+1} H_{k+1,k}, \\
+ B V_k &= U_k F_k + f_{k+1,k} u_{k+1} e_k^T = U_{k+1} F_{k+1,k}, \\
+ V_k^H V_k &= U_k^H U_k = I_k,
+\end{align*}
+```
+where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}, \begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}\right)$,
+```math
+H_k =
+\begin{bmatrix}
+ h_{1,1}~ & h_{1,2}~ & \ldots & h_{1,k} \\
+ h_{2,1}~ & \ddots~ & \ddots & \vdots \\
+ & \ddots~ & \ddots & h_{k-1,k} \\
+ & & h_{k,k-1} & h_{k,k}
+\end{bmatrix}
+, \qquad
+F_k =
+\begin{bmatrix}
+ f_{1,1}~ & f_{1,2}~ & \ldots & f_{1,k} \\
+ f_{2,1}~ & \ddots~ & \ddots & \vdots \\
+ & \ddots~ & \ddots & f_{k-1,k} \\
+ & & f_{k,k-1} & f_{k,k}
+\end{bmatrix},
+```
+```math
+H_{k+1,k} =
+\begin{bmatrix}
+ H_{k} \\
+ h_{k+1,k} e_{k}^T
+\end{bmatrix}
+, \qquad
+F_{k+1,k} =
+\begin{bmatrix}
+ F_{k} \\
+ f_{k+1,k} e_{k}^T
+\end{bmatrix}.
+```
+
+The function [`montoison_orban`](@ref montoison_orban) returns $V_{k+1}$, $H_{k+1,k}$, $U_{k+1}$ and $F_{k+1,k}$.
+
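+A minimal usage sketch (the signature `montoison_orban(A, B, b, c, k)` and the dimensions below are assumptions based on the relations above); setting `B = A'` would reproduce the Saunders-Simon-Yip process, as stated in the note below:
+
+```julia
+using Krylov, LinearAlgebra
+
+m, n, k = 8, 6, 4
+A = rand(ComplexF64, m, n)
+B = rand(ComplexF64, n, m)
+b = rand(ComplexF64, m)
+c = rand(ComplexF64, n)
+
+V, H, U, F = montoison_orban(A, B, b, c, k)
+norm(A * U[:, 1:k] - V * H)  # ≈ 0, first relation above
+norm(B * V[:, 1:k] - U * F)  # ≈ 0, second relation above
+```
+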
+Related methods: [`GPMR`](@ref gpmr).
+
+!!! note
+    The Montoison-Orban process is equivalent to the block-Arnoldi process applied to $\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}$ with initial matrix $\begin{bmatrix} b & 0 \\ 0 & c \end{bmatrix}$.
+ It also coincides with the Saunders-Simon-Yip process when $B = A^H$.
+
+```@docs
+montoison_orban
+```
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 0896e1639..f73e10043 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -6,6 +6,7 @@
```
```@docs
+Krylov.kstdout
Krylov.FloatOrComplex
Krylov.niterations
Krylov.Aprod
diff --git a/docs/src/solvers/gsp.md b/docs/src/solvers/gsp.md
index 10aaccbe0..33c580b8a 100644
--- a/docs/src/solvers/gsp.md
+++ b/docs/src/solvers/gsp.md
@@ -1,5 +1,5 @@
```@meta
-# Generalized saddle-point and unsymmetric partitioned systems
+# Generalized saddle-point and non-Hermitian partitioned systems
```
## GPMR
diff --git a/docs/src/solvers/ln.md b/docs/src/solvers/ln.md
index c5396ffdd..b638b8247 100644
--- a/docs/src/solvers/ln.md
+++ b/docs/src/solvers/ln.md
@@ -36,3 +36,10 @@ craig!
craigmr
craigmr!
```
+
+## USYMLQ
+
+```@docs
+usymlq
+usymlq!
+```
diff --git a/docs/src/solvers/ls.md b/docs/src/solvers/ls.md
index f77057d94..fecfbc417 100644
--- a/docs/src/solvers/ls.md
+++ b/docs/src/solvers/ls.md
@@ -36,3 +36,10 @@ lsqr!
lsmr
lsmr!
```
+
+## USYMQR
+
+```@docs
+usymqr
+usymqr!
+```
diff --git a/docs/src/solvers/sid.md b/docs/src/solvers/sid.md
index 1bd459cd2..e911681be 100644
--- a/docs/src/solvers/sid.md
+++ b/docs/src/solvers/sid.md
@@ -1,5 +1,5 @@
```@meta
-# Symmetric indefinite linear systems
+# Hermitian indefinite linear systems
```
## SYMMLQ
diff --git a/docs/src/solvers/sp_sqd.md b/docs/src/solvers/sp_sqd.md
index 518684b5b..4ee4ab09b 100644
--- a/docs/src/solvers/sp_sqd.md
+++ b/docs/src/solvers/sp_sqd.md
@@ -1,5 +1,5 @@
```@meta
-# Saddle-point and symmetric quasi-definite systems
+# Saddle-point and Hermitian quasi-definite systems
```
## TriCG
diff --git a/docs/src/solvers/spd.md b/docs/src/solvers/spd.md
index 79bb6e9e8..aebda285b 100644
--- a/docs/src/solvers/spd.md
+++ b/docs/src/solvers/spd.md
@@ -1,5 +1,5 @@
```@meta
-# Symmetric positive definite linear systems
+# Hermitian positive definite linear systems
```
## CG
diff --git a/docs/src/solvers/unsymmetric.md b/docs/src/solvers/unsymmetric.md
index 280908ea5..c9e77f787 100644
--- a/docs/src/solvers/unsymmetric.md
+++ b/docs/src/solvers/unsymmetric.md
@@ -1,5 +1,5 @@
```@meta
-# Unsymmetric linear systems
+# Non-Hermitian square linear systems
```
## BiLQ
@@ -16,20 +16,6 @@ qmr
qmr!
```
-## USYMLQ
-
-```@docs
-usymlq
-usymlq!
-```
-
-## USYMQR
-
-```@docs
-usymqr
-usymqr!
-```
-
## CGS
```@docs
@@ -71,3 +57,10 @@ dqgmres!
gmres
gmres!
```
+
+## FGMRES
+
+```@docs
+fgmres
+fgmres!
+```
diff --git a/docs/src/storage.md b/docs/src/storage.md
new file mode 100644
index 000000000..903cc0558
--- /dev/null
+++ b/docs/src/storage.md
@@ -0,0 +1,152 @@
+```@meta
+# Thanks Morten Piibeleht for the hack with the tables!
+```
+
+```@raw html
+
+```
+
+# [Storage requirements](@id storage-requirements)
+
+This section provides the storage requirements of all Krylov methods available in Krylov.jl.
+
+### Notation
+
+We denote by $m$ and $n$ the number of rows and columns of the linear problem.
+The memory parameter of DIOM, FOM, DQGMRES, GMRES, FGMRES and GPMR is $k$.
+The number of shifts of CG-LANCZOS-SHIFT is $p$.
+
+## Theoretical storage requirements
+
+The following tables provide the number of coefficients that must be allocated for each Krylov method.
+The coefficients have the same type as those that compose the linear problem we seek to solve.
+Each table summarizes the storage requirements of the Krylov methods recommended for a specific class of linear problems.
+
+#### Hermitian positive definite linear systems
+
+| Methods | [`CG`](@ref cg) | [`CR`](@ref cr) | [`CG-LANCZOS`](@ref cg_lanczos) | [`CG-LANCZOS-SHIFT`](@ref cg_lanczos_shift) |
+|:-------:|:---------------:|:---------------:|:-------------------------------:|:-------------------------------------------:|
+| Storage | $4n$ | $5n$ | $5n$ | $3n + 2np + 5p$ |
+
+#### Hermitian indefinite linear systems
+
+| Methods | [`SYMMLQ`](@ref symmlq) | [`MINRES`](@ref minres) | [`MINRES-QLP`](@ref minres_qlp) |
+|:-------:|:-----------------------:|:-----------------------:|:-------------------------------:|
+| Storage | $5n$ | $6n$ | $6n$ |
+
+#### Non-Hermitian square linear systems
+
+| Methods | [`CGS`](@ref cgs) | [`BICGSTAB`](@ref bicgstab) | [`BiLQ`](@ref bilq) | [`QMR`](@ref qmr) |
+|:-------:|:-----------------:|:---------------------------:|:-------------------:|:-----------------:|
+| Storage | $6n$ | $6n$ | $8n$ | $9n$ |
+
+| Methods | [`DIOM`](@ref diom) | [`DQGMRES`](@ref dqgmres) |
+|:-------:|:-------------------:|:-------------------------:|
+| Storage | $n(2k+1) + 2k - 1$ | $n(2k+2) + 3k + 1$ |
+
+| Methods | [`FOM`](@ref fom) | [`GMRES`](@ref gmres) | [`FGMRES`](@ref fgmres) |
+|:-------:|:--------------------------------------------------:|:---------------------------------------:|:----------------------------------------:|
+| Storage$\dfrac{}{}$ | $\!n(2+k) +2k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+k) + 3k + \dfrac{k(k + 1)}{2}\!$ | $\!n(2+2k) + 3k + \dfrac{k(k + 1)}{2}\!$ |
+
+#### Least-norm problems
+
+| Methods | [`USYMLQ`](@ref usymlq) | [`CGNE`](@ref cgne) | [`CRMR`](@ref crmr) | [`LNLQ`](@ref lnlq) | [`CRAIG`](@ref craig) | [`CRAIGMR`](@ref craigmr) |
+|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:---------------------:|:-------------------------:|
+| Storage | $5n + 3m$ | $3n + 2m$ | $3n + 2m$ | $3n + 4m$ | $3n + 4m$ | $4n + 5m$ |
+
+#### Least-squares problems
+
+| Methods | [`USYMQR`](@ref usymqr) | [`CGLS`](@ref cgls) | [`CRLS`](@ref crls) | [`LSLQ`](@ref lslq) | [`LSQR`](@ref lsqr) | [`LSMR`](@ref lsmr) |
+|:-------:|:-----------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|:-------------------:|
+| Storage | $6n + 3m$ | $3n + 2m$ | $4n + 3m$ | $4n + 2m$ | $4n + 2m$ | $5n + 2m$ |
+
+#### Adjoint systems
+
+| Methods | [`BiLQR`](@ref bilqr) | [`TriLQR`](@ref trilqr) |
+|:-------:|:---------------------:|:-----------------------:|
+| Storage | $11n$ | $6m + 5n$ |
+
+#### Saddle-point and Hermitian quasi-definite systems
+
+| Methods | [`TriCG`](@ref tricg) | [`TriMR`](@ref trimr) |
+|:--------:|:---------------------:|:---------------------:|
+| Storage | $6n + 6m$ | $8n + 8m$ |
+
+#### Generalized saddle-point and non-Hermitian partitioned systems
+
+| Method | [`GPMR`](@ref gpmr) |
+|:-------:|:-------------------------:|
+| Storage | $(2+k)(n+m) + 2k^2 + 11k$ |
+
+## Practical storage requirements
+
+Each method has its own `KrylovSolver` that contains all the storage needed by the method.
+In the REPL, the size in bytes of each attribute and the total amount of memory allocated by the solver are displayed when we show a `KrylovSolver`.
+
+```@example storage
+using Krylov
+
+m = 5000
+n = 12000
+A = rand(Float64, m, n)
+b = rand(Float64, m)
+solver = LsmrSolver(A, b)
+show(stdout, solver, show_stats=false)
+```
+
+If we want the total number of bytes used by the solver, we can call `nbytes = sizeof(solver)`.
+
+```@example storage
+nbytes = sizeof(solver)
+```
+
+Thereafter, we can use `Base.format_bytes(nbytes)` to recover what is displayed in the REPL.
+
+```@example storage
+Base.format_bytes(nbytes)
+```
+
+To verify that we match the theoretical results, we just need to multiply the storage requirement of a method by the number of bytes associated with the precision of the linear problem.
+For instance, we need 4 bytes for the precision `Float32`, 8 bytes for precisions `Float64` and `ComplexF32`, and 16 bytes for the precision `ComplexF64`.
+
+```@example storage
+FC = Float64 # precision of the least-squares problem
+ncoefs_lsmr = 5*n + 2*m # number of coefficients
+nbytes_lsmr = sizeof(FC) * ncoefs_lsmr # number of bytes
+```
+
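+If the theoretical count is accurate, it should roughly agree with `sizeof(solver)`; the solver also stores a few scalars, integers and booleans, so a small gap is expected. The comparison below is only a sanity check, not the allocation test performed in the test suite:
+
+```@example storage
+(nbytes - nbytes_lsmr) / nbytes_lsmr  # relative gap between measured and theoretical storage
+```
+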
+Therefore, you can check in advance that you have enough free memory in RAM to allocate a `KrylovSolver`.
+
+```@example storage
+free_nbytes = Sys.free_memory()
+Base.format_bytes(free_nbytes) # Total free memory in RAM in bytes.
+```
+
+!!! note
+  - Beyond speeding up operations, low precisions such as single precision make it possible to store more coefficients in RAM and to solve larger linear problems; see the sketch after this note.
+ - In the file [test_allocations.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl/blob/main/test/test_allocations.jl), we use the macro `@allocated` to test that we match the expected storage requirement of each method with a tolerance of 2%.
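+
+As a sketch of the first point above, the same solver can be allocated in single precision, assuming the `KrylovSolver` constructors of the form `LsmrSolver(m, n, S)`, where `S` is the storage type; it should require roughly half the memory of its `Float64` counterpart:
+
+```@example storage
+solver_f32 = LsmrSolver(m, n, Vector{Float32})
+Base.format_bytes(sizeof(solver_f32))
+```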
diff --git a/docs/src/tips.md b/docs/src/tips.md
index 604c0633d..ca3d927bd 100644
--- a/docs/src/tips.md
+++ b/docs/src/tips.md
@@ -23,7 +23,7 @@ BLAS.set_num_threads(N) # 1 ≤ N ≤ NMAX
BLAS.get_num_threads()
```
-The recommended number of BLAS threads is the number of physical and not logical cores, which is in general `N = NMAX / 2`.
+The recommended number of BLAS threads is the number of physical cores, not logical cores; it is in general `N = NMAX / 2` if your CPU supports simultaneous multithreading (SMT).
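+
+For reference, a small sketch of this recommendation (it assumes SMT is enabled, so that `Sys.CPU_THREADS` counts logical cores and half of them are physical):
+
+```julia
+using LinearAlgebra
+
+NMAX = Sys.CPU_THREADS          # number of logical cores
+BLAS.set_num_threads(NMAX ÷ 2)  # number of physical cores under SMT
+```
+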
By default Julia ships with OpenBLAS but it's also possible to use Intel MKL BLAS and LAPACK with [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl).
diff --git a/docs/src/warm_start.md b/docs/src/warm-start.md
similarity index 59%
rename from docs/src/warm_start.md
rename to docs/src/warm-start.md
index 030cad6c0..d926db183 100644
--- a/docs/src/warm_start.md
+++ b/docs/src/warm-start.md
@@ -1,9 +1,10 @@
-## Warm Start
+# [Warm-start](@id warm-start)
-Most Krylov methods in this module accept a starting point as argument. The starting point is used as initial approximation to a solution.
+Most Krylov methods in this module accept a starting point as an argument.
+The starting point is used as an initial approximation to a solution.
```julia
-solver = CgSolver(n, n, S)
+solver = CgSolver(A, b)
cg!(solver, A, b, itmax=100)
if !issolved(solver)
cg!(solver, A, b, solver.x, itmax=100) # cg! uses the approximate solution `solver.x` as starting point
@@ -28,7 +29,7 @@ If a Krylov method doesn't have the option to warm start, it can still be done e
We provide an example with `cg_lanczos!`.
```julia
-solver = CgLanczosSolver(n, n, S)
+solver = CgLanczosSolver(A, b)
cg_lanczos!(solver, A, b)
x₀ = solver.x # Ax₀ ≈ b
r = b - A * x₀ # r = b - Ax₀
@@ -41,33 +42,34 @@ Explicit restarts cannot be avoided in certain block methods, such as TriMR, due
```julia
# [E A] [x] = [b]
-# [Aᵀ F] [y] [c]
+# [Aᴴ F] [y] [c]
M = inv(E)
N = inv(F)
x₀, y₀, stats = trimr(A, b, c, M=M, N=N)
# E and F are not available inside TriMR
b₀ = b - E * x₀ - A * y₀
-c₀ = c - Aᵀx₀ - Fy
+c₀ = c - A' * x₀ - F * y₀  # A' = Aᴴ
Δx, Δy, stats = trimr(A, b₀, c₀, M=M, N=N)
x = x₀ + Δx
y = y₀ + Δy
```
-
-## Restarted methods
-
-The storage requierements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses.
-For very large problems, the storage costs become prohibitive after only few iterations and restarted variants FOM(k) and GMRES(k) are prefered.
-In this section, we show how to use warm starts to implement GMRES(k) and FOM(k).
-
-```julia
-k = 50
-solver = GmresSolver(A, b, k) # FomSolver(A, b, k)
-solver.x .= 0 # solver.x .= x₀
-nrestart = 0
-while !issolved(solver) || nrestart ≤ 10
- solve!(solver, A, b, solver.x, itmax=k)
- nrestart += 1
-end
+```@meta
+# ## Restarted methods
+#
+# The storage requirements of Krylov methods based on the Arnoldi process, such as FOM and GMRES, increase as the iteration progresses.
+# For very large problems, the storage costs become prohibitive after only a few iterations, and the restarted variants FOM(k) and GMRES(k) are preferred.
+# In this section, we show how to use warm starts to implement GMRES(k) and FOM(k).
+#
+# ```julia
+# k = 50
+# solver = GmresSolver(A, b, k) # FomSolver(A, b, k)
+# solver.x .= 0 # solver.x .= x₀
+# nrestart = 0
+# while !issolved(solver) && nrestart ≤ 10
+# solve!(solver, A, b, solver.x, itmax=k)
+# nrestart += 1
+# end
+# ```
```
diff --git a/src/Krylov.jl b/src/Krylov.jl
index b714ccd79..aadde1575 100644
--- a/src/Krylov.jl
+++ b/src/Krylov.jl
@@ -5,6 +5,7 @@ using LinearAlgebra, SparseArrays, Printf
include("krylov_utils.jl")
include("krylov_stats.jl")
include("krylov_solvers.jl")
+include("krylov_processes.jl")
include("cg.jl")
include("cr.jl")
@@ -19,6 +20,7 @@ include("diom.jl")
include("fom.jl")
include("dqgmres.jl")
include("gmres.jl")
+include("fgmres.jl")
include("gpmr.jl")
@@ -49,6 +51,4 @@ include("lnlq.jl")
include("craig.jl")
include("craigmr.jl")
-include("callback_utils.jl")
-
end
diff --git a/src/bicgstab.jl b/src/bicgstab.jl
index c3b914599..c4f16595e 100644
--- a/src/bicgstab.jl
+++ b/src/bicgstab.jl
@@ -16,40 +16,59 @@
export bicgstab, bicgstab!
"""
- (x, stats) = bicgstab(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = bicgstab(A, b::AbstractVector{FC};
+ c::AbstractVector{FC}=b, M=I, N=I,
+ ldiv::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the square linear system Ax = b using the BICGSTAB method.
+ (x, stats) = bicgstab(A, b, x0::AbstractVector; kwargs...)
+
+BICGSTAB can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the square linear system Ax = b of size n using BICGSTAB.
BICGSTAB requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
The Biconjugate Gradient Stabilized method is a variant of BiCG, like CGS,
-but using different updates for the Aᵀ-sequence in order to obtain smoother
+but using different updates for the Aᴴ-sequence in order to obtain smoother
convergence than CGS.
If BICGSTAB stagnates, we recommend DQGMRES and BiLQ as alternative methods for unsymmetric square systems.
BICGSTAB stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖b‖ * rtol`.
-`atol` is an absolute tolerance and `rtol` is a relative tolerance.
-Additional details can be displayed if verbose mode is enabled (verbose > 0).
-Information will be displayed every `verbose` iterations.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
-This implementation allows a left preconditioner `M` and a right preconditioner `N`.
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-BICGSTAB can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = bicgstab(A, b, x0; kwargs...)
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -86,15 +105,17 @@ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC},
return solver
end
-function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b,
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC};
+ c :: AbstractVector{FC}=b, M=I, N=I,
+ ldiv :: Bool=false, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- n, m = size(A)
+ m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("BICGSTAB: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "BICGSTAB: system of size %d\n", n)
# Check M = Iₙ and N = Iₙ
MisI = (M === I)
@@ -102,8 +123,8 @@ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :t , S, n)
@@ -150,14 +171,14 @@ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC};
itmax == 0 && (itmax = 2*n)
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s %8s %8s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω))
+ (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s\n", "k", "‖rₖ‖", "|αₖ|", "|ωₖ|")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω))
next_ρ = @kdot(n, c, r) # ρ₁ = ⟨r̅₀,r₀⟩
if next_ρ == 0
stats.niter = 0
stats.solved, stats.inconsistent = false, false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start = false
return solver
end
@@ -207,9 +228,9 @@ function bicgstab!(solver :: BicgstabSolver{T,FC,S}, A, b :: AbstractVector{FC};
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
breakdown = (α == 0 || isnan(α))
- kdisplay(iter, verbose) && @printf("%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω))
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %8.1e %8.1e\n", iter, rNorm, abs(α), abs(ω))
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
breakdown && (status = "breakdown αₖ == 0")
diff --git a/src/bilq.jl b/src/bilq.jl
index 39725fbfe..12ee40652 100644
--- a/src/bilq.jl
+++ b/src/bilq.jl
@@ -13,35 +13,54 @@
export bilq, bilq!
"""
- (x, stats) = bilq(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b,
- atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true,
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ (x, stats) = bilq(A, b::AbstractVector{FC};
+ c::AbstractVector{FC}=b, transfer_to_bicg::Bool=true,
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the square linear system Ax = b using the BiLQ method.
+ (x, stats) = bilq(A, b, x0::AbstractVector; kwargs...)
+BiLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the square linear system Ax = b of size n using BiLQ.
BiLQ is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
-When `A` is symmetric and `b = c`, BiLQ is equivalent to SYMMLQ.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
+When `A` is Hermitian and `b = c`, BiLQ is equivalent to SYMMLQ.
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
-An option gives the possibility of transferring to the BiCG point,
-when it exists. The transfer is based on the residual norm.
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-BiLQ can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = bilq(A, b, x0; kwargs...)
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process;
+* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. The transfer is based on the residual norm;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
-#### Reference
+#### References
* A. Montoison and D. Orban, [*BiLQ: An Iterative Method for Nonsymmetric Linear Systems with a Quasi-Minimum Error Property*](https://doi.org/10.1137/19M1290991), SIAM Journal on Matrix Analysis and Applications, 41(3), pp. 1145--1166, 2020.
+* R. Fletcher, [*Conjugate gradient methods for indefinite systems*](https://doi.org/10.1007/BFb0080116), Numerical Analysis, Springer, pp. 73--89, 1976.
"""
function bilq end
@@ -73,23 +92,24 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: A
return solver
end
-function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b,
- atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true,
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC};
+ c :: AbstractVector{FC}=b, transfer_to_bicg :: Bool=true,
+ atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- n, m = size(A)
+ m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("BILQ: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "BILQ: system of size %d\n", n)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ
@@ -122,29 +142,29 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
itmax == 0 && (itmax = 2*n)
ε = atol + rtol * bNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, bNorm)
# Initialize the Lanczos biorthogonalization process.
- cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩
- if cᵗb == 0
+ cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩
+ if cᴴb == 0
stats.niter = 0
stats.solved = false
stats.inconsistent = false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start = false
return solver
end
- βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀)
- γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀)
+ βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀)
+ γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀)
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
uₖ .= c ./ conj(γₖ) # u₁ = c / γ̄₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
@@ -164,10 +184,10 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
# Continue the Lanczos biorthogonalization process.
# AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
- mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
+ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ
@kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
@@ -177,9 +197,9 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
@kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
@kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
- pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩
- βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|)
- γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁
+ pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩
+ βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|)
+ γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁
# Update the LQ factorization of Tₖ = L̅ₖQₖ.
# [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
@@ -234,7 +254,7 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ.
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ.
# [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ
# [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
if iter ≥ 2
@@ -257,13 +277,13 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
- if pᵗq ≠ 0
+ if pᴴq ≠ 0
@. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
@. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
end
# Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖
- vₖᵀvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ)
+ vₖᴴvₖ₊₁ = @kdot(n, vₖ₋₁, vₖ)
norm_vₖ₊₁ = @knrm2(n, vₖ)
# Compute BiLQ residual norm
@@ -273,7 +293,7 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
else
μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
- θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁
+ θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁
rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ))
end
history && push!(rNorms, rNorm_lq)
@@ -299,10 +319,10 @@ function bilq!(solver :: BilqSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Ab
solved_lq = rNorm_lq ≤ ε
solved_cg = transfer_to_bicg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε)
tired = iter ≥ itmax
- breakdown = !solved_lq && !solved_cg && (pᵗq == 0)
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq)
+ breakdown = !solved_lq && !solved_cg && (pᴴq == 0)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm_lq)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
# Compute BICG point
# (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
diff --git a/src/bilqr.jl b/src/bilqr.jl
index 09fef1f6c..5666f0863 100644
--- a/src/bilqr.jl
+++ b/src/bilqr.jl
@@ -1,5 +1,5 @@
# An implementation of BILQR for the solution of square
-# consistent linear adjoint systems Ax = b and Aᵀy = c.
+# consistent linear adjoint systems Ax = b and Aᴴy = c.
#
# This method is described in
#
@@ -14,33 +14,54 @@ export bilqr, bilqr!
"""
(x, y, stats) = bilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- atol::T=√eps(T), rtol::T=√eps(T), transfer_to_bicg::Bool=true,
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ transfer_to_bicg::Bool=true, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, y, stats) = bilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
+
+BiLQR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above.
+
Combine BiLQ and QMR to solve adjoint systems.
[0 A] [y] = [b]
- [Aᵀ 0] [x] [c]
+ [Aᴴ 0] [x] [c]
+
+The relation `bᴴc ≠ 0` must be satisfied.
+BiLQ is used for solving the primal system `Ax = b` of size n.
+QMR is used for solving the dual system `Aᴴy = c` of size n.
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n;
+* `c`: a vector of length n.
-The relation `bᵀc ≠ 0` must be satisfied.
-BiLQ is used for solving primal system `Ax = b`.
-QMR is used for solving dual system `Aᵀy = c`.
+#### Optional arguments
-An option gives the possibility of transferring from the BiLQ point to the
-BiCG point, when it exists. The transfer is based on the residual norm.
+* `x0`: a vector of length n that represents an initial guess of the solution x;
+* `y0`: a vector of length n that represents an initial guess of the solution y.
-BiLQR can be warm-started from initial guesses `x0` and `y0` with the method
+#### Keyword arguments
- (x, y, stats) = bilqr(A, b, c, x0, y0; kwargs...)
+* `transfer_to_bicg`: transfer from the BiLQ point to the BiCG point, when it exists. The transfer is based on the residual norm;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length n;
+* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure.
#### Reference
@@ -78,23 +99,24 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
end
function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_bicg :: Bool=true,
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ transfer_to_bicg :: Bool=true, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- n, m = size(A)
+ m, n = size(A)
m == n || error("Systems must be square")
length(b) == m || error("Inconsistent problem size")
length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("BILQR: systems of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "BILQR: systems of size %d\n", n)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, q, vₖ₋₁, vₖ = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ
@@ -109,7 +131,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
if warm_start
mul!(r₀, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r₀)
- mul!(s₀, Aᵀ, Δy)
+ mul!(s₀, Aᴴ, Δy)
@kaxpby!(n, one(FC), c, -one(FC), s₀)
end
@@ -117,7 +139,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
x .= zero(FC) # x₀
bNorm = @knrm2(n, r₀) # rNorm = ‖r₀‖
- # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᵀy₀‖.
+ # Initial solution t₀ and residual norm ‖s₀‖ = ‖c - Aᴴy₀‖.
t .= zero(FC) # t₀
cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖
@@ -128,38 +150,38 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
history && push!(sNorms, cNorm)
εL = atol + rtol * bNorm
εQ = atol + rtol * cNorm
- (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e\n", iter, bNorm, cNorm)
# Initialize the Lanczos biorthogonalization process.
- cᵗb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᵀy₀,b - Ax₀⟩
- if cᵗb == 0
+ cᴴb = @kdot(n, s₀, r₀) # ⟨s₀,r₀⟩ = ⟨c - Aᴴy₀,b - Ax₀⟩
+ if cᴴb == 0
stats.niter = 0
stats.solved_primal = false
stats.solved_dual = false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start = false
return solver
end
# Set up workspace.
- βₖ = √(abs(cᵗb)) # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀)
- γₖ = cᵗb / βₖ # β₁γ₁ = (c - Aᵀy₀)ᵀ(b - Ax₀)
+ βₖ = √(abs(cᴴb)) # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
+ γₖ = cᴴb / βₖ # β₁γ₁ = (c - Aᴴy₀)ᴴ(b - Ax₀)
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᵀy₀) / γ̄₁
+ uₖ .= s₀ ./ conj(γₖ) # u₁ = (c - Aᴴy₀) / γ̄₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᵀ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Vₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ̄₁e₁
norm_vₖ = bNorm / βₖ # ‖vₖ‖ is used for residual norm estimates
ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
- wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᵀ
- wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᵀ
+ wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Uₖ(Lₖ)⁻ᴴ
+ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Lₖ)⁻ᴴ
τₖ = zero(T) # τₖ is used for the dual residual norm estimate
# Stopping criterion.
@@ -180,10 +202,10 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Continue the Lanczos biorthogonalization process.
# AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
- mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
+ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ
@kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
@@ -193,9 +215,9 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
@kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
@kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
- pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩
- βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|)
- γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁
+ pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩
+ βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|)
+ γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁
# Update the LQ factorization of Tₖ = L̅ₖQₖ.
# [ α₁ γ₂ 0 • • • 0 ] [ δ₁ 0 • • • • 0 ]
@@ -251,7 +273,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᵀ.
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Vₖ(Qₖ)ᴴ.
# [d̅ₖ₋₁ vₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * vₖ
# [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * vₖ
if iter ≥ 2
@@ -271,7 +293,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
end
# Compute ⟨vₖ,vₖ₊₁⟩ and ‖vₖ₊₁‖
- vₖᵀvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁
+ vₖᴴvₖ₊₁ = @kdot(n, vₖ, q) / βₖ₊₁
norm_vₖ₊₁ = @knrm2(n, q) / βₖ₊₁
# Compute BiLQ residual norm
@@ -281,7 +303,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
else
μₖ = βₖ * (sₖ₋₁ * ζₖ₋₂ - cₖ₋₁ * cₖ * ζₖ₋₁) + αₖ * sₖ * ζₖ₋₁
ωₖ = βₖ₊₁ * sₖ * ζₖ₋₁
- θₖ = conj(μₖ) * ωₖ * vₖᵀvₖ₊₁
+ θₖ = conj(μₖ) * ωₖ * vₖᴴvₖ₊₁
rNorm_lq = sqrt(abs2(μₖ) * norm_vₖ^2 + abs2(ωₖ) * norm_vₖ₊₁^2 + 2 * real(θₖ))
end
history && push!(rNorms, rNorm_lq)
@@ -318,7 +340,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
ψbarₖ = sₖ * ψbarₖ₋₁
end
- # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ.
+ # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Uₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Uₖ₋₁)ᵀ.
# w₁ = u₁ / δ̄₁
if iter == 2
wₖ₋₁ = wₖ₋₂
@@ -372,7 +394,7 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
- if pᵗq ≠ zero(FC)
+ if pᴴq ≠ zero(FC)
@. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
@. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
end
@@ -392,13 +414,13 @@ function bilqr!(solver :: BilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
user_requested_exit = callback(solver) :: Bool
tired = iter ≥ itmax
- breakdown = !solved_lq && !solved_cg && (pᵗq == 0)
+ breakdown = !solved_lq && !solved_cg && (pᴴq == 0)
- kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm)
- kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "")
- kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm)
+ kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e\n", iter, "", sNorm)
+ kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s\n", iter, rNorm_lq, "")
+ kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
# Compute BICG point
# (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
diff --git a/src/callback_utils.jl b/src/callback_utils.jl
deleted file mode 100644
index eac362e5d..000000000
--- a/src/callback_utils.jl
+++ /dev/null
@@ -1,50 +0,0 @@
-export StorageGetxRestartedGmres
-
-export get_x_restarted_gmres!
-
-mutable struct StorageGetxRestartedGmres{S}
- x::S
- y::S
- p::S
-end
-StorageGetxRestartedGmres(solver::GmresSolver; N = I) =
- StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? similar(solver.p) : similar(solver.x))
-
-function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A,
- stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S}
- NisI = (N === I)
- x2, y2, p2 = stor.x, stor.y, stor.p
- n = size(A, 2)
- # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution.
- nr = sum(1:solver.inner_iter)
- y = solver.z # yᵢ = zᵢ
- y2 .= y
- R = solver.R
- V = solver.V
- x2 .= solver.Δx
- for i = solver.inner_iter : -1 : 1
- pos = nr + i - solver.inner_iter # position of rᵢ.ₖ
- for j = solver.inner_iter : -1 : i+1
- y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
- pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
- end
- # Rₖ can be singular if the system is inconsistent
- if abs(R[pos]) ≤ eps(T)^(3/4)
- y2[i] = zero(FC)
- inconsistent = true
- else
- y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
- end
- end
-
- # Form xₖ = N⁻¹Vₖyₖ
- for i = 1 : solver.inner_iter
- @kaxpy!(n, y2[i], V[i], x2)
- end
- if !NisI
- p2 .= solver.p
- p2 .= x2
- mul!(x2, N, p2)
- end
- x2 .+= solver.x
-end
diff --git a/src/cg.jl b/src/cg.jl
index 8a974accc..ed9d88cfa 100644
--- a/src/cg.jl
+++ b/src/cg.jl
@@ -15,36 +15,53 @@
export cg, cg!
-
"""
(x, stats) = cg(A, b::AbstractVector{FC};
- M=I, atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, radius::T=zero(T), linesearch::Bool=false,
+ M=I, ldiv::Bool=false, radius::T=zero(T),
+ linesearch::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-The conjugate gradient method to solve the symmetric linear system Ax=b.
+ (x, stats) = cg(A, b, x0::AbstractVector; kwargs...)
-The method does _not_ abort if A is not definite.
+CG can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be symmetric and positive definite.
+The conjugate gradient method to solve the Hermitian linear system Ax = b of size n.
+
+The method does _not_ abort if A is not definite.
M also indicates the weighted norm in which residuals are measured.
-If `itmax=0`, the default number of iterations is set to `2 * n`,
-with `n = length(b)`.
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian positive definite matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
+
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-CG can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = cg(A, b, x0; kwargs...)
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient);
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -81,24 +98,25 @@ function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: Abstr
end
function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, radius :: T=zero(T), linesearch :: Bool=false,
+ M=I, ldiv :: Bool=false, radius :: T=zero(T),
+ linesearch :: Bool=false, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
linesearch && (radius > 0) && error("`linesearch` set to `true` but trust-region radius > 0")
- n, m = size(A)
+ m, n = size(A)
m == n || error("System must be square")
length(b) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("CG: system of %d equations in %d variables\n", n, n)
+ (verbose > 0) && @printf(iostream, "CG: system of %d equations in %d variables\n", n, n)
# Tests M = Iₙ
MisI = (M === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :z, S, n)
@@ -134,8 +152,8 @@ function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC};
pAp = zero(T)
pNorm² = γ
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s %8s %8s %8s\n", "k", "‖r‖", "pAp", "α", "σ")
- kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s %8s %8s %8s\n", "k", "‖r‖", "pAp", "α", "σ")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e ", iter, rNorm)
solved = rNorm ≤ ε
tired = iter ≥ itmax
@@ -164,9 +182,9 @@ function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC};
α = γ / pAp
# Compute step size to boundary if applicable.
- σ = radius > 0 ? maximum(to_boundary(x, p, radius, dNorm2=pNorm²)) : α
+ σ = radius > 0 ? maximum(to_boundary(n, x, p, radius, dNorm2=pNorm²)) : α
- kdisplay(iter, verbose) && @printf("%8.1e %8.1e %8.1e\n", pAp, α, σ)
+ kdisplay(iter, verbose) && @printf(iostream, "%8.1e %8.1e %8.1e\n", pAp, α, σ)
# Move along p from x to the boundary if either
# the next step leads outside the trust region or
@@ -201,9 +219,9 @@ function cg!(solver :: CgSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + 1
tired = iter ≥ itmax
user_requested_exit = callback(solver) :: Bool
- kdisplay(iter, verbose) && @printf("%5d %7.1e ", iter, rNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e ", iter, rNorm)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
solved && on_boundary && (status = "on trust-region boundary")
solved && linesearch && (pAp ≤ 0) && (status = "nonpositive curvature detected")
diff --git a/src/cg_lanczos.jl b/src/cg_lanczos.jl
index a8e24f02f..f648eb2a8 100644
--- a/src/cg_lanczos.jl
+++ b/src/cg_lanczos.jl
@@ -12,34 +12,52 @@
export cg_lanczos, cg_lanczos!
-
"""
(x, stats) = cg_lanczos(A, b::AbstractVector{FC};
- M=I, atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
- check_curvature::Bool=false, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false,
+ check_curvature::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-The Lanczos version of the conjugate gradient method to solve the
-symmetric linear system
+ (x, stats) = cg_lanczos(A, b, x0::AbstractVector; kwargs...)
- Ax = b
+CG-LANCZOS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+The Lanczos version of the conjugate gradient method to solve the
+Hermitian linear system Ax = b of size n.
The method does _not_ abort if A is not definite.
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be hermitian and positive definite.
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
+
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-CG-LANCZOS can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = cg_lanczos(A, b, x0; kwargs...)
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not, unless `linesearch` is also `true`;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`LanczosStats`](@ref) structure.
#### References
@@ -77,21 +95,23 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
end
function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
- check_curvature :: Bool=false, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ M=I, ldiv :: Bool=false,
+ check_curvature :: Bool=false, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- n, m = size(A)
+ m, n = size(A)
m == n || error("System must be square")
length(b) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables\n", n, n)
+ (verbose > 0) && @printf(iostream, "CG Lanczos: system of %d equations in %d variables\n", n, n)
# Tests M = Iₙ
MisI = (M === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $T")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :v, S, n)
@@ -111,7 +131,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
Mv .= b
end
MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹r₀
- β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁
+ β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁
σ = β
rNorm = σ
history && push!(rNorms, rNorm)
@@ -143,8 +163,8 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
# Define stopping tolerance.
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
indefinite = false
solved = rNorm ≤ ε
@@ -157,10 +177,10 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
# Form next Lanczos vector.
# βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁
mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ
- δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ
+ δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ
# Check curvature. Exit fast if requested.
- # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ A pₖ.
+ # It is possible to show that σₖ² (δₖ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ A pₖ.
γ = one(T) / (δ - ω / γ) # γₖ = 1 / (δₖ - ωₖ₋₁ / γₖ₋₁)
indefinite |= (γ ≤ 0)
(check_curvature & indefinite) && continue
@@ -172,7 +192,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
end
@. Mv = Mv_next # Mvₖ ← Mvₖ₊₁
MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
- β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁
+ β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁
@kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
Anorm2 += β_prev^2 + β^2 + δ^2 # Use ‖Tₖ₊₁‖₂ as increasing approximation of ‖A‖₂.
@@ -187,7 +207,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
rNorm = abs(σ) # ‖rₖ₊₁‖_M = |σₖ₊₁| because rₖ₊₁ = σₖ₊₁ * vₖ₊₁ and ‖vₖ₊₁‖_M = 1
history && push!(rNorms, rNorm)
iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
# Stopping conditions that do not depend on user input.
# This is to guard against tolerances that are unreasonably small.
@@ -198,7 +218,7 @@ function cg_lanczos!(solver :: CgLanczosSolver{T,FC,S}, A, b :: AbstractVector{F
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
(check_curvature & indefinite) && (status = "negative curvature")
diff --git a/src/cg_lanczos_shift.jl b/src/cg_lanczos_shift.jl
index 01f11e41f..bf883649d 100644
--- a/src/cg_lanczos_shift.jl
+++ b/src/cg_lanczos_shift.jl
@@ -13,13 +13,13 @@
export cg_lanczos_shift, cg_lanczos_shift!
-
"""
(x, stats) = cg_lanczos_shift(A, b::AbstractVector{FC}, shifts::AbstractVector{T};
- M=I, atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, check_curvature::Bool=false,
+ M=I, ldiv::Bool=false,
+ check_curvature::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -27,15 +27,38 @@ export cg_lanczos_shift, cg_lanczos_shift!
The Lanczos version of the conjugate gradient method to solve a family
of shifted systems
- (A + αI) x = b (α = α₁, ..., αₙ)
+ (A + αI) x = b (α = α₁, ..., αₚ)
+
+of size n. The method does _not_ abort if A + αI is not definite.
+
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n;
+* `shifts`: a vector of length p.
-The method does _not_ abort if A + αI is not definite.
+#### Keyword arguments
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be hermitian and positive definite.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `check_curvature`: if `true`, check that the curvature of the quadratic along the search direction is positive, and abort if not;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a vector of p dense vectors, each one of length n;
+* `stats`: statistics collected on the run in a [`LanczosShiftStats`](@ref) structure.
+
+#### References
+
+* A. Frommer and P. Maass, [*Fast CG-Based Methods for Tikhonov-Phillips Regularization*](https://doi.org/10.1137/S1064827596313310), SIAM Journal on Scientific Computing, 20(5), pp. 1831--1850, 1999.
+* C. C. Paige and M. A. Saunders, [*Solution of Sparse Indefinite Systems of Linear Equations*](https://doi.org/10.1137/0712047), SIAM Journal on Numerical Analysis, 12(4), pp. 617--629, 1975.
"""
function cg_lanczos_shift end
@@ -56,24 +79,25 @@ See [`CgLanczosShiftSolver`](@ref) for more details about the `solver`.
function cg_lanczos_shift! end
function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: AbstractVector{FC}, shifts :: AbstractVector{T};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, check_curvature :: Bool=false,
+ M=I, ldiv :: Bool=false,
+ check_curvature :: Bool=false, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- n, m = size(A)
+ m, n = size(A)
m == n || error("System must be square")
length(b) == n || error("Inconsistent problem size")
nshifts = length(shifts)
- (verbose > 0) && @printf("CG Lanczos: system of %d equations in %d variables with %d shifts\n", n, n, nshifts)
+ (verbose > 0) && @printf(iostream, "CG Lanczos: system of %d equations in %d variables with %d shifts\n", n, n, nshifts)
# Tests M = Iₙ
MisI = (M === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :v, S, n)
@@ -92,7 +116,7 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr
end
Mv .= b # Mv₁ ← b
MisI || mulorldiv!(v, M, Mv, ldiv) # v₁ = M⁻¹ * Mv₁
- β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᵀ M v₁
+ β = sqrt(@kdotr(n, v, Mv)) # β₁ = v₁ᴴ M v₁
rNorms .= β
if history
for i = 1 : nshifts
@@ -140,14 +164,10 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr
itmax == 0 && (itmax = 2 * n)
# Build format strings for printing.
- if kdisplay(iter, verbose)
- fmt = "%5d" * repeat(" %8.1e", nshifts) * "\n"
- # precompile printf for our particular format
- local_printf(data...) = Core.eval(Main, :(@printf($fmt, $(data)...)))
- local_printf(iter, rNorms...)
- end
+ (verbose > 0) && (fmt = Printf.Format("%5d" * repeat(" %8.1e", nshifts) * "\n"))
+ kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms...)
- solved = sum(not_cv) == 0
+ solved = !reduce(|, not_cv)
tired = iter ≥ itmax
status = "unknown"
user_requested_exit = false
@@ -157,7 +177,7 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr
# Form next Lanczos vector.
# βₖ₊₁Mvₖ₊₁ = Avₖ - δₖMvₖ - βₖMvₖ₋₁
mul!(Mv_next, A, v) # Mvₖ₊₁ ← Avₖ
- δ = @kdotr(n, v, Mv_next) # δₖ = vₖᵀ A vₖ
+ δ = @kdotr(n, v, Mv_next) # δₖ = vₖᴴ A vₖ
@kaxpy!(n, -δ, Mv, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - δₖMvₖ
if iter > 0
@kaxpy!(n, -β, Mv_prev, Mv_next) # Mvₖ₊₁ ← Mvₖ₊₁ - βₖMvₖ₋₁
@@ -165,12 +185,12 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr
end
@. Mv = Mv_next # Mvₖ ← Mvₖ₊₁
MisI || mulorldiv!(v, M, Mv, ldiv) # vₖ₊₁ = M⁻¹ * Mvₖ₊₁
- β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᵀ M vₖ₊₁
+ β = sqrt(@kdotr(n, v, Mv)) # βₖ₊₁ = vₖ₊₁ᴴ M vₖ₊₁
@kscal!(n, one(FC) / β, v) # vₖ₊₁ ← vₖ₊₁ / βₖ₊₁
MisI || @kscal!(n, one(FC) / β, Mv) # Mvₖ₊₁ ← Mvₖ₊₁ / βₖ₊₁
- # Check curvature: vₖᵀ(A + sᵢI)vₖ = vₖᵀAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖².
- # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᵀ (A + sᵢ I) pₖ.
+ # Check curvature: vₖᴴ(A + sᵢI)vₖ = vₖᴴAvₖ + sᵢ‖vₖ‖² = δₖ + ρₖ * sᵢ with ρₖ = ‖vₖ‖².
+ # It is possible to show that σₖ² (δₖ + ρₖ * sᵢ - ωₖ₋₁ / γₖ₋₁) = pₖᴴ (A + sᵢ I) pₖ.
MisI || (ρ = @kdotr(n, v, v))
for i = 1 : nshifts
δhat[i] = δ + ρ * shifts[i]
@@ -208,13 +228,13 @@ function cg_lanczos_shift!(solver :: CgLanczosShiftSolver{T,FC,S}, A, b :: Abstr
not_cv[i] = check_curvature ? !(converged[i] || indefinite[i]) : !converged[i]
end
iter = iter + 1
- kdisplay(iter, verbose) && local_printf(iter, rNorms...)
+ kdisplay(iter, verbose) && Printf.format(iostream, fmt, iter, rNorms...)
user_requested_exit = callback(solver) :: Bool
- solved = sum(not_cv) == 0
+ solved = !reduce(|, not_cv)
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
solved && (status = "solution good enough given atol and rtol")
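
The shifted variant solves all systems (A + αᵢI)x = b in a single Lanczos pass and now builds its per-shift residual line with `Printf.Format` instead of `Core.eval`. A minimal sketch with made-up data:

```julia
using Krylov, LinearAlgebra

n = 50
A = Matrix(SymTridiagonal(2 * ones(n), -ones(n - 1)))  # Hermitian
b = ones(n)
shifts = [0.0, 0.1, 1.0]          # p = 3 shifts

x, stats = cg_lanczos_shift(A, b, shifts)

# x is a vector of p solution vectors, one per shift.
for (α, xα) in zip(shifts, x)
    println("α = $α  ‖b - (A + αI)xα‖ = $(norm(b - (A + α * I) * xα))")
end
```
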
diff --git a/src/cgls.jl b/src/cgls.jl
index f5529fbfb..55fe6d0ec 100644
--- a/src/cgls.jl
+++ b/src/cgls.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# CGLS is formally equivalent to applying the conjugate gradient method
# to the normal equations but should be more stable. It is also formally
@@ -28,12 +28,12 @@
export cgls, cgls!
-
"""
(x, stats) = cgls(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
- radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false, radius::T=zero(T),
+ λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
+ itmax::Int=0, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -42,19 +42,40 @@ Solve the regularized linear least-squares problem
minimize ‖b - Ax‖₂² + λ‖x‖₂²
-using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
+of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
parameter. This method is equivalent to applying CG to the normal equations
- (AᵀA + λI) x = Aᵀb
+ (AᴴA + λI) x = Aᴴb
but is more stable.
-CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂.
+CGLS produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to LSQR, though can be slightly less accurate,
but simpler to implement.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -79,23 +100,24 @@ See [`CglsSolver`](@ref) for more details about the `solver`.
function cgls! end
function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
- radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ M=I, ldiv :: Bool=false, radius :: T=zero(T),
+ λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
+ itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CGLS: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "CGLS: system of %d equations in %d variables\n", m, n)
# Tests M = Iₙ
MisI = (M === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :Mr, S, m)
@@ -117,9 +139,9 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
return solver
end
MisI || mulorldiv!(Mr, M, r, ldiv)
- mul!(s, Aᵀ, Mr)
+ mul!(s, Aᴴ, Mr)
p .= s
- γ = @kdotr(n, s, s) # γ = sᵀs
+ γ = @kdotr(n, s, s) # γ = sᴴs
iter = 0
itmax == 0 && (itmax = m + n)
@@ -128,8 +150,8 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
history && push!(rNorms, rNorm)
history && push!(ArNorms, ArNorm)
ε = atol + rtol * ArNorm
- (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖")
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s\n", "k", "‖Aᴴr‖", "‖r‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
status = "unknown"
on_boundary = false
@@ -140,12 +162,12 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
while ! (solved || tired || user_requested_exit)
mul!(q, A, p)
MisI || mulorldiv!(Mq, M, q, ldiv)
- δ = @kdotr(m, q, Mq) # δ = qᵀMq
- λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᵀp
+ δ = @kdotr(m, q, Mq) # δ = qᴴMq
+ λ > 0 && (δ += λ * @kdotr(n, p, p)) # δ = δ + pᴴp
α = γ / δ
# if a trust-region constraint is given, compute the step to the boundary
- σ = radius > 0 ? maximum(to_boundary(x, p, radius)) : α
+ σ = radius > 0 ? maximum(to_boundary(n, x, p, radius)) : α
if (radius > 0) & (α > σ)
α = σ
on_boundary = true
@@ -154,9 +176,9 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kaxpy!(n, α, p, x) # Faster than x = x + α * p
@kaxpy!(m, -α, q, r) # Faster than r = r - α * q
MisI || mulorldiv!(Mr, M, r, ldiv)
- mul!(s, Aᵀ, Mr)
+ mul!(s, Aᴴ, Mr)
λ > 0 && @kaxpy!(n, -λ, x, s) # s = A' * r - λ * x
- γ_next = @kdotr(n, s, s) # γ_next = sᵀs
+ γ_next = @kdotr(n, s, s) # γ_next = sᴴs
β = γ_next / γ
@kaxpby!(n, one(FC), s, β, p) # p = s + βp
γ = γ_next
@@ -165,12 +187,12 @@ function cgls!(solver :: CglsSolver{T,FC,S}, A, b :: AbstractVector{FC};
history && push!(rNorms, rNorm)
history && push!(ArNorms, ArNorm)
iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
user_requested_exit = callback(solver) :: Bool
solved = (ArNorm ≤ ε) | on_boundary
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
solved && (status = "solution good enough given atol and rtol")
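
A sketch of the regularized least-squares use case documented above; the Vandermonde-style data and the value of λ are illustrative only:

```julia
using Krylov, LinearAlgebra

# Overdetermined problem: m = 10 equations, n = 4 unknowns.
A = [float(i)^j for i in 1:10, j in 0:3]
b = collect(1.0:10.0)
λ = 1.0e-4

x, stats = cgls(A, b, λ=λ)

# Optimality for min ‖b - Ax‖² + λ‖x‖²: Aᴴ(b - Ax) - λx ≈ 0.
println(norm(A' * (b - A * x) - λ * x))
println(stats.status)
```
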
diff --git a/src/cgne.jl b/src/cgne.jl
index 2859414e1..f85af32be 100644
--- a/src/cgne.jl
+++ b/src/cgne.jl
@@ -10,7 +10,7 @@
# and is equivalent to applying the conjugate gradient method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method is also known as Craig's method, CGME, and other
# names, and is described in
@@ -28,12 +28,13 @@
export cgne, cgne!
-
"""
(x, stats) = cgne(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ N=I, ldiv::Bool=false,
+ λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -42,11 +43,11 @@ Solve the consistent linear system
Ax + √λs = b
-using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
+of size m × n using the Conjugate Gradient (CG) method, where λ ≥ 0 is a regularization
parameter. This method is equivalent to applying CG to the normal equations
of the second kind
- (AAᵀ + λI) y = b
+ (AAᴴ + λI) y = b
but is more stable. When λ = 0, this method solves the minimum-norm problem
@@ -60,10 +61,28 @@ CGNE produces monotonic errors ‖x-x*‖₂ but not residuals ‖r‖₂.
It is formally equivalent to CRAIG, though can be slightly less accurate,
but simpler to implement. Only the x-part of the solution is returned.
-A preconditioner M may be provided in the form of a linear operator.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -88,35 +107,37 @@ See [`CgneSolver`](@ref) for more details about the `solver`.
function cgne! end
function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ N=I, ldiv :: Bool=false,
+ λ :: T=zero(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CGNE: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "CGNE: system of %d equations in %d variables\n", m, n)
- # Tests M = Iₙ
- MisI = (M === I)
+ # Tests N = Iₙ
+ NisI = (N === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
- allocate_if(!MisI, solver, :z, S, m)
+ allocate_if(!NisI, solver, :z, S, m)
allocate_if(λ > 0, solver, :s, S, m)
- x, p, Aᵀz, r, q, s, stats = solver.x, solver.p, solver.Aᵀz, solver.r, solver.q, solver.s, solver.stats
+ x, p, Aᴴz, r, q, s, stats = solver.x, solver.p, solver.Aᴴz, solver.r, solver.q, solver.s, solver.stats
rNorms = stats.residuals
reset!(stats)
- z = MisI ? r : solver.z
+ z = NisI ? r : solver.z
x .= zero(FC)
r .= b
- MisI || mulorldiv!(z, M, r, ldiv)
+ NisI || mulorldiv!(z, N, r, ldiv)
rNorm = @knrm2(m, r) # Marginally faster than norm(r)
history && push!(rNorms, rNorm)
if rNorm == 0
@@ -126,7 +147,7 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
return solver
end
λ > 0 && (s .= r)
- mul!(p, Aᵀ, z)
+ mul!(p, Aᴴ, z)
# Use ‖p‖ to detect inconsistent system.
# An inconsistent system will necessarily have AA' singular.
@@ -141,8 +162,8 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
ɛ_i = atol + rtol * pNorm # Stopping tolerance for inconsistent systems.
- (verbose > 0) && @printf("%5s %8s\n", "k", "‖r‖")
- kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %8s\n", "k", "‖r‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e\n", iter, rNorm)
status = "unknown"
solved = rNorm ≤ ɛ_c
@@ -158,11 +179,11 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
α = γ / δ
@kaxpy!(n, α, p, x) # Faster than x = x + α * p
@kaxpy!(m, -α, q, r) # Faster than r = r - α * q
- MisI || mulorldiv!(z, M, r, ldiv)
+ NisI || mulorldiv!(z, N, r, ldiv)
γ_next = @kdotr(m, r, z) # Faster than γ_next = dot(r, z)
β = γ_next / γ
- mul!(Aᵀz, Aᵀ, z)
- @kaxpby!(n, one(FC), Aᵀz, β, p) # Faster than p = Aᵀz + β * p
+ mul!(Aᴴz, Aᴴ, z)
+ @kaxpby!(n, one(FC), Aᴴz, β, p) # Faster than p = Aᴴz + β * p
pNorm = @knrm2(n, p)
if λ > 0
@kaxpby!(m, one(FC), r, β, s) # s = r + β * s
@@ -171,7 +192,7 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
rNorm = sqrt(γ_next)
history && push!(rNorms, rNorm)
iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %8.2e\n", iter, rNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e\n", iter, rNorm)
# Stopping conditions that do not depend on user input.
# This is to guard against tolerances that are unreasonably small.
@@ -183,7 +204,7 @@ function cgne!(solver :: CgneSolver{T,FC,S}, A, b :: AbstractVector{FC};
inconsistent = (rNorm > 100 * ɛ_c) && (pNorm ≤ ɛ_i)
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
inconsistent && (status = "system probably inconsistent")
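
For the minimum-norm setting that CGNE targets, a small sketch with random (hence almost surely full-rank and consistent) data:

```julia
using Krylov, LinearAlgebra

# Underdetermined consistent system: m = 4, n = 10.
A = rand(4, 10)
b = A * ones(10)           # consistent by construction

x, stats = cgne(A, b)

println(norm(A * x - b))   # residual ≈ 0 for a consistent system
println(stats.status)
```
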
diff --git a/src/cgs.jl b/src/cgs.jl
index c1eb1056e..cbb3db13b 100644
--- a/src/cgs.jl
+++ b/src/cgs.jl
@@ -11,17 +11,23 @@
export cgs, cgs!
"""
- (x, stats) = cgs(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = cgs(A, b::AbstractVector{FC};
+ c::AbstractVector{FC}=b, M=I, N=I,
+ ldiv::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the consistent linear system Ax = b using conjugate gradient squared algorithm.
+ (x, stats) = cgs(A, b, x0::AbstractVector; kwargs...)
+
+CGS can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the consistent linear system Ax = b of size n using CGS.
CGS requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
From "Iterative Methods for Sparse Linear Systems (Y. Saad)" :
@@ -38,16 +44,33 @@ to become inaccurate.
TFQMR and BICGSTAB were developed to remedy this difficulty.»
-This implementation allows a left preconditioner M and a right preconditioner N.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
+
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-CGS can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = cgs(A, b, x0; kwargs...)
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -83,15 +106,17 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: Abs
return solver
end
-function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b,
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC};
+ c :: AbstractVector{FC}=b, M=I, N=I,
+ ldiv :: Bool=false, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CGS: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "CGS: system of size %d\n", n)
# Check M = Iₙ and N = Iₙ
MisI = (M === I)
@@ -99,8 +124,8 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :vw, S, n)
@@ -142,7 +167,7 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
if ρ == 0
stats.niter = 0
stats.solved, stats.inconsistent = false, false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start =false
return solver
end
@@ -151,8 +176,8 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
itmax == 0 && (itmax = 2*n)
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
u .= r # u₀
p .= r # p₀
@@ -207,9 +232,9 @@ function cgs!(solver :: CgsSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
breakdown = (α == 0 || isnan(α))
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
breakdown && (status = "breakdown αₖ == 0")
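
The warm-start form documented above can be exercised as follows; the random well-conditioned matrix and the perturbed guess are made up for illustration:

```julia
using Krylov, LinearAlgebra

n = 100
A = 2I + randn(n, n) / (2 * sqrt(n))   # nonsymmetric, well conditioned
b = randn(n)

x, stats   = cgs(A, b)                 # cold start
x0 = x .+ 0.01 .* randn(n)             # a nearby initial guess
x1, stats1 = cgs(A, b, x0)             # warm start

println((stats.niter, stats1.niter))   # warm start typically needs fewer iterations
```
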
diff --git a/src/cr.jl b/src/cr.jl
index c678c7d29..26f317385 100644
--- a/src/cr.jl
+++ b/src/cr.jl
@@ -16,32 +16,52 @@ export cr, cr!
"""
(x, stats) = cr(A, b::AbstractVector{FC};
- M=I, atol::T=√eps(T), rtol::T=√eps(T), γ::T=√eps(T), itmax::Int=0,
- radius::T=zero(T), verbose::Int=0, linesearch::Bool=false, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false, radius::T=zero(T),
+ linesearch::Bool=false, γ::T=√eps(T),
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-A truncated version of Stiefel’s Conjugate Residual method to solve the symmetric linear system Ax = b or the least-squares problem min ‖b - Ax‖.
-The matrix A must be positive semi-definite.
+ (x, stats) = cr(A, b, x0::AbstractVector; kwargs...)
-A preconditioner M may be provided in the form of a linear operator and is assumed to be symmetric and positive definite.
+CR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+A truncated version of Stiefel’s Conjugate Residual method to solve the Hermitian linear system Ax = b
+of size n or the least-squares problem min ‖b - Ax‖ if A is singular.
+The matrix A must be Hermitian semi-definite.
M also indicates the weighted norm in which residuals are measured.
-In a linesearch context, 'linesearch' must be set to 'true'.
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian positive semi-definite matrix of dimension n;
+* `b`: a vector of length n.
-If `itmax=0`, the default number of iterations is set to `2 * n`,
-with `n = length(b)`.
+#### Optional argument
-CR can be warm-started from an initial guess `x0` with the method
+* `x0`: a vector of length n that represents an initial guess of the solution x.
- (x, stats) = cr(A, b, x0; kwargs...)
+#### Keyword arguments
-where `kwargs` are the same keyword arguments as above.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `linesearch`: if `true`, indicate that the solution is to be used in an inexact Newton method with linesearch. If negative curvature is detected at iteration k > 0, the solution of iteration k-1 is returned. If negative curvature is detected at iteration 0, the right-hand side is returned (i.e., the negative gradient);
+* `γ`: tolerance to determine that the curvature of the quadratic model is nonpositive;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -80,22 +100,25 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: Abstr
end
function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T), γ :: T=√eps(T), itmax :: Int=0,
- radius :: T=zero(T), verbose :: Int=0, linesearch :: Bool=false, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ M=I, ldiv :: Bool=false, radius :: T=zero(T),
+ linesearch :: Bool=false, γ :: T=√eps(T),
+ atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
linesearch && (radius > 0) && error("'linesearch' set to 'true' but radius > 0")
- n, m = size(A)
+
+ m, n = size(A)
m == n || error("System must be square")
length(b) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("CR: system of %d equations in %d variables\n", n, n)
+ (verbose > 0) && @printf(iostream, "CR: system of %d equations in %d variables\n", n, n)
# Tests M = Iₙ
MisI = (M === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace
allocate_if(!MisI, solver, :Mq, S, n)
@@ -146,10 +169,10 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
ArNorm = @knrm2(n, Ar) # ‖Ar‖
history && push!(ArNorms, ArNorm)
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %8s %8s %8s\n", "k", "‖x‖", "‖r‖", "quad")
- kdisplay(iter, verbose) && @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m)
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s\n", "k", "‖x‖", "‖r‖", "quad")
+ kdisplay(iter, verbose) && @printf(iostream, " %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m)
- descent = pr > 0 # pᵀr > 0 means p is a descent direction
+ descent = pr > 0 # pᴴr > 0 means p is a descent direction
solved = rNorm ≤ ε
tired = iter ≥ itmax
on_boundary = false
@@ -161,7 +184,7 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
if linesearch
if (pAp ≤ γ * pNorm²) || (ρ ≤ γ * rNorm²)
npcurv = true
- (verbose > 0) && @printf("nonpositive curvature detected: pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ)
+ (verbose > 0) && @printf(iostream, "nonpositive curvature detected: pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
stats.solved = solved
stats.inconsistent = false
stats.status = "nonpositive curvature"
@@ -173,52 +196,52 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
MisI || mulorldiv!(Mq, M, q, ldiv)
if radius > 0
- (verbose > 0) && @printf("radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm)
+ (verbose > 0) && @printf(iostream, "radius = %8.1e > 0 and ‖x‖ = %8.1e\n", radius, xNorm)
# find t1 > 0 and t2 < 0 such that ‖x + ti * p‖² = radius² (i = 1, 2)
xNorm² = xNorm * xNorm
- t = to_boundary(x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²)
+ t = to_boundary(n, x, p, radius; flip = false, xNorm2 = xNorm², dNorm2 = pNorm²)
t1 = maximum(t) # > 0
t2 = minimum(t) # < 0
- tr = maximum(to_boundary(x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²))
- (verbose > 0) && @printf("t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr)
+ tr = maximum(to_boundary(n, x, r, radius; flip = false, xNorm2 = xNorm², dNorm2 = rNorm²))
+ (verbose > 0) && @printf(iostream, "t1 = %8.1e, t2 = %8.1e and tr = %8.1e\n", t1, t2, tr)
- if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᵀAp ≃ 0
+ if abspAp ≤ γ * pNorm * @knrm2(n, q) # pᴴAp ≃ 0
npcurv = true # nonpositive curvature
- (verbose > 0) && @printf("pᵀAp = %8.1e ≃ 0\n", pAp)
- if abspr ≤ γ * pNorm * rNorm # pᵀr ≃ 0
- (verbose > 0) && @printf("pᵀr = %8.1e ≃ 0, redefining p := r\n", pr)
+ (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e ≃ 0\n", pAp)
+ if abspr ≤ γ * pNorm * rNorm # pᴴr ≃ 0
+ (verbose > 0) && @printf(iostream, "pᴴr = %8.1e ≃ 0, redefining p := r\n", pr)
p = r # - ∇q(x)
q = Ar
- # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᵀAr
- # 1) if rᵀAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᵀAr
- # 2) if rᵀAr ≤ 0, the quadratic decreases to -∞ in the direction r
+ # q(x + αr) = q(x) - α ‖r‖² + ½ α² rᴴAr
+ # 1) if rᴴAr > 0, the quadratic decreases from α = 0 to α = ‖r‖² / rᴴAr
+ # 2) if rᴴAr ≤ 0, the quadratic decreases to -∞ in the direction r
if ρ > 0 # case 1
- (verbose > 0) && @printf("quadratic is convex in direction r, curv = %8.1e\n", ρ)
+ (verbose > 0) && @printf(iostream, "quadratic is convex in direction r, curv = %8.1e\n", ρ)
α = min(tr, rNorm² / ρ)
else # case 2
- (verbose > 0) && @printf("r is a direction of nonpositive curvature: %8.1e\n", ρ)
+ (verbose > 0) && @printf(iostream, "r is a direction of nonpositive curvature: %8.1e\n", ρ)
α = tr
end
else
- # q_p = q(x + α_p * p) - q(x) = -α_p * rᵀp + ½ (α_p)² * pᵀAp
- # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᵀAr
+ # q_p = q(x + α_p * p) - q(x) = -α_p * rᴴp + ½ (α_p)² * pᴴAp
+ # q_r = q(x + α_r * r) - q(x) = -α_r * ‖r‖² + ½ (α_r)² * rᴴAr
# Δ = q_p - q_r. If Δ > 0, r is followed, else p is followed
α = descent ? t1 : t2
ρ > 0 && (tr = min(tr, rNorm² / ρ))
- Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᵀAp = 0
+ Δ = -α * pr + tr * rNorm² - (tr)^2 * ρ / 2 # as pᴴAp = 0
if Δ > 0 # direction r engenders a better decrease
- (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
- (verbose > 0) && @printf("redefining p := r\n")
+ (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "redefining p := r\n")
p = r
q = Ar
α = tr
else
- (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
end
end
elseif pAp > 0 && ρ > 0 # no negative curvature
- (verbose > 0) && @printf("positive curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ)
+ (verbose > 0) && @printf(iostream, "positive curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
α = ρ / @kdotr(n, q, Mq)
if α ≥ t1
α = t1
@@ -227,49 +250,49 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
elseif pAp > 0 && ρ < 0
npcurv = true
- (verbose > 0) && @printf("pᵀAp = %8.1e > 0 and rᵀAr = %8.1e < 0\n", pAp, ρ)
- # q_p is minimal for α_p = rᵀp / pᵀAp
+ (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e > 0 and rᴴAr = %8.1e < 0\n", pAp, ρ)
+ # q_p is minimal for α_p = rᴴp / pᴴAp
α = descent ? min(t1, pr / pAp) : max(t2, pr / pAp)
Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
if Δ > 0
- (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
- (verbose > 0) && @printf("redefining p := r\n")
+ (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "redefining p := r\n")
p = r
q = Ar
α = tr
else
- (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
end
elseif pAp < 0 && ρ > 0
npcurv = true
- (verbose > 0) && @printf("pᵀAp = %8.1e < 0 and rᵀAr = %8.1e > 0\n", pAp, ρ)
+ (verbose > 0) && @printf(iostream, "pᴴAp = %8.1e < 0 and rᴴAr = %8.1e > 0\n", pAp, ρ)
α = descent ? t1 : t2
tr = min(tr, rNorm² / ρ)
Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
if Δ > 0
- (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
- (verbose > 0) && @printf("redefining p := r\n")
+ (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "redefining p := r\n")
p = r
q = Ar
α = tr
else
- (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
end
elseif pAp < 0 && ρ < 0
npcurv = true
- (verbose > 0) && @printf("negative curvatures along p and r. pᵀAp = %8.1e and rᵀAr = %8.1e\n", pAp, ρ)
+ (verbose > 0) && @printf(iostream, "negative curvatures along p and r. pᴴAp = %8.1e and rᴴAr = %8.1e\n", pAp, ρ)
α = descent ? t1 : t2
Δ = -α * pr + tr * rNorm² + (α^2 * pAp - (tr)^2 * ρ) / 2
if Δ > 0
- (verbose > 0) && @printf("direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
- (verbose > 0) && @printf("redefining p := r\n")
+ (verbose > 0) && @printf(iostream, "direction r engenders a bigger decrease. q_p - q_r = %8.1e > 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "redefining p := r\n")
p = r
q = Ar
α = tr
else
- (verbose > 0) && @printf("direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
+ (verbose > 0) && @printf(iostream, "direction p engenders an equal or a bigger decrease. q_p - q_r = %8.1e ≤ 0\n", Δ)
end
end
@@ -297,7 +320,7 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + 1
if kdisplay(iter, verbose)
m = m - α * pr + α^2 * pAp / 2
- @printf(" %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m)
+ @printf(iostream, " %d %8.1e %8.1e %8.1e\n", iter, xNorm, rNorm, m)
end
# Stopping conditions that do not depend on user input.
@@ -330,14 +353,14 @@ function cr!(solver :: CrSolver{T,FC,S}, A, b :: AbstractVector{FC};
solver.warm_start = false
return solver
end
- pr = rNorm² + β * pr - β * α * pAp # pᵀr
+ pr = rNorm² + β * pr - β * α * pAp # pᴴr
abspr = abs(pr)
- pAp = ρ + β^2 * pAp # pᵀq
+ pAp = ρ + β^2 * pAp # pᴴq
abspAp = abs(pAp)
descent = pr > 0
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
on_boundary && (status = "on trust-region boundary")
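
The `radius` keyword turns CR into a trust-region subproblem solver, as the boundary logic above shows. A minimal sketch, assuming a definite model Hessian and an illustrative radius Δ:

```julia
using Krylov, LinearAlgebra

n = 100
A = Matrix(SymTridiagonal(2 * ones(n), -ones(n - 1)))  # Hermitian positive definite
b = ones(n)

Δ = 0.5
x, stats = cr(A, b, radius=Δ)

println(norm(x))        # ≤ Δ, with equality when the constraint is active
println(stats.status)   # "on trust-region boundary" when the step hits ‖x‖ = Δ
```
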
diff --git a/src/craig.jl b/src/craig.jl
index 20597ea02..76afe9d51 100644
--- a/src/craig.jl
+++ b/src/craig.jl
@@ -11,7 +11,7 @@
# and is equivalent to applying the conjugate gradient method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method, sometimes known under the name CRAIG, is the
# Golub-Kahan implementation of CGNE, and is described in
@@ -32,13 +32,15 @@
export craig, craig!
-
"""
(x, y, stats) = craig(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T),
- btol::T=√eps(T), rtol::T=√eps(T), conlim::T=1/√eps(T), itmax::Int=0,
- verbose::Int=0, transfer_to_lsqr::Bool=false, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ transfer_to_lsqr::Bool=false, sqd::Bool=false,
+ λ::T=zero(T), btol::T=√eps(T),
+ conlim::T=1/√eps(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -47,19 +49,19 @@ Find the least-norm solution of the consistent linear system
Ax + λ²y = b
-using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a
+of size m × n using the Golub-Kahan implementation of Craig's method, where λ ≥ 0 is a
regularization parameter. This method is equivalent to CGNE but is more
stable.
For a system in the form Ax = b, Craig's method is equivalent to applying
-CG to AAᵀy = b and recovering x = Aᵀy. Note that y are the Lagrange
+CG to AAᴴy = b and recovering x = Aᴴy. Note that y are the Lagrange
multipliers of the least-norm problem
minimize ‖x‖ s.t. Ax = b.
If `λ > 0`, CRAIG solves the symmetric and quasi-definite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A λ²E ] [ y ] = [ b ],
where E and F are symmetric and positive definite.
@@ -70,12 +72,12 @@ The system above represents the optimality conditions of
min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-CRAIG is then equivalent to applying CG to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+CRAIG is then equivalent to applying CG to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`.
If `λ = 0`, CRAIG solves the symmetric and indefinite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A 0 ] [ y ] = [ b ].
The system above represents the optimality conditions of
@@ -86,8 +88,34 @@ In this case, `M` can still be specified and indicates the weighted norm in whic
In this implementation, both the x and y-parts of the solution are returned.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `transfer_to_lsqr`: transfer from the Craig point to the LSQR point, when it exists. The transfer is based on the residual norm;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `btol`: stopping tolerance used to detect zero-residual problems;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length m;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -112,14 +140,17 @@ See [`CraigSolver`](@ref) for more details about the `solver`.
function craig! end
function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
- btol :: T=√eps(T), rtol :: T=√eps(T), conlim :: T=1/√eps(T), itmax :: Int=0,
- verbose :: Int=0, transfer_to_lsqr :: Bool=false, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ M=I, N=I, ldiv :: Bool=false,
+ transfer_to_lsqr :: Bool=false, sqd :: Bool=false,
+ λ :: T=zero(T), btol :: T=√eps(T),
+ conlim :: T=1/√eps(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CRAIG: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "CRAIG: system of %d equations in %d variables\n", m, n)
# Check sqd and λ parameters
sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
@@ -131,16 +162,16 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u , S, m)
allocate_if(!NisI, solver, :v , S, n)
allocate_if(λ > 0, solver, :w2, S, n)
- x, Nv, Aᵀu, y, w = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w
+ x, Nv, Aᴴu, y, w = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w
Mu, Av, w2, stats = solver.Mu, solver.Av, solver.w2, solver.stats
rNorms = stats.residuals
reset!(stats)
@@ -180,7 +211,7 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
Anorm² = zero(T) # Estimate of ‖A‖²_F.
Anorm = zero(T)
- Dnorm² = zero(T) # Estimate of ‖(AᵀA)⁻¹‖².
+ Dnorm² = zero(T) # Estimate of ‖(AᴴA)⁻¹‖².
Acond = zero(T) # Estimate of cond(A).
xNorm² = zero(T) # Estimate of ‖x‖².
xNorm = zero(T)
@@ -191,8 +222,8 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
ɛ_i = atol # Stopping tolerance for inconsistent systems.
ctol = conlim > 0 ? 1/conlim : zero(T) # Stopping tolerance for ill-conditioned operators.
- (verbose > 0) && @printf("%5s %8s %8s %8s %8s %8s %7s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β")
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e\n", iter, rNorm, xNorm, Anorm, Acond)
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s %8s %8s %8s %7s\n", "k", "‖r‖", "‖x‖", "‖A‖", "κ(A)", "α", "β")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e\n", iter, rNorm, xNorm, Anorm, Acond)
bkwerr = one(T) # initial value of the backward error ‖r‖ / √(‖b‖² + ‖A‖² ‖x‖²)
@@ -212,9 +243,9 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
while ! (solved || inconsistent || ill_cond || tired || user_requested_exit)
# Generate the next Golub-Kahan vectors
- # 1. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 1. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
if α == 0
@@ -296,7 +327,7 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
ρ_prev = ρ # Only differs from α if λ > 0.
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e\n", iter, rNorm, xNorm, Anorm, Acond, α, β)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e %8.2e %8.2e %8.1e %7.1e\n", iter, rNorm, xNorm, Anorm, Acond, α, β)
solved_lim = bkwerr ≤ btol
solved_mach = one(T) + bkwerr ≤ one(T)
@@ -312,7 +343,7 @@ function craig!(solver :: CraigSolver{T,FC,S}, A, b :: AbstractVector{FC};
inconsistent = false
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
# transfer to LSQR point if requested
if λ > 0 && transfer_to_lsqr
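
Since CRAIG returns both the x- and y-parts, the relation x = Aᴴy from the docstring can be checked directly; the data below is random and illustrative:

```julia
using Krylov, LinearAlgebra

# minimize ‖x‖ s.t. Ax = b, with m = 4, n = 10.
A = rand(4, 10)
b = A * ones(10)

x, y, stats = craig(A, b)

println(norm(A * x - b))    # primal feasibility ≈ 0
println(norm(x - A' * y))   # x ≈ Aᴴy (exact in exact arithmetic when λ = 0)
println(stats.status)
```
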
diff --git a/src/craigmr.jl b/src/craigmr.jl
index e08bb9c36..3b64829d6 100644
--- a/src/craigmr.jl
+++ b/src/craigmr.jl
@@ -10,7 +10,7 @@
# and is equivalent to applying the conjugate residual method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
# This method is equivalent to CRMR, and is described in
#
@@ -26,12 +26,13 @@
export craigmr, craigmr!
-
"""
(x, y, stats) = craigmr(A, b::AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
- rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ sqd::Bool=false, λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -40,11 +41,11 @@ Solve the consistent linear system
Ax + λ²y = b
-using the CRAIGMR method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the CRAIGMR method, where λ ≥ 0 is a regularization parameter.
This method is equivalent to applying the Conjugate Residuals method
to the normal equations of the second kind
- (AAᵀ + λ²I) y = b
+ (AAᴴ + λ²I) y = b
but is more stable. When λ = 0, this method solves the minimum-norm problem
@@ -52,7 +53,7 @@ but is more stable. When λ = 0, this method solves the minimum-norm problem
If `λ > 0`, CRAIGMR solves the symmetric and quasi-definite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A λ²E ] [ y ] = [ b ],
where E and F are symmetric and positive definite.
@@ -63,12 +64,12 @@ The system above represents the optimality conditions of
min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+CRAIGMR is then equivalent to applying MINRES to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`.
If `λ = 0`, CRAIGMR solves the symmetric and indefinite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A 0 ] [ y ] = [ b ].
The system above represents the optimality conditions of
@@ -82,8 +83,31 @@ It is formally equivalent to CRMR, though can be slightly more accurate,
and intricate to implement. Both the x- and y-parts of the solution are
returned.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length m;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -108,13 +132,15 @@ See [`CraigmrSolver`](@ref) for more details about the `solver`.
function craigmr! end
function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
- rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ M=I, N=I, ldiv :: Bool=false,
+ sqd :: Bool=false, λ :: T=zero(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CRAIGMR: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "CRAIGMR: system of %d equations in %d variables\n", m, n)
# Check sqd and λ parameters
sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
@@ -126,23 +152,23 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
allocate_if(λ > 0, solver, :q, S, n)
- x, Nv, Aᵀu, d, y, Mu = solver.x, solver.Nv, solver.Aᵀu, solver.d, solver.y, solver.Mu
+ x, Nv, Aᴴu, d, y, Mu = solver.x, solver.Nv, solver.Aᴴu, solver.d, solver.y, solver.Mu
w, wbar, Av, q, stats = solver.w, solver.wbar, solver.Av, solver.q, solver.stats
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
u = MisI ? Mu : solver.u
v = NisI ? Nv : solver.v
- # Compute y such that AAᵀy = b. Then recover x = Aᵀy.
+ # Compute y such that AAᴴy = b. Then recover x = Aᴴy.
x .= zero(FC)
y .= zero(FC)
Mu .= b
@@ -161,9 +187,9 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# β₁Mu₁ = b.
@kscal!(m, one(FC)/β, u)
MisI || @kscal!(m, one(FC)/β, Mu)
- # α₁Nv₁ = Aᵀu₁.
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ # α₁Nv₁ = Aᴴu₁.
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
Anorm² = α * α
@@ -171,10 +197,10 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = m + n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β, α, β, α, 0, 1, Anorm²)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β, α, β, α, 0, 1, Anorm²)
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
if α == 0
stats.niter = 0
stats.solved, stats.inconsistent = true, false
@@ -288,16 +314,16 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# xₖ = Dₖzₖ
@kaxpy!(n, ζ, d, x)
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
Anorm² = Anorm² + α * α # = ‖Lₖ‖
ArNorm = α * β * abs(ζ/ρ)
history && push!(ArNorms, ArNorm)
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²)
if λ > 0
(cdₖ, sdₖ, λₖ₊₁) = sym_givens(λ, λₐᵤₓ)
@@ -320,7 +346,7 @@ function craigmr!(solver :: CraigmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
inconsistent = (rNorm > 100 * ɛ_c) & (ArNorm ≤ ɛ_i)
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
solved && (status = "found approximate minimum-norm solution")
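A recurring change in this patch is that every `@printf` now writes to a caller-supplied `iostream` (defaulting to `kstdout`). As a minimal sketch of the resulting workflow (the system below is synthetic, for illustration only), a caller can capture a solver's iteration log in an `IOBuffer` instead of sending it to standard output:

```julia
using Krylov

A = rand(40, 60)     # consistent underdetermined system
b = A * ones(60)

io = IOBuffer()
x, stats = craigmr(A, b, verbose=1, iostream=io)  # log every iteration to io
print(String(take!(io)))                          # inspect the captured log
```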
diff --git a/src/crls.jl b/src/crls.jl
index 6410fb836..78615fad6 100644
--- a/src/crls.jl
+++ b/src/crls.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the linear system
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# This implementation follows the formulation given in
#
@@ -20,12 +20,12 @@
export crls, crls!
-
"""
(x, stats) = crls(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
- radius::T=zero(T), itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, ldiv::Bool=false, radius::T=zero(T),
+ λ::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
+ itmax::Int=0, verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -34,19 +34,40 @@ Solve the linear least-squares problem
minimize ‖b - Ax‖₂² + λ‖x‖₂²
-using the Conjugate Residuals (CR) method. This method is equivalent to
-applying MINRES to the normal equations
+of size m × n using the Conjugate Residuals (CR) method.
+This method is equivalent to applying MINRES to the normal equations
- (AᵀA + λI) x = Aᵀb.
+ (AᴴA + λI) x = Aᴴb.
This implementation recurs the residual r := b - Ax.
-CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+CRLS produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to LSMR, though can be substantially less accurate,
but simpler to implement.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -70,23 +91,24 @@ See [`CrlsSolver`](@ref) for more details about the `solver`.
function crls! end
function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
- radius :: T=zero(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ M=I, ldiv :: Bool=false, radius :: T=zero(T),
+ λ :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
+ itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CRLS: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "CRLS: system of %d equations in %d variables\n", m, n)
# Tests M = Iₙ
MisI = (M === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :Ms, S, m)
@@ -112,13 +134,13 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
MisI || mulorldiv!(Mr, M, r, ldiv)
- mul!(Ar, Aᵀ, Mr) # - λ * x0 if x0 ≠ 0.
+ mul!(Ar, Aᴴ, Mr) # - λ * x0 if x0 ≠ 0.
mul!(s, A, Ar)
MisI || mulorldiv!(Ms, M, s, ldiv)
p .= Ar
Ap .= s
- mul!(q, Aᵀ, Ms) # Ap
+ mul!(q, Aᴴ, Ms) # Ap
λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p
γ = @kdotr(m, s, Ms) # Faster than γ = dot(s, Ms)
iter = 0
@@ -128,8 +150,8 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
λ > 0 && (γ += λ * ArNorm * ArNorm)
history && push!(ArNorms, ArNorm)
ε = atol + rtol * ArNorm
- (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖")
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s\n", "k", "‖Aᴴr‖", "‖r‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
status = "unknown"
on_boundary = false
@@ -147,14 +169,14 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
if radius > 0
pNorm = @knrm2(n, p)
if @kdotr(m, Ap, Ap) ≤ ε * sqrt(qNorm²) * pNorm # the quadratic is constant in the direction p
- psd = true # det(AᵀA) = 0
- p = Ar # p = Aᵀr
+ psd = true # det(AᴴA) = 0
+ p = Ar # p = Aᴴr
pNorm² = ArNorm * ArNorm
- mul!(q, Aᵀ, s)
- α = min(ArNorm^2 / γ, maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᵀr for α = ‖Ar‖²/γ
+ mul!(q, Aᴴ, s)
+ α = min(ArNorm^2 / γ, maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²))) # the quadratic is minimal in the direction Aᴴr for α = ‖Ar‖²/γ
else
pNorm² = pNorm * pNorm
- σ = maximum(to_boundary(x, p, radius, flip = false, dNorm2 = pNorm²))
+ σ = maximum(to_boundary(n, x, p, radius, flip = false, dNorm2 = pNorm²))
if α ≥ σ
α = σ
on_boundary = true
@@ -177,7 +199,7 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kaxpby!(n, one(FC), Ar, β, p) # Faster than p = Ar + β * p
@kaxpby!(m, one(FC), s, β, Ap) # Faster than Ap = s + β * Ap
MisI || mulorldiv!(MAp, M, Ap, ldiv)
- mul!(q, Aᵀ, MAp)
+ mul!(q, Aᴴ, MAp)
λ > 0 && @kaxpy!(n, λ, p, q) # q = q + λ * p
γ = γ_next
@@ -189,12 +211,12 @@ function crls!(solver :: CrlsSolver{T,FC,S}, A, b :: AbstractVector{FC};
history && push!(rNorms, rNorm)
history && push!(ArNorms, ArNorm)
iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
user_requested_exit = callback(solver) :: Bool
solved = (ArNorm ≤ ε) || on_boundary
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
solved && (status = "solution good enough given atol and rtol")
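The `radius` keyword documented above turns CRLS into a solver for the trust-region subproblem. A small sketch of both the regularized and the trust-region use, with hypothetical data:

```julia
using Krylov, LinearAlgebra

A = randn(100, 50)
b = randn(100)

x, stats = crls(A, b, λ=1.0e-4)   # regularized least squares

Δ = 0.1
xtr, _ = crls(A, b, radius=Δ)     # step constrained to ‖x‖ ≤ Δ
@assert norm(xtr) ≤ Δ + √eps()    # the step stays inside the region
```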
diff --git a/src/crmr.jl b/src/crmr.jl
index deb5cf79f..621ba5ef3 100644
--- a/src/crmr.jl
+++ b/src/crmr.jl
@@ -10,9 +10,9 @@
# and is equivalent to applying the conjugate residual method
# to the linear system
#
-# AAᵀy = b.
+# AAᴴy = b.
#
-# This method is equivalent to Craig-MR, described in
+# This method is equivalent to CRAIGMR, described in
#
# D. Orban and M. Arioli. Iterative Solution of Symmetric Quasi-Definite Linear Systems,
# Volume 3 of Spotlights. SIAM, Philadelphia, PA, 2017.
@@ -26,12 +26,13 @@
export crmr, crmr!
-
"""
(x, stats) = crmr(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T),
- rtol::T=√eps(T), itmax::Int=0, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ N=I, ldiv::Bool=false,
+ λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -40,11 +41,11 @@ Solve the consistent linear system
Ax + √λs = b
-using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization
+of size m × n using the Conjugate Residual (CR) method, where λ ≥ 0 is a regularization
parameter. This method is equivalent to applying CR to the normal equations
of the second kind
- (AAᵀ + λI) y = b
+ (AAᴴ + λI) y = b
but is more stable. When λ = 0, this method solves the minimum-norm problem
@@ -58,10 +59,28 @@ CRMR produces monotonic residuals ‖r‖₂.
It is formally equivalent to CRAIG-MR, though can be slightly less accurate,
but simpler to implement. Only the x-part of the solution is returned.
-A preconditioner M may be provided.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `m` used for preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -86,35 +105,37 @@ See [`CrmrSolver`](@ref) for more details about the `solver`.
function crmr! end
function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T),
- rtol :: T=√eps(T), itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ N=I, ldiv :: Bool=false,
+ λ :: T=zero(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("CRMR: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "CRMR: system of %d equations in %d variables\n", m, n)
- # Tests M = Iₙ
- MisI = (M === I)
+ # Tests N = Iₙ
+ NisI = (N === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
- allocate_if(!MisI, solver, :Mq, S, m)
+ allocate_if(!NisI, solver, :Nq, S, m)
allocate_if(λ > 0, solver, :s , S, m)
- x, p, Aᵀr, r = solver.x, solver.p, solver.Aᵀr, solver.r
+ x, p, Aᴴr, r = solver.x, solver.p, solver.Aᴴr, solver.r
q, s, stats = solver.q, solver.s, solver.stats
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
- Mq = MisI ? q : solver.Mq
+ Nq = NisI ? q : solver.Nq
x .= zero(FC) # initial estimation x = 0
- mulorldiv!(r, M, b, ldiv) # initial residual r = M * (b - Ax) = M * b
+ mulorldiv!(r, N, b, ldiv) # initial residual r = N * (b - Ax) = N * b
bNorm = @knrm2(m, r) # norm(b - A * x0) if x0 ≠ 0.
rNorm = bNorm # + λ * ‖x0‖ if x0 ≠ 0 and λ > 0.
history && push!(rNorms, rNorm)
@@ -126,9 +147,9 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
return solver
end
λ > 0 && (s .= r)
- mul!(Aᵀr, Aᵀ, r) # - λ * x0 if x0 ≠ 0.
- p .= Aᵀr
- γ = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ = dot(Aᵀr, Aᵀr)
+ mul!(Aᴴr, Aᴴ, r) # - λ * x0 if x0 ≠ 0.
+ p .= Aᴴr
+ γ = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ = dot(Aᴴr, Aᴴr)
λ > 0 && (γ += λ * rNorm * rNorm)
iter = 0
itmax == 0 && (itmax = m + n)
@@ -137,8 +158,8 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
history && push!(ArNorms, ArNorm)
ɛ_c = atol + rtol * rNorm # Stopping tolerance for consistent systems.
ɛ_i = atol + rtol * ArNorm # Stopping tolerance for inconsistent systems.
- (verbose > 0) && @printf("%5s %8s %8s\n", "k", "‖Aᵀr‖", "‖r‖")
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %8s %8s\n", "k", "‖Aᴴr‖", "‖r‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
status = "unknown"
solved = rNorm ≤ ɛ_c
@@ -149,17 +170,17 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
while ! (solved || inconsistent || tired || user_requested_exit)
mul!(q, A, p)
λ > 0 && @kaxpy!(m, λ, s, q) # q = q + λ * s
- MisI || mulorldiv!(Mq, M, q, ldiv)
- α = γ / @kdotr(m, q, Mq) # Compute qᵗ * M * q
+ NisI || mulorldiv!(Nq, N, q, ldiv)
+ α = γ / @kdotr(m, q, Nq) # Compute qᴴ * N * q
@kaxpy!(n, α, p, x) # Faster than x = x + α * p
- @kaxpy!(m, -α, Mq, r) # Faster than r = r - α * Mq
+ @kaxpy!(m, -α, Nq, r) # Faster than r = r - α * Nq
rNorm = @knrm2(m, r) # norm(r)
- mul!(Aᵀr, Aᵀ, r)
- γ_next = @kdotr(n, Aᵀr, Aᵀr) # Faster than γ_next = dot(Aᵀr, Aᵀr)
+ mul!(Aᴴr, Aᴴ, r)
+ γ_next = @kdotr(n, Aᴴr, Aᴴr) # Faster than γ_next = dot(Aᴴr, Aᴴr)
λ > 0 && (γ_next += λ * rNorm * rNorm)
β = γ_next / γ
- @kaxpby!(n, one(FC), Aᵀr, β, p) # Faster than p = Aᵀr + β * p
+ @kaxpby!(n, one(FC), Aᴴr, β, p) # Faster than p = Aᴴr + β * p
if λ > 0
@kaxpby!(m, one(FC), r, β, s) # s = r + β * s
end
@@ -169,13 +190,13 @@ function crmr!(solver :: CrmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
history && push!(rNorms, rNorm)
history && push!(ArNorms, ArNorm)
iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %8.2e %8.2e\n", iter, ArNorm, rNorm)
user_requested_exit = callback(solver) :: Bool
solved = rNorm ≤ ɛ_c
inconsistent = (rNorm > 100 * ɛ_c) && (ArNorm ≤ ɛ_i)
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
solved && (status = "solution good enough given atol and rtol")
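Since CRMR returns only the x-part of the solution, its minimum-norm behavior on a consistent underdetermined system is easy to check; a sketch with synthetic data:

```julia
using Krylov, LinearAlgebra

A = randn(30, 50)   # full row rank with probability one
b = A * ones(50)    # consistent by construction

x, stats = crmr(A, b)
@assert norm(A * x - b) ≤ 1e-6 * norm(b)   # x solves Ax = b with minimum norm
```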
diff --git a/src/diom.jl b/src/diom.jl
index 9c6b9767b..7bf23e355 100644
--- a/src/diom.jl
+++ b/src/diom.jl
@@ -11,40 +11,58 @@
export diom, diom!
"""
- (x, stats) = diom(A, b::AbstractVector{FC}; memory::Int=20,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- reorthogonalization::Bool=false, itmax::Int=0,
+ (x, stats) = diom(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ reorthogonalization::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the consistent linear system Ax = b using direct incomplete orthogonalization method.
+ (x, stats) = diom(A, b, x0::AbstractVector; kwargs...)
+
+DIOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the consistent linear system Ax = b of size n using DIOM.
DIOM only orthogonalizes the new vectors of the Krylov basis against the `memory` most recent vectors.
If CG is well defined on `Ax = b` and `memory = 2`, DIOM is theoretically equivalent to CG.
If `k ≤ memory` where `k` is the number of iterations, DIOM is theoretically equivalent to FOM.
Otherwise, DIOM interpolates between CG and FOM and is similar to CG with partial reorthogonalization.
-Partial reorthogonalization is available with the `reorthogonalization` option.
-
-An advantage of DIOM is that nonsymmetric or symmetric indefinite or both nonsymmetric
+An advantage of DIOM is that non-Hermitian, Hermitian indefinite, or both non-Hermitian
and indefinite systems of linear equations can be handled by this single algorithm.
-This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
-DIOM can be warm-started from an initial guess `x0` with the method
+#### Optional argument
- (x, stats) = diom(A, b, x0; kwargs...)
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-where `kwargs` are the same keyword arguments as above.
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -84,15 +102,16 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: A
end
function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- reorthogonalization :: Bool=false, itmax :: Int=0,
+ M=I, N=I, ldiv :: Bool=false,
+ reorthogonalization :: Bool=false, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("DIOM: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "DIOM: system of size %d\n", n)
# Check M = Iₙ and N = Iₙ
MisI = (M === I)
@@ -100,7 +119,7 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :w, S, n)
@@ -121,7 +140,7 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
else
t .= b
end
- MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀)
+ MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀)
rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
history && push!(rNorms, rNorm)
if rNorm == 0
@@ -136,23 +155,26 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
itmax == 0 && (itmax = 2*n)
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
- mem = length(L) # Memory
+ mem = length(V) # Memory
for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b).
- P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Uₘ)⁻¹.
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
end
- H .= zero(FC) # Last column of the band hessenberg matrix Hₘ = LₘUₘ.
- # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2].
- # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
- # In addition of that, the last column of Uₘ is stored in H.
- L .= zero(FC) # Last mem pivots of Lₘ.
+ for i = 1 : mem-1
+ P[i] .= zero(FC) # Directions Pₖ = NVₖ(Uₖ)⁻¹.
+ end
+ H .= zero(FC) # Last column of the band hessenberg matrix Hₖ = LₖUₖ.
+ # Each column has at most mem + 1 nonzero elements.
+ # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H.
+                        # k-i+1 represents the index of the diagonal where hᵢ.ₖ is located.
+                        # In addition, the last column of Uₖ is stored in H.
+ L .= zero(FC) # Last mem-1 pivots of Lₖ.
# Initial ξ₁ and V₁.
ξ = rNorm
- @. V[1] = r₀ / rNorm
+ V[1] .= r₀ ./ rNorm
# Stopping criterion.
solved = rNorm ≤ ε
@@ -166,83 +188,88 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + 1
# Set position in circulars stacks.
- pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V.
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V.
+ pos = mod(iter-1, mem) + 1 # Position corresponding to vₖ in the circular stack V.
+ next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
# Incomplete Arnoldi procedure.
z = NisI ? V[pos] : solver.z
- NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ
- mul!(t, A, z) # AN⁻¹vₘ
- MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁
+ NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ
+ mul!(t, A, z) # ANvₖ
+ MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁
for i = max(1, iter-mem+1) : iter
- ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
- diag = iter - i + 2
- H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩
- @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ
+ ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
+ diag = iter - i + 1
+ H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩
+ @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
end
# Partial reorthogonalization of the Krylov basis.
if reorthogonalization
for i = max(1, iter-mem+1) : iter
ipos = mod(i-1, mem) + 1
- diag = iter - i + 2
+ diag = iter - i + 1
Htmp = @kdot(n, w, V[ipos])
H[diag] += Htmp
@kaxpy!(n, -Htmp, V[ipos], w)
end
end
- # Compute hₘ₊₁.ₘ and vₘ₊₁.
- H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂
- if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown"
- @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ
- end
- # It's possible that uₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1
- if iter ≥ mem + 2
- H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0
+ # Compute hₖ₊₁.ₖ and vₖ₊₁.
+ Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+ if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
+ V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ
end
- # Update the LU factorization with partial pivoting of H.
- # Compute the last column of Uₘ.
+ # Update the LU factorization of Hₖ.
+ # Compute the last column of Uₖ.
if iter ≥ 2
- for i = max(2,iter-mem+1) : iter
- lpos = mod(i-1, mem) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L.
- diag = iter - i + 2
+ # u₁.ₖ ← h₁.ₖ if iter ≤ mem
+ # uₖ₋ₘₑₘ₊₁.ₖ ← hₖ₋ₘₑₘ₊₁.ₖ if iter ≥ mem + 1
+ for i = max(2,iter-mem+2) : iter
+ lpos = mod(i-1, mem-1) + 1 # Position corresponding to lᵢ.ᵢ₋₁ in the circular stack L.
+ diag = iter - i + 1
next_diag = diag + 1
- # uᵢ.ₘ ← hᵢ.ₘ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₘ
+ # uᵢ.ₖ ← hᵢ.ₖ - lᵢ.ᵢ₋₁ * uᵢ₋₁.ₖ
H[diag] = H[diag] - L[lpos] * H[next_diag]
+ if i == iter
+ # Compute ξₖ the last component of zₖ = β(Lₖ)⁻¹e₁.
+ # ξₖ = -lₖ.ₖ₋₁ * ξₖ₋₁
+ ξ = - L[lpos] * ξ
+ end
end
- # Compute ξₘ the last component of zₘ = β(Lₘ)⁻¹e₁.
- # ξₘ = -lₘ.ₘ₋₁ * ξₘ₋₁
- ξ = - L[pos] * ξ
end
- # Compute next pivot lₘ₊₁.ₘ = hₘ₊₁.ₘ / uₘ.ₘ
- L[next_pos] = H[1] / H[2]
-
- # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Uₘ)⁻¹.
- for i = max(1,iter-mem) : iter-1
- ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
- diag = iter - i + 2
- if ipos == pos
- # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ
- @kscal!(n, -H[diag], P[pos])
+ # Compute next pivot lₖ₊₁.ₖ = hₖ₊₁.ₖ / uₖ.ₖ
+ next_lpos = mod(iter, mem-1) + 1
+ L[next_lpos] = Haux / H[1]
+
+ ppos = mod(iter-1, mem-1) + 1 # Position corresponding to pₖ in the circular stack P.
+
+ # Compute the direction pₖ, the last column of Pₖ = NVₖ(Uₖ)⁻¹.
+ # u₁.ₖp₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≤ mem
+ # uₖ₋ₘₑₘ₊₁.ₖpₖ₋ₘₑₘ₊₁ + ... + uₖ.ₖpₖ = Nvₖ if k ≥ mem + 1
+ for i = max(1,iter-mem+1) : iter-1
+ ipos = mod(i-1, mem-1) + 1 # Position corresponding to pᵢ in the circular stack P.
+ diag = iter - i + 1
+ if ipos == ppos
+          # pₐᵤₓ ← -uₖ₋ₘₑₘ₊₁.ₖ * pₖ₋ₘₑₘ₊₁
+ @kscal!(n, -H[diag], P[ppos])
else
- # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ
- @kaxpy!(n, -H[diag], P[ipos], P[pos])
+          # pₐᵤₓ ← pₐᵤₓ - uᵢ.ₖ * pᵢ
+ @kaxpy!(n, -H[diag], P[ipos], P[ppos])
end
end
- # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ
- @kaxpy!(n, one(FC), z, P[pos])
- # pₘ = pₐᵤₓ / uₘ.ₘ
- @. P[pos] = P[pos] / H[2]
+ # pₐᵤₓ ← pₐᵤₓ + Nvₖ
+ @kaxpy!(n, one(FC), z, P[ppos])
+ # pₖ = pₐᵤₓ / uₖ.ₖ
+ P[ppos] .= P[ppos] ./ H[1]
- # Update solution xₘ.
- # xₘ = xₘ₋₁ + ξₘ * pₘ
- @kaxpy!(n, ξ, P[pos], x)
+ # Update solution xₖ.
+ # xₖ = xₖ₋₁ + ξₖ * pₖ
+ @kaxpy!(n, ξ, P[ppos], x)
# Compute residual norm.
- # ‖ M⁻¹(b - Axₘ) ‖₂ = hₘ₊₁.ₘ * |ξₘ / uₘ.ₘ|
- rNorm = real(H[1]) * abs(ξ / H[2])
+ # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ξₖ / uₖ.ₖ|
+ rNorm = Haux * abs(ξ / H[1])
history && push!(rNorms, rNorm)
# Stopping conditions that do not depend on user input.
@@ -254,9 +281,9 @@ function diom!(solver :: DiomSolver{T,FC,S}, A, b :: AbstractVector{FC};
resid_decrease_lim = rNorm ≤ ε
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
solved && (status = "solution good enough given atol and rtol")
user_requested_exit && (status = "user-requested exit")
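With the reworked circular stacks, DIOM now keeps `memory` basis vectors but only `memory - 1` directions and pivots; nothing changes on the caller's side. A usage sketch with synthetic data:

```julia
using Krylov, LinearAlgebra, SparseArrays

n = 200
A = sprandn(n, n, 0.02) + 10I   # nonsymmetric, strongly diagonal
b = randn(n)

# Orthogonalize each new basis vector against the 10 most recent ones,
# and reorthogonalize against the same window for extra robustness.
x, stats = diom(A, b, memory=10, reorthogonalization=true)
```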
diff --git a/src/dqgmres.jl b/src/dqgmres.jl
index ab7c490a6..025016304 100644
--- a/src/dqgmres.jl
+++ b/src/dqgmres.jl
@@ -11,16 +11,21 @@
export dqgmres, dqgmres!
"""
- (x, stats) = dqgmres(A, b::AbstractVector{FC}; memory::Int=20,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- reorthogonalization::Bool=false, itmax::Int=0,
+ (x, stats) = dqgmres(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ reorthogonalization::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the consistent linear system Ax = b using DQGMRES method.
+ (x, stats) = dqgmres(A, b, x0::AbstractVector; kwargs...)
+
+DQGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the consistent linear system Ax = b of size n using DQGMRES.
DQGMRES algorithm is based on the incomplete Arnoldi orthogonalization process
and computes a sequence of approximate solutions with the quasi-minimal residual property.
@@ -30,21 +35,34 @@ If MINRES is well defined on `Ax = b` and `memory = 2`, DQGMRES is theoretically
If `k ≤ memory` where `k` is the number of iterations, DQGMRES is theoretically equivalent to GMRES.
Otherwise, DQGMRES interpolates between MINRES and GMRES and is similar to MINRES with partial reorthogonalization.
-Partial reorthogonalization is available with the `reorthogonalization` option.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
-This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-DQGMRES can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = dqgmres(A, b, x0; kwargs...)
+* `memory`: the number of most recent vectors of the Krylov basis against which to orthogonalize a new vector;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against the `memory` most recent vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -84,15 +102,16 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x
end
function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- reorthogonalization :: Bool=false, itmax :: Int=0,
+ M=I, N=I, ldiv :: Bool=false,
+ reorthogonalization :: Bool=false, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("DQGMRES: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "DQGMRES: system of size %d\n", n)
# Check M = Iₙ and N = Iₙ
MisI = (M === I)
@@ -100,7 +119,7 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :w, S, n)
@@ -121,7 +140,7 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
else
t .= b
end
- MisI || mulorldiv!(r₀, M, t, ldiv) # M⁻¹(b - Ax₀)
+ MisI || mulorldiv!(r₀, M, t, ldiv) # M(b - Ax₀)
rNorm = @knrm2(n, r₀) # β = ‖r₀‖₂
history && push!(rNorms, rNorm)
if rNorm == 0
@@ -136,29 +155,30 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
itmax == 0 && (itmax = 2*n)
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
# Set up workspace.
- mem = length(c) # Memory.
+ mem = length(V) # Memory.
for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹b).
- P[i] .= zero(FC) # Directions for x : Pₘ = N⁻¹Vₘ(Rₘ)⁻¹.
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
+ P[i] .= zero(FC) # Directions for x : Pₖ = NVₖ(Rₖ)⁻¹.
end
- c .= zero(T) # Last mem Givens cosines used for the factorization QₘRₘ = Hₘ.
- s .= zero(FC) # Last mem Givens sines used for the factorization QₘRₘ = Hₘ.
- H .= zero(FC) # Last column of the band hessenberg matrix Hₘ.
- # Each column has at most mem + 1 nonzero elements. hᵢ.ₘ is stored as H[m-i+2].
- # m-i+2 represents the indice of the diagonal where hᵢ.ₘ is located.
- # In addition of that, the last column of Rₘ is also stored in H.
+ c .= zero(T) # Last mem Givens cosines used for the factorization QₖRₖ = Hₖ.
+ s .= zero(FC) # Last mem Givens sines used for the factorization QₖRₖ = Hₖ.
+ H .= zero(FC) # Last column of the band hessenberg matrix Hₖ.
+ # Each column has at most mem + 1 nonzero elements.
+ # hᵢ.ₖ is stored as H[k-i+1], i ≤ k. hₖ₊₁.ₖ is not stored in H.
+                   # k-i+1 represents the index of the diagonal where hᵢ.ₖ is located.
+                   # In addition, the last column of Rₖ is also stored in H.
# Initial γ₁ and V₁.
- γₘ = rNorm # γₘ and γₘ₊₁ are the last components of gₘ, right-hand of the least squares problem min ‖ Hₘyₘ - gₘ ‖₂.
- @. V[1] = r₀ / rNorm
+  γₖ = rNorm # γₖ and γₖ₊₁ are the last components of gₖ, the right-hand side of the least-squares problem min ‖ Hₖyₖ - gₖ ‖₂.
+ V[1] .= r₀ ./ rNorm
# The following stopping criterion compensates for the lag in the
# residual, but usually increases the number of iterations.
- # solved = sqrt(max(1, iter-mem+1)) * |γₘ₊₁| ≤ ε
+ # solved = sqrt(max(1, iter-mem+1)) * |γₖ₊₁| ≤ ε
solved = rNorm ≤ ε # less accurate, but acceptable.
tired = iter ≥ itmax
status = "unknown"
@@ -170,88 +190,89 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + 1
# Set position in circulars stacks.
- pos = mod(iter-1, mem) + 1 # Position corresponding to pₘ and vₘ in circular stacks P and V.
- next_pos = mod(iter, mem) + 1 # Position corresponding to vₘ₊₁ in the circular stack V.
+ pos = mod(iter-1, mem) + 1 # Position corresponding to pₖ and vₖ in circular stacks P and V.
+ next_pos = mod(iter, mem) + 1 # Position corresponding to vₖ₊₁ in the circular stack V.
# Incomplete Arnoldi procedure.
z = NisI ? V[pos] : solver.z
- NisI || mulorldiv!(z, N, V[pos], ldiv) # N⁻¹vₘ, forms pₘ
- mul!(t, A, z) # AN⁻¹vₘ
- MisI || mulorldiv!(w, M, t, ldiv) # M⁻¹AN⁻¹vₘ, forms vₘ₊₁
+ NisI || mulorldiv!(z, N, V[pos], ldiv) # Nvₖ, forms pₖ
+ mul!(t, A, z) # ANvₖ
+ MisI || mulorldiv!(w, M, t, ldiv) # MANvₖ, forms vₖ₊₁
for i = max(1, iter-mem+1) : iter
- ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
- diag = iter - i + 2
- H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₘ = ⟨M⁻¹AN⁻¹vₘ , vᵢ⟩
- @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₘ * vᵢ
+ ipos = mod(i-1, mem) + 1 # Position corresponding to vᵢ in the circular stack V.
+ diag = iter - i + 1
+ H[diag] = @kdot(n, w, V[ipos]) # hᵢ.ₖ = ⟨MANvₖ, vᵢ⟩
+ @kaxpy!(n, -H[diag], V[ipos], w) # w ← w - hᵢ.ₖvᵢ
end
# Partial reorthogonalization of the Krylov basis.
if reorthogonalization
for i = max(1, iter-mem+1) : iter
ipos = mod(i-1, mem) + 1
- diag = iter - i + 2
+ diag = iter - i + 1
Htmp = @kdot(n, w, V[ipos])
H[diag] += Htmp
@kaxpy!(n, -Htmp, V[ipos], w)
end
end
- # Compute hₘ₊₁.ₘ and vₘ₊₁.
- H[1] = @knrm2(n, w) # hₘ₊₁.ₘ = ‖vₘ₊₁‖₂
- if H[1] ≠ 0 # hₘ₊₁.ₘ = 0 ⇒ "lucky breakdown"
- @. V[next_pos] = w / H[1] # vₘ₊₁ = w / hₘ₊₁.ₘ
+ # Compute hₖ₊₁.ₖ and vₖ₊₁.
+ Haux = @knrm2(n, w) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+ if Haux ≠ 0 # hₖ₊₁.ₖ = 0 ⇒ "lucky breakdown"
+ V[next_pos] .= w ./ Haux # vₖ₊₁ = w / hₖ₊₁.ₖ
end
- # rₘ₋ₘₑₘ.ₘ ≠ 0 when m ≥ mem + 1
+ # rₖ₋ₘₑₘ.ₖ ≠ 0 when k ≥ mem + 1
+ # We don't want to use rₖ₋₁₋ₘₑₘ.ₖ₋₁ when we compute rₖ₋ₘₑₘ.ₖ
if iter ≥ mem + 2
- H[mem+2] = zero(FC) # hₘ₋ₘₑₘ.ₘ = 0
+ H[mem+1] = zero(FC) # rₖ₋ₘₑₘ.ₖ = 0
end
- # Update the QR factorization of H.
+ # Update the QR factorization of Hₖ.
# Apply mem previous Givens reflections Ωᵢ.
for i = max(1,iter-mem) : iter-1
- irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s.
- diag = iter - i + 1
+ irot_pos = mod(i-1, mem) + 1 # Position corresponding to cᵢ and sᵢ in circular stacks c and s.
+ diag = iter - i
next_diag = diag + 1
- H_aux = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag]
+ Htmp = c[irot_pos] * H[next_diag] + s[irot_pos] * H[diag]
H[diag] = conj(s[irot_pos]) * H[next_diag] - c[irot_pos] * H[diag]
- H[next_diag] = H_aux
+ H[next_diag] = Htmp
end
- # Compute and apply current Givens reflection Ωₘ.
- # [cₘ sₘ] [ hₘ.ₘ ] = [ρₘ]
- # [sₘ -cₘ] [hₘ₊₁.ₘ] [0 ]
- (c[pos], s[pos], H[2]) = sym_givens(H[2], H[1])
- γₘ₊₁ = conj(s[pos]) * γₘ
- γₘ = c[pos] * γₘ
+ # Compute and apply current Givens reflection Ωₖ.
+ # [cₖ sₖ] [ hₖ.ₖ ] = [ρₖ]
+ # [sₖ -cₖ] [hₖ₊₁.ₖ] [0 ]
+ (c[pos], s[pos], H[1]) = sym_givens(H[1], Haux)
+ γₖ₊₁ = conj(s[pos]) * γₖ
+ γₖ = c[pos] * γₖ
- # Compute the direction pₘ, the last column of Pₘ = N⁻¹Vₘ(Rₘ)⁻¹.
+ # Compute the direction pₖ, the last column of Pₖ = NVₖ(Rₖ)⁻¹.
for i = max(1,iter-mem) : iter-1
- ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
- diag = iter - i + 2
+ ipos = mod(i-1, mem) + 1 # Position corresponding to pᵢ in the circular stack P.
+ diag = iter - i + 1
if ipos == pos
- # pₐᵤₓ ← -hₘ₋ₘₑₘ.ₘ * pₘ₋ₘₑₘ
+ # pₐᵤₓ ← -hₖ₋ₘₑₘ.ₖ * pₖ₋ₘₑₘ
@kscal!(n, -H[diag], P[pos])
else
- # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₘ * pᵢ
+ # pₐᵤₓ ← pₐᵤₓ - hᵢ.ₖ * pᵢ
@kaxpy!(n, -H[diag], P[ipos], P[pos])
end
end
- # pₐᵤₓ ← pₐᵤₓ + N⁻¹vₘ
+ # pₐᵤₓ ← pₐᵤₓ + Nvₖ
@kaxpy!(n, one(FC), z, P[pos])
- # pₘ = pₐᵤₓ / hₘ.ₘ
- @. P[pos] = P[pos] / H[2]
+ # pₖ = pₐᵤₓ / hₖ.ₖ
+ P[pos] .= P[pos] ./ H[1]
- # Compute solution xₘ.
- # xₘ ← xₘ₋₁ + γₘ * pₘ
- @kaxpy!(n, γₘ, P[pos], x)
+ # Compute solution xₖ.
+ # xₖ ← xₖ₋₁ + γₖ * pₖ
+ @kaxpy!(n, γₖ, P[pos], x)
# Update residual norm estimate.
- # ‖ M⁻¹(b - Axₘ) ‖₂ ≈ |γₘ₊₁|
- rNorm = abs(γₘ₊₁)
+ # ‖ M(b - Axₖ) ‖₂ ≈ |γₖ₊₁|
+ rNorm = abs(γₖ₊₁)
history && push!(rNorms, rNorm)
- # Update γₘ.
- γₘ = γₘ₊₁
+ # Update γₖ.
+ γₖ = γₖ₊₁
# Stopping conditions that do not depend on user input.
# This is to guard against tolerances that are unreasonably small.
@@ -262,9 +283,9 @@ function dqgmres!(solver :: DqgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
resid_decrease_lim = rNorm ≤ ε
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
solved && (status = "solution good enough given atol and rtol")
tired && (status = "maximum number of iterations exceeded")
user_requested_exit && (status = "user-requested exit")
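The docstring above advertises the warm-start form `dqgmres(A, b, x0; kwargs...)`. A brief sketch (the initial guess is arbitrary, chosen because A is close to 20I):

```julia
using Krylov, LinearAlgebra

n = 100
A = randn(n, n) + 20I
b = randn(n)

x0 = b ./ 20                              # crude initial guess since A ≈ 20I
x, stats = dqgmres(A, b, x0, memory=30)   # warm start from x0
```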
diff --git a/src/fgmres.jl b/src/fgmres.jl
new file mode 100644
index 000000000..fa536af23
--- /dev/null
+++ b/src/fgmres.jl
@@ -0,0 +1,353 @@
+# An implementation of FGMRES for the solution of the square linear system Ax = b.
+#
+# This method is described in
+#
+# Y. Saad, A Flexible Inner-Outer Preconditioned GMRES Algorithm.
+# SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993.
+#
+# Alexis Montoison,
+# Montreal, September 2022.
+
+export fgmres, fgmres!
+
+"""
+ (x, stats) = fgmres(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ restart::Bool=false, reorthogonalization::Bool=false,
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
+
+`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
+`FC` is `T` or `Complex{T}`.
+
+ (x, stats) = fgmres(A, b, x0::AbstractVector; kwargs...)
+
+FGMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the linear system Ax = b of size n using FGMRES.
+
+FGMRES computes a sequence of approximate solutions with minimum residual.
+FGMRES is a variant of GMRES that allows changes in the right preconditioner at each iteration.
+
+This implementation allows a left preconditioner M and a flexible right preconditioner N.
+A situation in which the preconditioner is "not constant" arises when a relaxation-type method,
+a Chebyshev iteration or another Krylov subspace method is used as a preconditioner.
+Compared to GMRES, there is no additional cost incurred in the arithmetic but the memory requirement almost doubles.
+Thus, GMRES is recommended if the right preconditioner N is constant.
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
+
+* `x0`: a vector of length n that represents an initial guess of the solution x.
+
+#### Keyword arguments
+
+* `memory`: if `restart = true`, the restarted version FGMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `restart`: restart the method after `memory` iterations;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
+
+#### Reference
+
+* Y. Saad, [*A Flexible Inner-Outer Preconditioned GMRES Algorithm*](https://doi.org/10.1137/0914028), SIAM Journal on Scientific Computing, Vol. 14(2), pp. 461--469, 1993.
+"""
+function fgmres end
+
+function fgmres(A, b :: AbstractVector{FC}, x0 :: AbstractVector; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
+ solver = FgmresSolver(A, b, memory)
+ fgmres!(solver, A, b, x0; kwargs...)
+ return (solver.x, solver.stats)
+end
+
+function fgmres(A, b :: AbstractVector{FC}; memory :: Int=20, kwargs...) where FC <: FloatOrComplex
+ solver = FgmresSolver(A, b, memory)
+ fgmres!(solver, A, b; kwargs...)
+ return (solver.x, solver.stats)
+end
+
+"""
+ solver = fgmres!(solver::FgmresSolver, A, b; kwargs...)
+ solver = fgmres!(solver::FgmresSolver, A, b, x0; kwargs...)
+
+where `kwargs` are keyword arguments of [`fgmres`](@ref).
+
+Note that the `memory` keyword argument is the only exception.
+It's required to create a `FgmresSolver` and can't be changed later.
+
+See [`FgmresSolver`](@ref) for more details about the `solver`.
+"""
+function fgmres! end
+
+function fgmres!(solver :: FgmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: AbstractVector; kwargs...) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ warm_start!(solver, x0)
+ fgmres!(solver, A, b; kwargs...)
+ return solver
+end
+
+function fgmres!(solver :: FgmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
+ M=I, N=I, ldiv :: Bool=false,
+ restart :: Bool=false, reorthogonalization :: Bool=false,
+ atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+
+ m, n = size(A)
+ m == n || error("System must be square")
+ length(b) == m || error("Inconsistent problem size")
+ (verbose > 0) && @printf(iostream, "FGMRES: system of size %d\n", n)
+
+ # Check M = Iₙ
+ MisI = (M === I)
+
+ # Check type consistency
+ eltype(A) == FC || error("eltype(A) ≠ $FC")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+
+ # Set up workspace.
+ allocate_if(!MisI , solver, :q , S, n)
+ allocate_if(restart, solver, :Δx, S, n)
+ Δx, x, w, V, Z = solver.Δx, solver.x, solver.w, solver.V, solver.Z
+ z, c, s, R, stats = solver.z, solver.c, solver.s, solver.R, solver.stats
+ warm_start = solver.warm_start
+ rNorms = stats.residuals
+ reset!(stats)
+ q = MisI ? w : solver.q
+ r₀ = MisI ? w : solver.q
+ xr = restart ? Δx : x
+
+ # Initial solution x₀.
+ x .= zero(FC)
+
+ # Initial residual r₀.
+ if warm_start
+ mul!(w, A, Δx)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ restart && @kaxpy!(n, one(FC), Δx, x)
+ else
+ w .= b
+ end
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
+ β = @knrm2(n, r₀) # β = ‖r₀‖₂
+
+ rNorm = β
+ history && push!(rNorms, β)
+ ε = atol + rtol * rNorm
+
+ if β == 0
+ stats.niter = 0
+ stats.solved, stats.inconsistent = true, false
+ stats.status = "x = 0 is a zero-residual solution"
+ solver.warm_start = false
+ return solver
+ end
+
+ mem = length(c) # Memory
+  npass = 0        # Number of passes
+
+ iter = 0 # Cumulative number of iterations
+ inner_iter = 0 # Number of iterations in a pass
+
+ itmax == 0 && (itmax = 2*n)
+ inner_itmax = itmax
+
+ (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗")
+
+ # Tolerance for breakdown detection.
+ btol = eps(T)^(3/4)
+
+ # Stopping criterion
+ breakdown = false
+ inconsistent = false
+ solved = rNorm ≤ ε
+ tired = iter ≥ itmax
+ inner_tired = inner_iter ≥ inner_itmax
+ status = "unknown"
+ user_requested_exit = false
+
+ while !(solved || tired || breakdown || user_requested_exit)
+
+ # Initialize workspace.
+ nr = 0 # Number of coefficients stored in Rₖ.
+ for i = 1 : mem
+ V[i] .= zero(FC) # Orthogonal basis of {Mr₀, MANₖr₀, ..., (MANₖ)ᵏ⁻¹r₀}.
+ Z[i] .= zero(FC) # Zₖ = [N₁v₁, ..., Nₖvₖ]
+ end
+ s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
+ c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
+ R .= zero(FC) # Upper triangular matrix Rₖ.
+    z .= zero(FC)  # Right-hand side of the least-squares problem min ‖Hₖ₊₁.ₖyₖ - βe₁‖₂.
+
+ if restart
+ xr .= zero(FC) # xr === Δx when restart is set to true
+ if npass ≥ 1
+ mul!(w, A, x)
+ @kaxpby!(n, one(FC), b, -one(FC), w)
+ MisI || mulorldiv!(r₀, M, w, ldiv)
+ end
+ end
+
+ # Initial ζ₁ and V₁
+ β = @knrm2(n, r₀)
+ z[1] = β
+    V[1] .= r₀ ./ rNorm
+
+ npass = npass + 1
+ solver.inner_iter = 0
+ inner_tired = false
+
+ while !(solved || inner_tired || breakdown || user_requested_exit)
+
+ # Update iteration index
+ solver.inner_iter = solver.inner_iter + 1
+ inner_iter = solver.inner_iter
+
+ # Update workspace if more storage is required and restart is set to false
+ if !restart && (inner_iter > mem)
+ for i = 1 : inner_iter
+ push!(R, zero(FC))
+ end
+ push!(s, zero(FC))
+ push!(c, zero(T))
+ push!(Z, S(undef, n))
+ end
+
+ # Continue the process.
+ # MAZₖ = Vₖ₊₁Hₖ₊₁.ₖ
+ mulorldiv!(Z[inner_iter], N, V[inner_iter], ldiv) # zₖ ← Nₖvₖ
+ mul!(w, A, Z[inner_iter]) # w ← Azₖ
+ MisI || mulorldiv!(q, M, w, ldiv) # q ← MAzₖ
+ for i = 1 : inner_iter
+ R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
+ @kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
+ end
+
+ # Reorthogonalization of the basis.
+ if reorthogonalization
+ for i = 1 : inner_iter
+ Htmp = @kdot(n, V[i], q)
+ R[nr+i] += Htmp
+ @kaxpy!(n, -Htmp, V[i], q)
+ end
+ end
+
+ # Compute hₖ₊₁.ₖ
+ Hbis = @knrm2(n, q) # hₖ₊₁.ₖ = ‖vₖ₊₁‖₂
+
+ # Update the QR factorization of Hₖ₊₁.ₖ.
+ # Apply previous Givens reflections Ωᵢ.
+ # [cᵢ sᵢ] [ r̄ᵢ.ₖ ] = [ rᵢ.ₖ ]
+ # [s̄ᵢ -cᵢ] [rᵢ₊₁.ₖ] [r̄ᵢ₊₁.ₖ]
+ for i = 1 : inner_iter-1
+ Rtmp = c[i] * R[nr+i] + s[i] * R[nr+i+1]
+ R[nr+i+1] = conj(s[i]) * R[nr+i] - c[i] * R[nr+i+1]
+ R[nr+i] = Rtmp
+ end
+
+ # Compute and apply current Givens reflection Ωₖ.
+ # [cₖ sₖ] [ r̄ₖ.ₖ ] = [rₖ.ₖ]
+ # [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ]
+ (c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis)
+
+ # Update zₖ = (Qₖ)ᴴβe₁
+ ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter]
+ z[inner_iter] = c[inner_iter] * z[inner_iter]
+
+ # Update residual norm estimate.
+      # ‖ M(b - Axₖ) ‖₂ = |ζₖ₊₁|
+ rNorm = abs(ζₖ₊₁)
+ history && push!(rNorms, rNorm)
+
+ # Update the number of coefficients in Rₖ
+ nr = nr + inner_iter
+
+ # Stopping conditions that do not depend on user input.
+ # This is to guard against tolerances that are unreasonably small.
+ resid_decrease_mach = (rNorm + one(T) ≤ one(T))
+
+ # Update stopping criterion.
+ resid_decrease_lim = rNorm ≤ ε
+ breakdown = Hbis ≤ btol
+ solved = resid_decrease_lim || resid_decrease_mach
+ inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax
+ solver.inner_iter = inner_iter
+ kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis)
+
+ # Compute vₖ₊₁
+ if !(solved || inner_tired || breakdown)
+ if !restart && (inner_iter ≥ mem)
+ push!(V, S(undef, n))
+ push!(z, zero(FC))
+ end
+        V[inner_iter+1] .= q ./ Hbis # hₖ₊₁.ₖvₖ₊₁ = q
+ z[inner_iter+1] = ζₖ₊₁
+ end
+
+ user_requested_exit = callback(solver) :: Bool
+ end
+
+ # Compute y by solving Ry = z with backward substitution.
+ y = z # yᵢ = ζᵢ
+ for i = inner_iter : -1 : 1
+ pos = nr + i - inner_iter # position of rᵢ.ₖ
+ for j = inner_iter : -1 : i+1
+ y[i] = y[i] - R[pos] * y[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
+ pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ end
+ # Rₖ can be singular if the system is inconsistent
+ if abs(R[pos]) ≤ btol
+ y[i] = zero(FC)
+ inconsistent = true
+ else
+ y[i] = y[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
+ end
+ end
+
+ # Form xₖ = N₁v₁y₁ + ... + Nₖvₖyₖ = z₁y₁ + ... + zₖyₖ
+ for i = 1 : inner_iter
+ @kaxpy!(n, y[i], Z[i], xr)
+ end
+ restart && @kaxpy!(n, one(FC), xr, x)
+
+ # Update inner_itmax, iter and tired variables.
+ inner_itmax = inner_itmax - inner_iter
+ iter = iter + inner_iter
+ tired = iter ≥ itmax
+ end
+ (verbose > 0) && @printf(iostream, "\n")
+
+ tired && (status = "maximum number of iterations exceeded")
+ solved && (status = "solution good enough given atol and rtol")
+ inconsistent && (status = "found approximate least-squares solution")
+ user_requested_exit && (status = "user-requested exit")
+
+ # Update x
+ warm_start && !restart && @kaxpy!(n, one(FC), Δx, x)
+ solver.warm_start = false
+
+ # Update stats
+ stats.niter = iter
+ stats.solved = solved
+ stats.inconsistent = inconsistent
+ stats.status = status
+ return solver
+end
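Because FGMRES stores the preconditioned basis Zₖ, the right preconditioner `N` may change from one application to the next. A sketch of a genuinely flexible preconditioner, a few inner GMRES iterations wrapped in a custom operator (the `InnerGmres` type is hypothetical, not part of the package; only `mul!` needs a method since `ldiv=false` is the default):

```julia
using Krylov, LinearAlgebra
import LinearAlgebra: mul!

# Inexact inner solve: its effect depends on the input vector,
# so the operator differs at every outer iteration.
struct InnerGmres{M}
  A :: M
end
mul!(y, P::InnerGmres, x) = (y .= gmres(P.A, x, itmax=5)[1]; y)

n = 200
A = randn(n, n) + 20I
b = randn(n)

x, stats = fgmres(A, b, N=InnerGmres(A), memory=30)
```

Note that each application of `InnerGmres` allocates a fresh inner workspace; a production version would reuse a `GmresSolver` through `gmres!`.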
diff --git a/src/fom.jl b/src/fom.jl
index fcae5cf62..6aabb33f5 100644
--- a/src/fom.jl
+++ b/src/fom.jl
@@ -11,38 +11,53 @@
export fom, fom!
"""
- (x, stats) = fom(A, b::AbstractVector{FC}; memory::Int=20,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- reorthogonalization::Bool=false, itmax::Int=0,
- restart::Bool=false, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = fom(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ restart::Bool=false, reorthogonalization::Bool=false,
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using FOM method.
+ (x, stats) = fom(A, b, x0::AbstractVector; kwargs...)
+
+FOM can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the linear system Ax = b of size n using FOM.
FOM algorithm is based on the Arnoldi process and a Galerkin condition.
-This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
-Full reorthogonalization is available with the `reorthogonalization` option.
+#### Optional argument
-If `restart = true`, the restarted version FOM(k) is used with `k = memory`.
-If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations.
-More storage will be allocated only if the number of iterations exceed `memory`.
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-FOM can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = fom(A, b, x0; kwargs...)
+* `memory`: if `restart = true`, the restarted version FOM(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `restart`: restart the method after `memory` iterations;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -82,15 +97,16 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 :: Abs
end
function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- reorthogonalization :: Bool=false, itmax :: Int=0,
- restart :: Bool=false, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ M=I, N=I, ldiv :: Bool=false,
+ restart :: Bool=false, reorthogonalization :: Bool=false,
+ atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("FOM: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "FOM: system of size %d\n", n)
# Check M = Iₙ and N = Iₙ
MisI = (M === I)
@@ -98,7 +114,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI , solver, :q , S, n)
@@ -124,7 +140,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
else
w .= b
end
- MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀)
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
β = @knrm2(n, r₀) # β = ‖r₀‖₂
rNorm = β
@@ -148,8 +164,8 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
itmax == 0 && (itmax = 2*n)
inner_itmax = itmax
- (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ")
- kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗")
+ (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗")
# Tolerance for breakdown detection.
btol = eps(T)^(3/4)
@@ -167,7 +183,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Initialize workspace.
nr = 0 # Number of coefficients stored in Uₖ.
for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀).
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
end
l .= zero(FC) # Lower unit triangular matrix Lₖ.
U .= zero(FC) # Upper triangular matrix Uₖ.
@@ -207,11 +223,11 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Continue the Arnoldi process.
p = NisI ? V[inner_iter] : solver.p
- NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ
- mul!(w, A, p) # w ← AN⁻¹vₖ
- MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ
+ NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ
+ mul!(w, A, p) # w ← ANvₖ
+ MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ
for i = 1 : inner_iter
- U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ
+ U[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
@kaxpy!(n, -U[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
end
@@ -240,7 +256,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
l[inner_iter] = Hbis / U[nr+inner_iter]
# Update residual norm estimate.
- # ‖ M⁻¹(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ|
+ # ‖ M(b - Axₖ) ‖₂ = hₖ₊₁.ₖ * |ζₖ / uₖ.ₖ|
rNorm = Hbis * abs(z[inner_iter] / U[nr+inner_iter])
history && push!(rNorms, rNorm)
@@ -257,7 +273,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
breakdown = Hbis ≤ btol
solved = resid_decrease_lim || resid_decrease_mach
inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax
- kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis)
+ kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis)
# Compute vₖ₊₁.
if !(solved || inner_tired || breakdown)
@@ -280,7 +296,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
y[i] = y[i] / U[pos] # yᵢ ← yᵢ / rᵢᵢ
end
- # Form xₖ = N⁻¹Vₖyₖ
+ # Form xₖ = NVₖyₖ
for i = 1 : inner_iter
@kaxpy!(n, y[i], V[i], xr)
end
@@ -295,7 +311,7 @@ function fom!(solver :: FomSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + inner_iter
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
breakdown && (status = "inconsistent linear system")
diff --git a/src/gmres.jl b/src/gmres.jl
index 388a4ab96..d475198b5 100644
--- a/src/gmres.jl
+++ b/src/gmres.jl
@@ -11,38 +11,53 @@
export gmres, gmres!
"""
- (x, stats) = gmres(A, b::AbstractVector{FC}; memory::Int=20,
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- reorthogonalization::Bool=false, itmax::Int=0,
- restart::Bool=false, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ (x, stats) = gmres(A, b::AbstractVector{FC};
+ memory::Int=20, M=I, N=I, ldiv::Bool=false,
+ restart::Bool=false, reorthogonalization::Bool=false,
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using GMRES method.
+ (x, stats) = gmres(A, b, x0::AbstractVector; kwargs...)
-GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimal residual property.
+GMRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
-This implementation allows a left preconditioner M and a right preconditioner N.
-- Left preconditioning : M⁻¹Ax = M⁻¹b
-- Right preconditioning : AN⁻¹u = b with x = N⁻¹u
-- Split preconditioning : M⁻¹AN⁻¹u = M⁻¹b with x = N⁻¹u
+Solve the linear system Ax = b of size n using GMRES.
-Full reorthogonalization is available with the `reorthogonalization` option.
+The GMRES algorithm is based on the Arnoldi process and computes a sequence of approximate solutions with the minimum residual property.
-If `restart = true`, the restarted version GMRES(k) is used with `k = memory`.
-If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations.
-More storage will be allocated only if the number of iterations exceed `memory`.
+#### Input arguments
-GMRES can be warm-started from an initial guess `x0` with the method
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
- (x, stats) = gmres(A, b, x0; kwargs...)
+#### Optional argument
-where `kwargs` are the same keyword arguments as above.
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Keyword arguments
+
+* `memory`: if `restart = true`, the restarted version GMRES(k) is used with `k = memory`. If `restart = false`, the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`;
+* `M`: linear operator that models a nonsingular matrix of size `n` used for left preconditioning;
+* `N`: linear operator that models a nonsingular matrix of size `n` used for right preconditioning;
+* `ldiv`: determine whether the preconditioners are applied with `ldiv!` or `mul!`;
+* `restart`: restart the method after `memory` iterations;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details are displayed if verbose mode is enabled (`verbose > 0`); information is printed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
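+
+#### Example
+
+A minimal usage sketch (the random sparse system below is illustrative only):
+
+    using Krylov, SparseArrays, LinearAlgebra
+
+    n = 100
+    A = sprandn(n, n, 0.05) + 10 * I  # shifted sparse square matrix
+    b = randn(n)
+    x, stats = gmres(A, b, restart=true, memory=30)  # GMRES(30)
+    stats.niter  # number of iterations performed
+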
#### Reference
@@ -82,15 +97,16 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0 ::
end
function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- reorthogonalization :: Bool=false, itmax :: Int=0,
- restart :: Bool=false, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ M=I, N=I, ldiv :: Bool=false,
+ restart :: Bool=false, reorthogonalization :: Bool=false,
+ atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("GMRES: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "GMRES: system of size %d\n", n)
# Check M = Iₙ and N = Iₙ
MisI = (M === I)
@@ -98,7 +114,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI , solver, :q , S, n)
@@ -124,7 +140,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
else
w .= b
end
- MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M⁻¹(b - Ax₀)
+ MisI || mulorldiv!(r₀, M, w, ldiv) # r₀ = M(b - Ax₀)
β = @knrm2(n, r₀) # β = ‖r₀‖₂
rNorm = β
@@ -148,8 +164,8 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
itmax == 0 && (itmax = 2*n)
inner_itmax = itmax
- (verbose > 0) && @printf("%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ")
- kdisplay(iter, verbose) && @printf("%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗")
+ (verbose > 0) && @printf(iostream, "%5s %5s %7s %7s\n", "pass", "k", "‖rₖ‖", "hₖ₊₁.ₖ")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7s\n", npass, iter, rNorm, "✗ ✗ ✗ ✗")
# Tolerance for breakdown detection.
btol = eps(T)^(3/4)
@@ -168,7 +184,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Initialize workspace.
nr = 0 # Number of coefficients stored in Rₖ.
for i = 1 : mem
- V[i] .= zero(FC) # Orthogonal basis of Kₖ(M⁻¹AN⁻¹, M⁻¹r₀).
+ V[i] .= zero(FC) # Orthogonal basis of Kₖ(MAN, Mr₀).
end
s .= zero(FC) # Givens sines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
c .= zero(T) # Givens cosines used for the factorization QₖRₖ = Hₖ₊₁.ₖ.
@@ -210,11 +226,11 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Continue the Arnoldi process.
p = NisI ? V[inner_iter] : solver.p
- NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← N⁻¹vₖ
- mul!(w, A, p) # w ← AN⁻¹vₖ
- MisI || mulorldiv!(q, M, w, ldiv) # q ← M⁻¹AN⁻¹vₖ
+ NisI || mulorldiv!(p, N, V[inner_iter], ldiv) # p ← Nvₖ
+ mul!(w, A, p) # w ← ANvₖ
+ MisI || mulorldiv!(q, M, w, ldiv) # q ← MANvₖ
for i = 1 : inner_iter
- R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = qᵀvᵢ
+ R[nr+i] = @kdot(n, V[i], q) # hᵢₖ = (vᵢ)ᴴq
@kaxpy!(n, -R[nr+i], V[i], q) # q ← q - hᵢₖvᵢ
end
@@ -245,12 +261,12 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# [s̄ₖ -cₖ] [hₖ₊₁.ₖ] [ 0 ]
(c[inner_iter], s[inner_iter], R[nr+inner_iter]) = sym_givens(R[nr+inner_iter], Hbis)
- # Update zₖ = (Qₖ)ᵀβe₁
+ # Update zₖ = (Qₖ)ᴴβe₁
ζₖ₊₁ = conj(s[inner_iter]) * z[inner_iter]
z[inner_iter] = c[inner_iter] * z[inner_iter]
# Update residual norm estimate.
- # ‖ M⁻¹(b - Axₖ) ‖₂ = |ζₖ₊₁|
+ # ‖ M(b - Axₖ) ‖₂ = |ζₖ₊₁|
rNorm = abs(ζₖ₊₁)
history && push!(rNorms, rNorm)
@@ -267,7 +283,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
solved = resid_decrease_lim || resid_decrease_mach
inner_tired = restart ? inner_iter ≥ min(mem, inner_itmax) : inner_iter ≥ inner_itmax
solver.inner_iter = inner_iter
- kdisplay(iter+inner_iter, verbose) && @printf("%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis)
+ kdisplay(iter+inner_iter, verbose) && @printf(iostream, "%5d %5d %7.1e %7.1e\n", npass, iter+inner_iter, rNorm, Hbis)
# Compute vₖ₊₁
if !(solved || inner_tired || breakdown)
@@ -299,7 +315,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
end
- # Form xₖ = N⁻¹Vₖyₖ
+ # Form xₖ = NVₖyₖ
for i = 1 : inner_iter
@kaxpy!(n, y[i], V[i], xr)
end
@@ -314,7 +330,7 @@ function gmres!(solver :: GmresSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = iter + inner_iter
tired = iter ≥ itmax
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
solved && (status = "solution good enough given atol and rtol")
diff --git a/src/gpmr.jl b/src/gpmr.jl
index b10942995..958d2977c 100644
--- a/src/gpmr.jl
+++ b/src/gpmr.jl
@@ -12,23 +12,30 @@
export gpmr, gpmr!
"""
- (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}; memory::Int=20,
- C=I, D=I, E=I, F=I, atol::T=√eps(T), rtol::T=√eps(T),
- gsp::Bool=false, reorthogonalization::Bool=false,
- itmax::Int=0, λ::FC=one(FC), μ::FC=one(FC),
+ (x, y, stats) = gpmr(A, B, b::AbstractVector{FC}, c::AbstractVector{FC};
+ memory::Int=20, C=I, D=I, E=I, F=I,
+ ldiv::Bool=false, gsp::Bool=false,
+ λ::FC=one(FC), μ::FC=one(FC),
+ reorthogonalization::Bool=false, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-GPMR solves the unsymmetric partitioned linear system
+ (x, y, stats) = gpmr(A, B, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
- [ λI A ] [ x ] = [ b ]
- [ B μI ] [ y ] [ c ],
+GPMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above.
-where λ and μ are real or complex numbers.
-`A` can have any shape and `B` has the shape of `Aᵀ`.
+Given matrices `A` of dimension m × n and `B` of dimension n × m,
+GPMR solves the non-Hermitian partitioned linear system
+
+ [ λIₘ A ] [ x ] = [ b ]
+ [ B μIₙ ] [ y ] [ c ],
+
+of size (n+m) × (n+m) where λ and μ are real or complex numbers.
+`A` can have any shape and `B` has the shape of `Aᴴ`.
`A`, `B`, `b` and `c` must all be nonzero.
This implementation allows left and right block diagonal preconditioners
@@ -44,8 +51,6 @@ and can solve
when `CE = M⁻¹` and `DF = N⁻¹`.
By default, GPMR solves unsymmetric linear systems with `λ = 1` and `μ = 1`.
-If `gsp = true`, `λ = 1`, `μ = 0` and the associated generalized saddle point system is solved.
-`λ` and `μ` are also keyword arguments that can be directly modified for more specific problems.
GPMR is based on the orthogonal Hessenberg reduction process and its relations with the block-Arnoldi process.
The residual norm ‖rₖ‖ is monotonically decreasing in GPMR.
@@ -53,19 +58,42 @@ The residual norm ‖rₖ‖ is monotonically decreasing in GPMR.
GPMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`.
`atol` is an absolute tolerance and `rtol` is a relative tolerance.
-Full reorthogonalization is available with the `reorthogonalization` option.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `B`: a linear operator that models a matrix of dimension n × m;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
+
+#### Optional arguments
-Additional details can be displayed if verbose mode is enabled (verbose > 0).
-Information will be displayed every `verbose` iterations.
+* `x0`: a vector of length m that represents an initial guess of the solution x;
+* `y0`: a vector of length n that represents an initial guess of the solution y.
-GPMR can be warm-started from initial guesses `x0` and `y0` with the method
+#### Keyword arguments
- (x, y, stats) = gpmr(A, B, b, c, x0, y0; kwargs...)
+* `memory`: the parameter `memory` should be used as a hint of the number of iterations to limit dynamic memory allocations. Additional storage will be allocated if the number of iterations exceeds `memory`;
+* `C`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal left preconditioner;
+* `D`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal left preconditioner;
+* `E`: linear operator that models a nonsingular matrix of size `m`, and represents the first term of the block-diagonal right preconditioner;
+* `F`: linear operator that models a nonsingular matrix of size `n`, and represents the second term of the block-diagonal right preconditioner;
+* `ldiv`: determine whether the preconditioners are applied with `ldiv!` or `mul!`;
+* `gsp`: if `true`, set `λ = 1` and `μ = 0` for generalized saddle-point systems;
+* `λ` and `μ`: diagonal scaling factors of the partitioned linear system;
+* `reorthogonalization`: reorthogonalize the new vectors of the Krylov basis against all previous vectors;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details are displayed if verbose mode is enabled (`verbose > 0`); information is printed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length m;
+* `y`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
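+
+#### Example
+
+A minimal usage sketch (the random blocks below are illustrative only):
+
+    using Krylov, LinearAlgebra
+
+    m, n = 60, 40
+    A = rand(m, n)
+    B = rand(n, m)
+    b = rand(m)
+    c = rand(n)
+    x, y, stats = gpmr(A, B, b, c)  # solves [Iₘ A; B Iₙ] [x; y] = [b; c]
+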
#### Reference
@@ -106,11 +134,13 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
end
function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- C=I, D=I, E=I, F=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- gsp :: Bool=false, reorthogonalization :: Bool=false,
- itmax :: Int=0, λ :: FC=one(FC), μ :: FC=one(FC),
+ C=I, D=I, E=I, F=I,
+ ldiv :: Bool=false, gsp :: Bool=false,
+ λ :: FC=one(FC), μ :: FC=one(FC),
+ reorthogonalization :: Bool=false, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history::Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
s, t = size(B)
@@ -118,7 +148,7 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
s == n || error("Inconsistent problem size")
length(b) == m || error("Inconsistent problem size")
length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("GPMR: system of %d equations in %d variables\n", m+n, m+n)
+ (verbose > 0) && @printf(iostream, "GPMR: system of %d equations in %d variables\n", m+n, m+n)
# Check C = E = Iₘ and D = F = Iₙ
CisI = (C === I)
@@ -129,8 +159,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
eltype(B) == FC || error("eltype(B) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Determine λ and μ associated to generalized saddle point systems.
gsp && (λ = one(FC) ; μ = zero(FC))
@@ -172,7 +202,7 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
gs .= zero(FC) # Givens sines used for the factorization QₖRₖ = Sₖ₊₁.ₖ.
gc .= zero(T) # Givens cosines used for the factorization QₖRₖ = Sₖ₊₁.ₖ.
R .= zero(FC) # Upper triangular matrix Rₖ.
- zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᵀ(βe₁ + γe₂).
+ zt .= zero(FC) # Rₖzₖ = tₖ with (tₖ, τbar₂ₖ₊₁, τbar₂ₖ₊₂) = (Qₖ)ᴴ(βe₁ + γe₂).
# Warm-start
# If λ ≠ 0, Cb₀ = Cb - CAΔy - λΔx because CM = Iₘ and E = Iₘ
@@ -213,8 +243,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
zt[1] = β
zt[2] = γ
- (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7s\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗")
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s\n", "k", "‖rₖ‖", "hₖ₊₁.ₖ", "fₖ₊₁.ₖ")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7s\n", iter, rNorm, "✗ ✗ ✗ ✗", "✗ ✗ ✗ ✗")
# Tolerance for breakdown detection.
btol = eps(T)^(3/4)
@@ -259,8 +289,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
DisI || mulorldiv!(p, D, dB, ldiv) # p = DBEvₖ
for i = 1 : iter
- hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = vᵢAuₖ
- fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = uᵢBvₖ
+ hᵢₖ = @kdot(m, V[i], q) # hᵢ.ₖ = (vᵢ)ᴴq
+ fᵢₖ = @kdot(n, U[i], p) # fᵢ.ₖ = (uᵢ)ᴴp
@kaxpy!(m, -hᵢₖ, V[i], q) # q ← q - hᵢ.ₖvᵢ
@kaxpy!(n, -fᵢₖ, U[i], p) # p ← p - fᵢ.ₖuᵢ
R[nr₂ₖ + 2i-1] = hᵢₖ
@@ -270,8 +300,8 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
# Reorthogonalization of the Krylov basis.
if reorthogonalization
for i = 1 : iter
- Htmp = @kdot(m, V[i], q) # hₜₘₚ = qᵀvᵢ
- Ftmp = @kdot(n, U[i], p) # fₜₘₚ = pᵀuᵢ
+ Htmp = @kdot(m, V[i], q) # hₜₘₚ = (vᵢ)ᴴq
+ Ftmp = @kdot(n, U[i], p) # fₜₘₚ = (uᵢ)ᴴp
@kaxpy!(m, -Htmp, V[i], q) # q ← q - hₜₘₚvᵢ
@kaxpy!(n, -Ftmp, U[i], p) # p ← p - fₜₘₚuᵢ
R[nr₂ₖ + 2i-1] += Htmp # hᵢ.ₖ = hᵢ.ₖ + hₜₘₚ
@@ -400,7 +430,7 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
breakdown = Faux ≤ btol && Haux ≤ btol
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, Haux, Faux)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, Haux, Faux)
# Compute vₖ₊₁ and uₖ₊₁
if !(solved || tired || breakdown || user_requested_exit)
@@ -430,7 +460,7 @@ function gpmr!(solver :: GpmrSolver{T,FC,S}, A, B, b :: AbstractVector{FC}, c ::
zt[2k+2] = τbar₂ₖ₊₂
end
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
# Compute zₖ = (ζ₁, ..., ζ₂ₖ) by solving Rₖzₖ = tₖ with backward substitution.
for i = 2iter : -1 : 1
diff --git a/src/krylov_processes.jl b/src/krylov_processes.jl
new file mode 100644
index 000000000..2be66b1c5
--- /dev/null
+++ b/src/krylov_processes.jl
@@ -0,0 +1,439 @@
+export hermitian_lanczos, nonhermitian_lanczos, arnoldi, golub_kahan, saunders_simon_yip, montoison_orban
+
+"""
+ V, T = hermitian_lanczos(A, b, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n;
+* `k`: the number of iterations of the Hermitian Lanczos process.
+
+#### Output arguments
+
+* `V`: a dense n × (k+1) matrix;
+* `T`: a sparse (k+1) × k tridiagonal matrix.
+
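+#### Example
+
+A minimal usage sketch (random Hermitian data, for illustration only):
+
+    using Krylov, LinearAlgebra
+
+    n, k = 50, 10
+    A = rand(n, n); A = A + A'  # symmetric, hence Hermitian
+    b = rand(n)
+    V, T = hermitian_lanczos(A, b, k)
+    norm(A * V[:,1:k] - V * T)  # ≈ 0: the Lanczos relation AVₖ = Vₖ₊₁Tₖ₊₁.ₖ
+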
+#### Reference
+
+* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 255--282, 1950.
+"""
+function hermitian_lanczos(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ R = real(FC)
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ colptr = zeros(Int, k+1)
+ rowval = zeros(Int, 3k-1)
+ nzval = zeros(R, 3k-1)
+
+ colptr[1] = 1
+ rowval[1] = 1
+ rowval[2] = 2
+ for i = 1:k
+ colptr[i+1] = 3i
+ if i ≥ 2
+ pos = colptr[i]
+ rowval[pos] = i-1
+ rowval[pos+1] = i
+ rowval[pos+2] = i+1
+ end
+ end
+
+ V = M(undef, n, k+1)
+ T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval)
+
+ pαᵢ = 1 # Position of αᵢ in the vector `nzval`
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ if i == 1
+ βᵢ = @knrm2(n, b)
+ vᵢ .= b ./ βᵢ
+ end
+ mul!(q, A, vᵢ)
+ αᵢ = @kdotr(n, vᵢ, q)
+ nzval[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ
+ @kaxpy!(n, -αᵢ, vᵢ, q)
+ if i ≥ 2
+ vᵢ₋₁ = view(V,:,i-1)
+ βᵢ = nzval[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁
+ nzval[pαᵢ-1] = βᵢ # Tᵢ₋₁.ᵢ = βᵢ
+ @kaxpy!(n, -βᵢ, vᵢ₋₁, q)
+ end
+ βᵢ₊₁ = @knrm2(n, q)
+ nzval[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁
+ vᵢ₊₁ .= q ./ βᵢ₊₁
+ pαᵢ = pαᵢ + 3
+ end
+ return V, T
+end
+
+"""
+ V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a square matrix of dimension n;
+* `b`: a vector of length n;
+* `c`: a vector of length n;
+* `k`: the number of iterations of the non-Hermitian Lanczos process.
+
+#### Output arguments
+
+* `V`: a dense n × (k+1) matrix;
+* `T`: a sparse (k+1) × k tridiagonal matrix;
+* `U`: a dense n × (k+1) matrix;
+* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix.
+
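+#### Example
+
+A minimal usage sketch (random data, for illustration only; the process can break down for unlucky `b` and `c`):
+
+    using Krylov, LinearAlgebra
+
+    n, k = 50, 10
+    A = rand(n, n)
+    b = rand(n); c = rand(n)
+    V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k)
+    norm(A * V[:,1:k] - V * T)    # ≈ 0: AVₖ = Vₖ₊₁Tₖ₊₁.ₖ
+    norm(A' * U[:,1:k] - U * Tᴴ)  # ≈ 0: AᴴUₖ = Uₖ₊₁(Tᴴ)ₖ₊₁.ₖ
+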
+#### Reference
+
+* C. Lanczos, [*An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators*](https://doi.org/10.6028/jres.045.026), Journal of Research of the National Bureau of Standards, 45(4), pp. 255--282, 1950.
+"""
+function nonhermitian_lanczos(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ Aᴴ = A'
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ colptr = zeros(Int, k+1)
+ rowval = zeros(Int, 3k-1)
+ nzval_T = zeros(FC, 3k-1)
+ nzval_Tᴴ = zeros(FC, 3k-1)
+
+ colptr[1] = 1
+ rowval[1] = 1
+ rowval[2] = 2
+ for i = 1:k
+ colptr[i+1] = 3i
+ if i ≥ 2
+ pos = colptr[i]
+ rowval[pos] = i-1
+ rowval[pos+1] = i
+ rowval[pos+2] = i+1
+ end
+ end
+
+ V = M(undef, n, k+1)
+ U = M(undef, n, k+1)
+ T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T)
+ Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ)
+
+ pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ`
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ uᵢ = view(U,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ uᵢ₊₁ = p = view(U,:,i+1)
+ if i == 1
+ cᴴb = @kdot(n, c, b)
+ βᵢ = √(abs(cᴴb))
+ γᵢ = cᴴb / βᵢ
+ vᵢ .= b ./ βᵢ
+ uᵢ .= c ./ conj(γᵢ)
+ end
+ mul!(q, A , vᵢ)
+ mul!(p, Aᴴ, uᵢ)
+ if i ≥ 2
+ vᵢ₋₁ = view(V,:,i-1)
+ uᵢ₋₁ = view(U,:,i-1)
+ βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁
+ γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ
+ @kaxpy!(n, - γᵢ , vᵢ₋₁, q)
+ @kaxpy!(n, -conj(βᵢ), uᵢ₋₁, p)
+ end
+ αᵢ = @kdot(n, uᵢ, q)
+ nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ
+ nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ
+ @kaxpy!(m, - αᵢ , vᵢ, q)
+ @kaxpy!(n, -conj(αᵢ), uᵢ, p)
+ pᴴq = @kdot(n, p, q)
+ βᵢ₊₁ = √(abs(pᴴq))
+ γᵢ₊₁ = pᴴq / βᵢ₊₁
+ vᵢ₊₁ .= q ./ βᵢ₊₁
+ uᵢ₊₁ .= p ./ conj(γᵢ₊₁)
+ nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁
+ nzval_Tᴴ[pαᵢ+1] = conj(γᵢ₊₁) # Tᴴᵢ₊₁.ᵢ = γ̄ᵢ₊₁
+ if i ≤ k-1
+ nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁
+ nzval_Tᴴ[pαᵢ+2] = conj(βᵢ₊₁) # Tᴴᵢ.ᵢ₊₁ = β̄ᵢ₊₁
+ end
+ pαᵢ = pαᵢ + 3
+ end
+ return V, T, U, Tᴴ
+end
+
+"""
+ V, H = arnoldi(A, b, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a square matrix of dimension n;
+* `b`: a vector of length n;
+* `k`: the number of iterations of the Arnoldi process.
+
+#### Output arguments
+
+* `V`: a dense n × (k+1) matrix;
+* `H`: a dense (k+1) × k upper Hessenberg matrix.
+
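+#### Example
+
+A minimal usage sketch (random data, for illustration only):
+
+    using Krylov, LinearAlgebra
+
+    n, k = 50, 10
+    A = rand(n, n)
+    b = rand(n)
+    V, H = arnoldi(A, b, k)
+    norm(A * V[:,1:k] - V * H)  # ≈ 0: the Arnoldi relation AVₖ = Vₖ₊₁Hₖ₊₁.ₖ
+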
+#### Reference
+
+* W. E. Arnoldi, [*The principle of minimized iterations in the solution of the matrix eigenvalue problem*](https://doi.org/10.1090/qam/42792), Quarterly of Applied Mathematics, 9, pp. 17--29, 1951.
+"""
+function arnoldi(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ V = M(undef, n, k+1)
+ H = zeros(FC, k+1, k)
+
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ if i == 1
+ β = @knrm2(n, b)
+ vᵢ .= b ./ β
+ end
+ mul!(q, A, vᵢ)
+ for j = 1:i
+ vⱼ = view(V,:,j)
+ H[j,i] = @kdot(n, vⱼ, q)
+ @kaxpy!(n, -H[j,i], vⱼ, q)
+ end
+ H[i+1,i] = @knrm2(n, q)
+ vᵢ₊₁ .= q ./ H[i+1,i]
+ end
+ return V, H
+end
+
+"""
+ V, U, L = golub_kahan(A, b, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `k`: the number of iterations of the Golub-Kahan process.
+
+#### Output arguments
+
+* `V`: a dense n × (k+1) matrix;
+* `U`: a dense m × (k+1) matrix;
+* `L`: a sparse (k+1) × (k+1) lower bidiagonal matrix.
+
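+#### Example
+
+A minimal usage sketch (random rectangular data, for illustration only):
+
+    using Krylov, LinearAlgebra
+
+    m, n, k = 60, 40, 10
+    A = rand(m, n)
+    b = rand(m)
+    V, U, L = golub_kahan(A, b, k)
+    norm(A * V[:,1:k] - U * L[:,1:k])  # ≈ 0: AVₖ = Uₖ₊₁Lₖ₊₁.ₖ
+    norm(A' * U - V * L')              # ≈ 0: AᴴUₖ₊₁ = Vₖ₊₁Lₖ₊₁ᴴ
+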
+#### References
+
+* G. H. Golub and W. Kahan, [*Calculating the Singular Values and Pseudo-Inverse of a Matrix*](https://doi.org/10.1137/0702016), SIAM Journal on Numerical Analysis, 2(2), pp. 205--224, 1965.
+* C. C. Paige, [*Bidiagonalization of Matrices and Solution of Linear Equations*](https://doi.org/10.1137/0711019), SIAM Journal on Numerical Analysis, 11(1), pp. 197--209, 1974.
+"""
+function golub_kahan(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ R = real(FC)
+ Aᴴ = A'
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ colptr = zeros(Int, k+2)
+ rowval = zeros(Int, 2k+1)
+ nzval = zeros(R, 2k+1)
+
+ colptr[1] = 1
+ for i = 1:k
+ pos = colptr[i]
+ colptr[i+1] = pos+2
+ rowval[pos] = i
+ rowval[pos+1] = i+1
+ end
+ rowval[2k+1] = k+1
+ colptr[k+2] = 2k+2
+
+ V = M(undef, n, k+1)
+ U = M(undef, m, k+1)
+ L = SparseMatrixCSC(k+1, k+1, colptr, rowval, nzval)
+
+ pαᵢ = 1 # Position of αᵢ in the vector `nzval`
+ for i = 1:k
+ uᵢ = view(U,:,i)
+ vᵢ = view(V,:,i)
+ uᵢ₊₁ = q = view(U,:,i+1)
+ vᵢ₊₁ = p = view(V,:,i+1)
+ if i == 1
+ wᵢ = vᵢ
+ βᵢ = @knrm2(m, b)
+ uᵢ .= b ./ βᵢ
+ mul!(wᵢ, Aᴴ, uᵢ)
+ αᵢ = @knrm2(n, wᵢ)
+ nzval[pαᵢ] = αᵢ # Lᵢ.ᵢ = αᵢ
+ vᵢ .= wᵢ ./ αᵢ
+ end
+ mul!(q, A, vᵢ)
+ αᵢ = nzval[pαᵢ] # αᵢ = Lᵢ.ᵢ
+ @kaxpy!(m, -αᵢ, uᵢ, q)
+ βᵢ₊₁ = @knrm2(m, q)
+ uᵢ₊₁ .= q ./ βᵢ₊₁
+ mul!(p, Aᴴ, uᵢ₊₁)
+ @kaxpy!(n, -βᵢ₊₁, vᵢ, p)
+ αᵢ₊₁ = @knrm2(n, p)
+ vᵢ₊₁ .= p ./ αᵢ₊₁
+ nzval[pαᵢ+1] = βᵢ₊₁ # Lᵢ₊₁.ᵢ = βᵢ₊₁
+ nzval[pαᵢ+2] = αᵢ₊₁ # Lᵢ₊₁.ᵢ₊₁ = αᵢ₊₁
+ pαᵢ = pαᵢ + 2
+ end
+ return V, U, L
+end
+
+"""
+ V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n;
+* `k`: the number of iterations of the Saunders-Simon-Yip process.
+
+#### Output arguments
+
+* `V`: a dense m × (k+1) matrix;
+* `T`: a sparse (k+1) × k tridiagonal matrix;
+* `U`: a dense n × (k+1) matrix;
+* `Tᴴ`: a sparse (k+1) × k tridiagonal matrix.
+
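+#### Example
+
+A minimal usage sketch (random rectangular data, for illustration only):
+
+    using Krylov, LinearAlgebra
+
+    m, n, k = 60, 40, 10
+    A = rand(m, n)
+    b = rand(m); c = rand(n)
+    V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k)
+    norm(A * U[:,1:k] - V * T)    # ≈ 0: AUₖ = Vₖ₊₁Tₖ₊₁.ₖ
+    norm(A' * V[:,1:k] - U * Tᴴ)  # ≈ 0: AᴴVₖ = Uₖ₊₁(Tᴴ)ₖ₊₁.ₖ
+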
+#### Reference
+
+* M. A. Saunders, H. D. Simon, and E. L. Yip, [*Two Conjugate-Gradient-Type Methods for Unsymmetric Linear Equations*](https://doi.org/10.1137/0725052), SIAM Journal on Numerical Analysis, 25(4), pp. 927--940, 1988.
+"""
+function saunders_simon_yip(A, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ Aᴴ = A'
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ colptr = zeros(Int, k+1)
+ rowval = zeros(Int, 3k-1)
+ nzval_T = zeros(FC, 3k-1)
+ nzval_Tᴴ = zeros(FC, 3k-1)
+
+ colptr[1] = 1
+ rowval[1] = 1
+ rowval[2] = 2
+ for i = 1:k
+ colptr[i+1] = 3i
+ if i ≥ 2
+ pos = colptr[i]
+ rowval[pos] = i-1
+ rowval[pos+1] = i
+ rowval[pos+2] = i+1
+ end
+ end
+
+ V = M(undef, m, k+1)
+ U = M(undef, n, k+1)
+ T = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_T)
+ Tᴴ = SparseMatrixCSC(k+1, k, colptr, rowval, nzval_Tᴴ)
+
+ pαᵢ = 1 # Position of αᵢ and ᾱᵢ in the vectors `nzval_T` and `nzval_Tᴴ`
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ uᵢ = view(U,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ uᵢ₊₁ = p = view(U,:,i+1)
+ if i == 1
+ β = @knrm2(m, b)
+ γ = @knrm2(n, c)
+ vᵢ .= b ./ β
+ uᵢ .= c ./ γ
+ end
+ mul!(q, A , uᵢ)
+ mul!(p, Aᴴ, vᵢ)
+ if i ≥ 2
+ vᵢ₋₁ = view(V,:,i-1)
+ uᵢ₋₁ = view(U,:,i-1)
+ βᵢ = nzval_T[pαᵢ-2] # βᵢ = Tᵢ.ᵢ₋₁
+ γᵢ = nzval_T[pαᵢ-1] # γᵢ = Tᵢ₋₁.ᵢ
+ @kaxpy!(m, -γᵢ, vᵢ₋₁, q)
+ @kaxpy!(n, -βᵢ, uᵢ₋₁, p)
+ end
+ αᵢ = @kdot(m, vᵢ, q)
+ nzval_T[pαᵢ] = αᵢ # Tᵢ.ᵢ = αᵢ
+ nzval_Tᴴ[pαᵢ] = conj(αᵢ) # Tᴴᵢ.ᵢ = ᾱᵢ
+ @kaxpy!(m, - αᵢ , vᵢ, q)
+ @kaxpy!(n, -conj(αᵢ), uᵢ, p)
+ βᵢ₊₁ = @knrm2(m, q)
+ γᵢ₊₁ = @knrm2(n, p)
+ vᵢ₊₁ .= q ./ βᵢ₊₁
+ uᵢ₊₁ .= p ./ γᵢ₊₁
+ nzval_T[pαᵢ+1] = βᵢ₊₁ # Tᵢ₊₁.ᵢ = βᵢ₊₁
+ nzval_Tᴴ[pαᵢ+1] = γᵢ₊₁ # Tᴴᵢ₊₁.ᵢ = γᵢ₊₁
+ if i ≤ k-1
+ nzval_T[pαᵢ+2] = γᵢ₊₁ # Tᵢ.ᵢ₊₁ = γᵢ₊₁
+ nzval_Tᴴ[pαᵢ+2] = βᵢ₊₁ # Tᴴᵢ.ᵢ₊₁ = βᵢ₊₁
+ end
+ pαᵢ = pαᵢ + 3
+ end
+ return V, T, U, Tᴴ
+end
+
+"""
+ V, H, U, F = montoison_orban(A, B, b, c, k)
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `B`: a linear operator that models a matrix of dimension n × m;
+* `b`: a vector of length m;
+* `c`: a vector of length n;
+* `k`: the number of iterations of the Montoison-Orban process.
+
+#### Output arguments
+
+* `V`: a dense m × (k+1) matrix;
+* `H`: a dense (k+1) × k upper Hessenberg matrix;
+* `U`: a dense n × (k+1) matrix;
+* `F`: a dense (k+1) × k upper Hessenberg matrix.
+
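+#### Example
+
+A minimal usage sketch (random rectangular data, for illustration only):
+
+    using Krylov, LinearAlgebra
+
+    m, n, k = 60, 40, 10
+    A = rand(m, n); B = rand(n, m)
+    b = rand(m); c = rand(n)
+    V, H, U, F = montoison_orban(A, B, b, c, k)
+    norm(A * U[:,1:k] - V * H)  # ≈ 0: AUₖ = Vₖ₊₁Hₖ₊₁.ₖ
+    norm(B * V[:,1:k] - U * F)  # ≈ 0: BVₖ = Uₖ₊₁Fₖ₊₁.ₖ
+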
+#### Reference
+
+* A. Montoison and D. Orban, [*GPMR: An Iterative Method for Unsymmetric Partitioned Linear Systems*](https://dx.doi.org/10.13140/RG.2.2.24069.68326), Cahier du GERAD G-2021-62, GERAD, Montréal, 2021.
+"""
+function montoison_orban(A, B, b::AbstractVector{FC}, c::AbstractVector{FC}, k::Int) where FC <: FloatOrComplex
+ m, n = size(A)
+ S = ktypeof(b)
+ M = vector_to_matrix(S)
+
+ V = M(undef, m, k+1)
+ U = M(undef, n, k+1)
+ H = zeros(FC, k+1, k)
+ F = zeros(FC, k+1, k)
+
+ for i = 1:k
+ vᵢ = view(V,:,i)
+ uᵢ = view(U,:,i)
+ vᵢ₊₁ = q = view(V,:,i+1)
+ uᵢ₊₁ = p = view(U,:,i+1)
+ if i == 1
+ β = @knrm2(m, b)
+ γ = @knrm2(n, c)
+ vᵢ .= b ./ β
+ uᵢ .= c ./ γ
+ end
+ mul!(q, A, uᵢ)
+ mul!(p, B, vᵢ)
+ for j = 1:i
+ vⱼ = view(V,:,j)
+ uⱼ = view(U,:,j)
+ H[j,i] = @kdot(m, vⱼ, q)
+ @kaxpy!(m, -H[j,i], vⱼ, q)  # vⱼ and q have length m
+ F[j,i] = @kdot(n, uⱼ, p)
+ @kaxpy!(n, -F[j,i], uⱼ, p)  # uⱼ and p have length n
+ end
+ H[i+1,i] = @knrm2(m, q)
+ vᵢ₊₁ .= q ./ H[i+1,i]
+ F[i+1,i] = @knrm2(n, p)
+ uᵢ₊₁ .= p ./ F[i+1,i]
+ end
+ return V, H, U, F
+end
diff --git a/src/krylov_solvers.jl b/src/krylov_solvers.jl
index 8a109a2be..bd2bc8a0e 100644
--- a/src/krylov_solvers.jl
+++ b/src/krylov_solvers.jl
@@ -3,11 +3,13 @@ CgLanczosShiftSolver, MinresQlpSolver, DqgmresSolver, DiomSolver, UsymlqSolver,
UsymqrSolver, TricgSolver, TrimrSolver, TrilqrSolver, CgsSolver, BicgstabSolver,
BilqSolver, QmrSolver, BilqrSolver, CglsSolver, CrlsSolver, CgneSolver, CrmrSolver,
LslqSolver, LsqrSolver, LsmrSolver, LnlqSolver, CraigSolver, CraigmrSolver,
-GmresSolver, FomSolver, GpmrSolver
+GmresSolver, FomSolver, GpmrSolver, FgmresSolver
export solve!, solution, nsolution, statistics, issolved, issolved_primal, issolved_dual,
niterations, Aprod, Atprod, Bprod, warm_start!
+import Base.size, Base.sizeof, Base.format_bytes
+
const KRYLOV_SOLVERS = Dict(
:cg => :CgSolver ,
:cr => :CrSolver ,
@@ -20,6 +22,7 @@ const KRYLOV_SOLVERS = Dict(
:fom => :FomSolver ,
:dqgmres => :DqgmresSolver ,
:gmres => :GmresSolver ,
+ :fgmres => :FgmresSolver ,
:gpmr => :GpmrSolver ,
:usymlq => :UsymlqSolver ,
:usymqr => :UsymqrSolver ,
@@ -51,12 +54,14 @@ Type for storing the vectors required by the in-place version of MINRES.
The outer constructors
- solver = MinresSolver(n, m, S; window :: Int=5)
+ solver = MinresSolver(m, n, S; window :: Int=5)
solver = MinresSolver(A, b; window :: Int=5)
may be used in order to create these vectors.
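+
+For instance, the workspace can be reused across solves with the in-place method
+`minres!` (a usage sketch with random symmetric data, for illustration only):
+
+    using Krylov, LinearAlgebra
+
+    n = 100
+    A = rand(n, n); A = A + A'  # symmetric matrix
+    b = rand(n)
+    solver = MinresSolver(A, b)
+    minres!(solver, A, b)       # reuses the storage held by `solver`
+    solver.stats.solved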
"""
mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r1 :: S
@@ -68,29 +73,29 @@ mutable struct MinresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
err_vec :: Vector{T}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function MinresSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r1 = S(undef, n)
- r2 = S(undef, n)
- w1 = S(undef, n)
- w2 = S(undef, n)
- y = S(undef, n)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats)
- return solver
- end
+function MinresSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r1 = S(undef, n)
+ r2 = S(undef, n)
+ w1 = S(undef, n)
+ w2 = S(undef, n)
+ y = S(undef, n)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = MinresSolver{T,FC,S}(m, n, Δx, x, r1, r2, w1, w2, y, v, err_vec, false, stats)
+ return solver
+end
- function MinresSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- MinresSolver(n, m, S, window=window)
- end
+function MinresSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ MinresSolver(m, n, S, window=window)
end
"""
@@ -98,12 +103,14 @@ Type for storing the vectors required by the in-place version of CG.
The outer constructors
- solver = CgSolver(n, m, S)
+ solver = CgSolver(m, n, S)
solver = CgSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r :: S
@@ -112,26 +119,26 @@ mutable struct CgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
z :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function CgSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- p = S(undef, n)
- Ap = S(undef, n)
- z = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, p, Ap, z, false, stats)
- return solver
- end
+function CgSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ p = S(undef, n)
+ Ap = S(undef, n)
+ z = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CgSolver{T,FC,S}(m, n, Δx, x, r, p, Ap, z, false, stats)
+ return solver
+end
- function CgSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgSolver(n, m, S)
- end
+function CgSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgSolver(m, n, S)
end
"""
@@ -139,12 +146,14 @@ Type for storing the vectors required by the in-place version of CR.
The outer constructors
- solver = CrSolver(n, m, S)
+ solver = CrSolver(m, n, S)
solver = CrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r :: S
@@ -154,27 +163,27 @@ mutable struct CrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
Mq :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function CrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- p = S(undef, n)
- q = S(undef, n)
- Ar = S(undef, n)
- Mq = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, p, q, Ar, Mq, false, stats)
- return solver
- end
+function CrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ p = S(undef, n)
+ q = S(undef, n)
+ Ar = S(undef, n)
+ Mq = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CrSolver{T,FC,S}(m, n, Δx, x, r, p, q, Ar, Mq, false, stats)
+ return solver
+end
- function CrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CrSolver(n, m, S)
- end
+function CrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CrSolver(m, n, S)
end
"""
@@ -182,12 +191,14 @@ Type for storing the vectors required by the in-place version of SYMMLQ.
The outer constructors
- solver = SymmlqSolver(n, m, S)
+ solver = SymmlqSolver(m, n, S)
solver = SymmlqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
Mvold :: S
@@ -200,30 +211,30 @@ mutable struct SymmlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
sprod :: Vector{T}
warm_start :: Bool
stats :: SymmlqStats{T}
+end
- function SymmlqSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- Mvold = S(undef, n)
- Mv = S(undef, n)
- Mv_next = S(undef, n)
- w̅ = S(undef, n)
- v = S(undef, 0)
- clist = zeros(T, window)
- zlist = zeros(T, window)
- sprod = ones(T, window)
- stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), "unknown")
- solver = new{T,FC,S}(Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats)
- return solver
- end
+function SymmlqSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ Mvold = S(undef, n)
+ Mv = S(undef, n)
+ Mv_next = S(undef, n)
+ w̅ = S(undef, n)
+ v = S(undef, 0)
+ clist = zeros(T, window)
+ zlist = zeros(T, window)
+ sprod = ones(T, window)
+ stats = SymmlqStats(0, false, T[], Union{T, Missing}[], T[], Union{T, Missing}[], T(NaN), T(NaN), "unknown")
+ solver = SymmlqSolver{T,FC,S}(m, n, Δx, x, Mvold, Mv, Mv_next, w̅, v, clist, zlist, sprod, false, stats)
+ return solver
+end
- function SymmlqSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- SymmlqSolver(n, m, S, window=window)
- end
+function SymmlqSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ SymmlqSolver(m, n, S, window=window)
end
"""
@@ -231,12 +242,14 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS.
The outer constructors
- solver = CgLanczosSolver(n, m, S)
+ solver = CgLanczosSolver(m, n, S)
solver = CgLanczosSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
Mv :: S
@@ -246,27 +259,27 @@ mutable struct CgLanczosSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
warm_start :: Bool
stats :: LanczosStats{T}
+end
- function CgLanczosSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- Mv = S(undef, n)
- Mv_prev = S(undef, n)
- p = S(undef, n)
- Mv_next = S(undef, n)
- v = S(undef, 0)
- stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), "unknown")
- solver = new{T,FC,S}(Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats)
- return solver
- end
+function CgLanczosSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ Mv = S(undef, n)
+ Mv_prev = S(undef, n)
+ p = S(undef, n)
+ Mv_next = S(undef, n)
+ v = S(undef, 0)
+ stats = LanczosStats(0, false, T[], false, T(NaN), T(NaN), "unknown")
+ solver = CgLanczosSolver{T,FC,S}(m, n, Δx, x, Mv, Mv_prev, p, Mv_next, v, false, stats)
+ return solver
+end
- function CgLanczosSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgLanczosSolver(n, m, S)
- end
+function CgLanczosSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgLanczosSolver(m, n, S)
end
"""
@@ -274,12 +287,14 @@ Type for storing the vectors required by the in-place version of CG-LANCZOS-SHIF
The outer constructors
- solver = CgLanczosShiftSolver(n, m, nshifts, S)
+ solver = CgLanczosShiftSolver(m, n, nshifts, S)
solver = CgLanczosShiftSolver(A, b, nshifts)
may be used in order to create these vectors.
"""
mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Mv :: S
Mv_prev :: S
Mv_next :: S
@@ -294,34 +309,34 @@ mutable struct CgLanczosShiftSolver{T,FC,S} <: KrylovSolver{T,FC,S}
converged :: BitVector
not_cv :: BitVector
stats :: LanczosShiftStats{T}
+end
- function CgLanczosShiftSolver(n, m, nshifts, S)
- FC = eltype(S)
- T = real(FC)
- Mv = S(undef, n)
- Mv_prev = S(undef, n)
- Mv_next = S(undef, n)
- v = S(undef, 0)
- x = [S(undef, n) for i = 1 : nshifts]
- p = [S(undef, n) for i = 1 : nshifts]
- σ = Vector{T}(undef, nshifts)
- δhat = Vector{T}(undef, nshifts)
- ω = Vector{T}(undef, nshifts)
- γ = Vector{T}(undef, nshifts)
- rNorms = Vector{T}(undef, nshifts)
- indefinite = BitVector(undef, nshifts)
- converged = BitVector(undef, nshifts)
- not_cv = BitVector(undef, nshifts)
- stats = LanczosShiftStats(0, false, [T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), "unknown")
- solver = new{T,FC,S}(Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats)
- return solver
- end
+function CgLanczosShiftSolver(m, n, nshifts, S)
+ FC = eltype(S)
+ T = real(FC)
+ Mv = S(undef, n)
+ Mv_prev = S(undef, n)
+ Mv_next = S(undef, n)
+ v = S(undef, 0)
+ x = S[S(undef, n) for i = 1 : nshifts]
+ p = S[S(undef, n) for i = 1 : nshifts]
+ σ = Vector{T}(undef, nshifts)
+ δhat = Vector{T}(undef, nshifts)
+ ω = Vector{T}(undef, nshifts)
+ γ = Vector{T}(undef, nshifts)
+ rNorms = Vector{T}(undef, nshifts)
+ indefinite = BitVector(undef, nshifts)
+ converged = BitVector(undef, nshifts)
+ not_cv = BitVector(undef, nshifts)
+ stats = LanczosShiftStats(0, false, Vector{T}[T[] for i = 1 : nshifts], indefinite, T(NaN), T(NaN), "unknown")
+ solver = CgLanczosShiftSolver{T,FC,S}(m, n, Mv, Mv_prev, Mv_next, v, x, p, σ, δhat, ω, γ, rNorms, converged, not_cv, stats)
+ return solver
+end
- function CgLanczosShiftSolver(A, b, nshifts)
- n, m = size(A)
- S = ktypeof(b)
- CgLanczosShiftSolver(n, m, nshifts, S)
- end
+function CgLanczosShiftSolver(A, b, nshifts)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgLanczosShiftSolver(m, n, nshifts, S)
end
"""
@@ -329,12 +344,14 @@ Type for storing the vectors required by the in-place version of MINRES-QLP.
The outer constructors
- solver = MinresQlpSolver(n, m, S)
+ solver = MinresQlpSolver(m, n, S)
solver = MinresQlpSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
wₖ₋₁ :: S
wₖ :: S
@@ -345,28 +362,28 @@ mutable struct MinresQlpSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vₖ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function MinresQlpSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- wₖ₋₁ = S(undef, n)
- wₖ = S(undef, n)
- M⁻¹vₖ₋₁ = S(undef, n)
- M⁻¹vₖ = S(undef, n)
- x = S(undef, n)
- p = S(undef, n)
- vₖ = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats)
- return solver
- end
+function MinresQlpSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ wₖ₋₁ = S(undef, n)
+ wₖ = S(undef, n)
+ M⁻¹vₖ₋₁ = S(undef, n)
+ M⁻¹vₖ = S(undef, n)
+ x = S(undef, n)
+ p = S(undef, n)
+ vₖ = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = MinresQlpSolver{T,FC,S}(m, n, Δx, wₖ₋₁, wₖ, M⁻¹vₖ₋₁, M⁻¹vₖ, x, p, vₖ, false, stats)
+ return solver
+end
- function MinresQlpSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- MinresQlpSolver(n, m, S)
- end
+function MinresQlpSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ MinresQlpSolver(m, n, S)
end
"""
@@ -374,13 +391,15 @@ Type for storing the vectors required by the in-place version of DQGMRES.
The outer constructors
- solver = DqgmresSolver(n, m, memory, S)
+ solver = DqgmresSolver(m, n, memory, S)
solver = DqgmresSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n` if the value given is larger than `n`.
"""
mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
t :: S
@@ -393,31 +412,31 @@ mutable struct DqgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
H :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function DqgmresSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- t = S(undef, n)
- z = S(undef, 0)
- w = S(undef, 0)
- P = [S(undef, n) for i = 1 : memory]
- V = [S(undef, n) for i = 1 : memory]
- c = Vector{T}(undef, memory)
- s = Vector{FC}(undef, memory)
- H = Vector{FC}(undef, memory+2)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, t, z, w, P, V, c, s, H, false, stats)
- return solver
- end
+function DqgmresSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ t = S(undef, n)
+ z = S(undef, 0)
+ w = S(undef, 0)
+ P = S[S(undef, n) for i = 1 : memory]
+ V = S[S(undef, n) for i = 1 : memory]
+ c = Vector{T}(undef, memory)
+ s = Vector{FC}(undef, memory)
+ H = Vector{FC}(undef, memory+1)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = DqgmresSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, c, s, H, false, stats)
+ return solver
+end
- function DqgmresSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- DqgmresSolver(n, m, memory, S)
- end
+function DqgmresSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ DqgmresSolver(m, n, memory, S)
end
"""
@@ -425,13 +444,15 @@ Type for storing the vectors required by the in-place version of DIOM.
The outer constructors
- solver = DiomSolver(n, m, memory, S)
+ solver = DiomSolver(m, n, memory, S)
solver = DiomSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n` if the value given is larger than `n`.
"""
mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
t :: S
@@ -443,30 +464,30 @@ mutable struct DiomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
H :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function DiomSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- t = S(undef, n)
- z = S(undef, 0)
- w = S(undef, 0)
- P = [S(undef, n) for i = 1 : memory]
- V = [S(undef, n) for i = 1 : memory]
- L = Vector{FC}(undef, memory)
- H = Vector{FC}(undef, memory+2)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, t, z, w, P, V, L, H, false, stats)
- return solver
- end
+function DiomSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ t = S(undef, n)
+ z = S(undef, 0)
+ w = S(undef, 0)
+ P = S[S(undef, n) for i = 1 : memory-1]
+ V = S[S(undef, n) for i = 1 : memory]
+ L = Vector{FC}(undef, memory-1)
+ H = Vector{FC}(undef, memory)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = DiomSolver{T,FC,S}(m, n, Δx, x, t, z, w, P, V, L, H, false, stats)
+ return solver
+end
- function DiomSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- DiomSolver(n, m, memory, S)
- end
+function DiomSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ DiomSolver(m, n, memory, S)
end
"""
@@ -474,12 +495,14 @@ Type for storing the vectors required by the in-place version of USYMLQ.
The outer constructors
- solver = UsymlqSolver(n, m, S)
+ solver = UsymlqSolver(m, n, S)
solver = UsymlqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
p :: S
@@ -491,29 +514,29 @@ mutable struct UsymlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
q :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function UsymlqSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, m)
- uₖ = S(undef, m)
- p = S(undef, m)
- Δx = S(undef, 0)
- x = S(undef, m)
- d̅ = S(undef, m)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- q = S(undef, n)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats)
- return solver
- end
+function UsymlqSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ d̅ = S(undef, n)
+ vₖ₋₁ = S(undef, m)
+ vₖ = S(undef, m)
+ q = S(undef, m)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = UsymlqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, Δx, x, d̅, vₖ₋₁, vₖ, q, false, stats)
+ return solver
+end
- function UsymlqSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- UsymlqSolver(n, m, S)
- end
+function UsymlqSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ UsymlqSolver(m, n, S)
end
"""
@@ -521,12 +544,14 @@ Type for storing the vectors required by the in-place version of USYMQR.
The outer constructors
- solver = UsymqrSolver(n, m, S)
+ solver = UsymqrSolver(m, n, S)
solver = UsymqrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
vₖ₋₁ :: S
vₖ :: S
q :: S
@@ -539,30 +564,30 @@ mutable struct UsymqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
p :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function UsymqrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- q = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, m)
- wₖ₋₂ = S(undef, m)
- wₖ₋₁ = S(undef, m)
- uₖ₋₁ = S(undef, m)
- uₖ = S(undef, m)
- p = S(undef, m)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats)
- return solver
- end
+function UsymqrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ vₖ₋₁ = S(undef, m)
+ vₖ = S(undef, m)
+ q = S(undef, m)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ wₖ₋₂ = S(undef, n)
+ wₖ₋₁ = S(undef, n)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ p = S(undef, n)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = UsymqrSolver{T,FC,S}(m, n, vₖ₋₁, vₖ, q, Δx, x, wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, p, false, stats)
+ return solver
+end
- function UsymqrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- UsymqrSolver(n, m, S)
- end
+function UsymqrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ UsymqrSolver(m, n, S)
end
"""
@@ -570,12 +595,14 @@ Type for storing the vectors required by the in-place version of TRICG.
The outer constructors
- solver = TricgSolver(n, m, S)
+ solver = TricgSolver(m, n, S)
solver = TricgSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
y :: S
N⁻¹uₖ₋₁ :: S
N⁻¹uₖ :: S
@@ -594,36 +621,36 @@ mutable struct TricgSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vₖ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function TricgSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- y = S(undef, m)
- N⁻¹uₖ₋₁ = S(undef, m)
- N⁻¹uₖ = S(undef, m)
- p = S(undef, m)
- gy₂ₖ₋₁ = S(undef, m)
- gy₂ₖ = S(undef, m)
- x = S(undef, n)
- M⁻¹vₖ₋₁ = S(undef, n)
- M⁻¹vₖ = S(undef, n)
- q = S(undef, n)
- gx₂ₖ₋₁ = S(undef, n)
- gx₂ₖ = S(undef, n)
- Δx = S(undef, 0)
- Δy = S(undef, 0)
- uₖ = S(undef, 0)
- vₖ = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
- return solver
- end
+function TricgSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ y = S(undef, n)
+ N⁻¹uₖ₋₁ = S(undef, n)
+ N⁻¹uₖ = S(undef, n)
+ p = S(undef, n)
+ gy₂ₖ₋₁ = S(undef, n)
+ gy₂ₖ = S(undef, n)
+ x = S(undef, m)
+ M⁻¹vₖ₋₁ = S(undef, m)
+ M⁻¹vₖ = S(undef, m)
+ q = S(undef, m)
+ gx₂ₖ₋₁ = S(undef, m)
+ gx₂ₖ = S(undef, m)
+ Δx = S(undef, 0)
+ Δy = S(undef, 0)
+ uₖ = S(undef, 0)
+ vₖ = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = TricgSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
+ return solver
+end
- function TricgSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- TricgSolver(n, m, S)
- end
+function TricgSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ TricgSolver(m, n, S)
end
"""
@@ -631,12 +658,14 @@ Type for storing the vectors required by the in-place version of TRIMR.
The outer constructors
- solver = TrimrSolver(n, m, S)
+ solver = TrimrSolver(m, n, S)
solver = TrimrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
y :: S
N⁻¹uₖ₋₁ :: S
N⁻¹uₖ :: S
@@ -659,40 +688,40 @@ mutable struct TrimrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vₖ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function TrimrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- y = S(undef, m)
- N⁻¹uₖ₋₁ = S(undef, m)
- N⁻¹uₖ = S(undef, m)
- p = S(undef, m)
- gy₂ₖ₋₃ = S(undef, m)
- gy₂ₖ₋₂ = S(undef, m)
- gy₂ₖ₋₁ = S(undef, m)
- gy₂ₖ = S(undef, m)
- x = S(undef, n)
- M⁻¹vₖ₋₁ = S(undef, n)
- M⁻¹vₖ = S(undef, n)
- q = S(undef, n)
- gx₂ₖ₋₃ = S(undef, n)
- gx₂ₖ₋₂ = S(undef, n)
- gx₂ₖ₋₁ = S(undef, n)
- gx₂ₖ = S(undef, n)
- Δx = S(undef, 0)
- Δy = S(undef, 0)
- uₖ = S(undef, 0)
- vₖ = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
- return solver
- end
+function TrimrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ y = S(undef, n)
+ N⁻¹uₖ₋₁ = S(undef, n)
+ N⁻¹uₖ = S(undef, n)
+ p = S(undef, n)
+ gy₂ₖ₋₃ = S(undef, n)
+ gy₂ₖ₋₂ = S(undef, n)
+ gy₂ₖ₋₁ = S(undef, n)
+ gy₂ₖ = S(undef, n)
+ x = S(undef, m)
+ M⁻¹vₖ₋₁ = S(undef, m)
+ M⁻¹vₖ = S(undef, m)
+ q = S(undef, m)
+ gx₂ₖ₋₃ = S(undef, m)
+ gx₂ₖ₋₂ = S(undef, m)
+ gx₂ₖ₋₁ = S(undef, m)
+ gx₂ₖ = S(undef, m)
+ Δx = S(undef, 0)
+ Δy = S(undef, 0)
+ uₖ = S(undef, 0)
+ vₖ = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = TrimrSolver{T,FC,S}(m, n, y, N⁻¹uₖ₋₁, N⁻¹uₖ, p, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, x, M⁻¹vₖ₋₁, M⁻¹vₖ, q, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, Δx, Δy, uₖ, vₖ, false, stats)
+ return solver
+end
- function TrimrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- TrimrSolver(n, m, S)
- end
+function TrimrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ TrimrSolver(m, n, S)
end
"""
@@ -700,12 +729,14 @@ Type for storing the vectors required by the in-place version of TRILQR.
The outer constructors
- solver = TrilqrSolver(n, m, S)
+ solver = TrilqrSolver(m, n, S)
solver = TrilqrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
p :: S
@@ -721,33 +752,33 @@ mutable struct TrilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
wₖ₋₂ :: S
warm_start :: Bool
stats :: AdjointStats{T}
+end
- function TrilqrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, m)
- uₖ = S(undef, m)
- p = S(undef, m)
- d̅ = S(undef, m)
- Δx = S(undef, 0)
- x = S(undef, m)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- q = S(undef, n)
- Δy = S(undef, 0)
- y = S(undef, n)
- wₖ₋₃ = S(undef, n)
- wₖ₋₂ = S(undef, n)
- stats = AdjointStats(0, false, false, T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats)
- return solver
- end
+function TrilqrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ p = S(undef, n)
+ d̅ = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ vₖ₋₁ = S(undef, m)
+ vₖ = S(undef, m)
+ q = S(undef, m)
+ Δy = S(undef, 0)
+ y = S(undef, m)
+ wₖ₋₃ = S(undef, m)
+ wₖ₋₂ = S(undef, m)
+ stats = AdjointStats(0, false, false, T[], T[], "unknown")
+ solver = TrilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, p, d̅, Δx, x, vₖ₋₁, vₖ, q, Δy, y, wₖ₋₃, wₖ₋₂, false, stats)
+ return solver
+end
- function TrilqrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- TrilqrSolver(n, m, S)
- end
+function TrilqrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ TrilqrSolver(m, n, S)
end
"""
@@ -755,12 +786,14 @@ Type for storing the vectors required by the in-place version of CGS.
The outer constructors
- solver = CgsSolver(n, m, S)
+ solver = CgsSolver(m, n, S)
solver = CgsSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r :: S
@@ -772,29 +805,30 @@ mutable struct CgsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
vw :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function CgsSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- u = S(undef, n)
- p = S(undef, n)
- q = S(undef, n)
- ts = S(undef, n)
- yz = S(undef, 0)
- vw = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, u, p, q, ts, yz, vw, false, stats)
- return solver
- end
+function CgsSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ u = S(undef, n)
+ p = S(undef, n)
+ q = S(undef, n)
+ ts = S(undef, n)
+ yz = S(undef, 0)
+ vw = S(undef, 0)
+
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CgsSolver{T,FC,S}(m, n, Δx, x, r, u, p, q, ts, yz, vw, false, stats)
+ return solver
+end
- function CgsSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgsSolver(n, m, S)
- end
+function CgsSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgsSolver(m, n, S)
end
"""
@@ -802,12 +836,14 @@ Type for storing the vectors required by the in-place version of BICGSTAB.
The outer constructors
- solver = BicgstabSolver(n, m, S)
+ solver = BicgstabSolver(m, n, S)
solver = BicgstabSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
r :: S
@@ -819,29 +855,29 @@ mutable struct BicgstabSolver{T,FC,S} <: KrylovSolver{T,FC,S}
t :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function BicgstabSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- r = S(undef, n)
- p = S(undef, n)
- v = S(undef, n)
- s = S(undef, n)
- qd = S(undef, n)
- yz = S(undef, 0)
- t = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, r, p, v, s, qd, yz, t, false, stats)
- return solver
- end
+function BicgstabSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ r = S(undef, n)
+ p = S(undef, n)
+ v = S(undef, n)
+ s = S(undef, n)
+ qd = S(undef, n)
+ yz = S(undef, 0)
+ t = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = BicgstabSolver{T,FC,S}(m, n, Δx, x, r, p, v, s, qd, yz, t, false, stats)
+ return solver
+end
- function BicgstabSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- BicgstabSolver(n, m, S)
- end
+function BicgstabSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ BicgstabSolver(m, n, S)
end
"""
@@ -849,12 +885,14 @@ Type for storing the vectors required by the in-place version of BILQ.
The outer constructors
- solver = BilqSolver(n, m, S)
+ solver = BilqSolver(m, n, S)
solver = BilqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
q :: S
@@ -866,29 +904,29 @@ mutable struct BilqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
d̅ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function BilqSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, n)
- uₖ = S(undef, n)
- q = S(undef, n)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- p = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, n)
- d̅ = S(undef, n)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats)
- return solver
- end
+function BilqSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ q = S(undef, n)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ d̅ = S(undef, n)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = BilqSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, d̅, false, stats)
+ return solver
+end
- function BilqSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- BilqSolver(n, m, S)
- end
+function BilqSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ BilqSolver(m, n, S)
end
"""
@@ -896,12 +934,14 @@ Type for storing the vectors required by the in-place version of QMR.
The outer constructors
- solver = QmrSolver(n, m, S)
+ solver = QmrSolver(m, n, S)
solver = QmrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
q :: S
@@ -914,30 +954,30 @@ mutable struct QmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
wₖ₋₁ :: S
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function QmrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, n)
- uₖ = S(undef, n)
- q = S(undef, n)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- p = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, n)
- wₖ₋₂ = S(undef, n)
- wₖ₋₁ = S(undef, n)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats)
- return solver
- end
+function QmrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ q = S(undef, n)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ wₖ₋₂ = S(undef, n)
+ wₖ₋₁ = S(undef, n)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = QmrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, wₖ₋₂, wₖ₋₁, false, stats)
+ return solver
+end
- function QmrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- QmrSolver(n, m, S)
- end
+function QmrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ QmrSolver(m, n, S)
end
"""
@@ -945,12 +985,14 @@ Type for storing the vectors required by the in-place version of BILQR.
The outer constructors
- solver = BilqrSolver(n, m, S)
+ solver = BilqrSolver(m, n, S)
solver = BilqrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
uₖ₋₁ :: S
uₖ :: S
q :: S
@@ -966,33 +1008,33 @@ mutable struct BilqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
wₖ₋₂ :: S
warm_start :: Bool
stats :: AdjointStats{T}
+end
- function BilqrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- uₖ₋₁ = S(undef, n)
- uₖ = S(undef, n)
- q = S(undef, n)
- vₖ₋₁ = S(undef, n)
- vₖ = S(undef, n)
- p = S(undef, n)
- Δx = S(undef, 0)
- x = S(undef, n)
- Δy = S(undef, 0)
- y = S(undef, n)
- d̅ = S(undef, n)
- wₖ₋₃ = S(undef, n)
- wₖ₋₂ = S(undef, n)
- stats = AdjointStats(0, false, false, T[], T[], "unknown")
- solver = new{T,FC,S}(uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats)
- return solver
- end
+function BilqrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ uₖ₋₁ = S(undef, n)
+ uₖ = S(undef, n)
+ q = S(undef, n)
+ vₖ₋₁ = S(undef, n)
+ vₖ = S(undef, n)
+ p = S(undef, n)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ Δy = S(undef, 0)
+ y = S(undef, n)
+ d̅ = S(undef, n)
+ wₖ₋₃ = S(undef, n)
+ wₖ₋₂ = S(undef, n)
+ stats = AdjointStats(0, false, false, T[], T[], "unknown")
+ solver = BilqrSolver{T,FC,S}(m, n, uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p, Δx, x, Δy, y, d̅, wₖ₋₃, wₖ₋₂, false, stats)
+ return solver
+end
- function BilqrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- BilqrSolver(n, m, S)
- end
+function BilqrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ BilqrSolver(m, n, S)
end
"""
@@ -1000,12 +1042,14 @@ Type for storing the vectors required by the in-place version of CGLS.
The outer constructors
- solver = CglsSolver(n, m, S)
+ solver = CglsSolver(m, n, S)
solver = CglsSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
p :: S
s :: S
@@ -1013,26 +1057,26 @@ mutable struct CglsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
q :: S
Mr :: S
stats :: SimpleStats{T}
+end
- function CglsSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- s = S(undef, m)
- r = S(undef, n)
- q = S(undef, n)
- Mr = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, s, r, q, Mr, stats)
- return solver
- end
+function CglsSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ p = S(undef, n)
+ s = S(undef, n)
+ r = S(undef, m)
+ q = S(undef, m)
+ Mr = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CglsSolver{T,FC,S}(m, n, x, p, s, r, q, Mr, stats)
+ return solver
+end
- function CglsSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CglsSolver(n, m, S)
- end
+function CglsSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CglsSolver(m, n, S)
end
"""
@@ -1040,12 +1084,14 @@ Type for storing the vectors required by the in-place version of CRLS.
The outer constructors
- solver = CrlsSolver(n, m, S)
+ solver = CrlsSolver(m, n, S)
solver = CrlsSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
p :: S
Ar :: S
@@ -1055,28 +1101,28 @@ mutable struct CrlsSolver{T,FC,S} <: KrylovSolver{T,FC,S}
s :: S
Ms :: S
stats :: SimpleStats{T}
+end
- function CrlsSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- Ar = S(undef, m)
- q = S(undef, m)
- r = S(undef, n)
- Ap = S(undef, n)
- s = S(undef, n)
- Ms = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, Ar, q, r, Ap, s, Ms, stats)
- return solver
- end
+function CrlsSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ p = S(undef, n)
+ Ar = S(undef, n)
+ q = S(undef, n)
+ r = S(undef, m)
+ Ap = S(undef, m)
+ s = S(undef, m)
+ Ms = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CrlsSolver{T,FC,S}(m, n, x, p, Ar, q, r, Ap, s, Ms, stats)
+ return solver
+end
- function CrlsSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CrlsSolver(n, m, S)
- end
+function CrlsSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CrlsSolver(m, n, S)
end
"""
@@ -1084,41 +1130,43 @@ Type for storing the vectors required by the in-place version of CGNE.
The outer constructors
- solver = CgneSolver(n, m, S)
+ solver = CgneSolver(m, n, S)
solver = CgneSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CgneSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
p :: S
- Aᵀz :: S
+ Aᴴz :: S
r :: S
q :: S
s :: S
z :: S
stats :: SimpleStats{T}
+end
- function CgneSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- Aᵀz = S(undef, m)
- r = S(undef, n)
- q = S(undef, n)
- s = S(undef, 0)
- z = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, Aᵀz, r, q, s, z, stats)
- return solver
- end
+function CgneSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ p = S(undef, n)
+ Aᴴz = S(undef, n)
+ r = S(undef, m)
+ q = S(undef, m)
+ s = S(undef, 0)
+ z = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CgneSolver{T,FC,S}(m, n, x, p, Aᴴz, r, q, s, z, stats)
+ return solver
+end
- function CgneSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CgneSolver(n, m, S)
- end
+function CgneSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CgneSolver(m, n, S)
end
"""
@@ -1126,41 +1174,43 @@ Type for storing the vectors required by the in-place version of CRMR.
The outer constructors
- solver = CrmrSolver(n, m, S)
+ solver = CrmrSolver(m, n, S)
solver = CrmrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CrmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
p :: S
- Aᵀr :: S
+ Aᴴr :: S
r :: S
q :: S
- Mq :: S
+ Nq :: S
s :: S
stats :: SimpleStats{T}
+end
- function CrmrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- p = S(undef, m)
- Aᵀr = S(undef, m)
- r = S(undef, n)
- q = S(undef, n)
- Mq = S(undef, 0)
- s = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, p, Aᵀr, r, q, Mq, s, stats)
- return solver
- end
+function CrmrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ p = S(undef, n)
+ Aᴴr = S(undef, n)
+ r = S(undef, m)
+ q = S(undef, m)
+ Nq = S(undef, 0)
+ s = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CrmrSolver{T,FC,S}(m, n, x, p, Aᴴr, r, q, Nq, s, stats)
+ return solver
+end
- function CrmrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CrmrSolver(n, m, S)
- end
+function CrmrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CrmrSolver(m, n, S)
end
"""
@@ -1168,15 +1218,17 @@ Type for storing the vectors required by the in-place version of LSLQ.
The outer constructors
- solver = LslqSolver(n, m, S)
+ solver = LslqSolver(m, n, S)
solver = LslqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
w̄ :: S
Mu :: S
Av :: S
@@ -1184,29 +1236,29 @@ mutable struct LslqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
err_vec :: Vector{T}
stats :: LSLQStats{T}
+end
- function LslqSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- w̄ = S(undef, m)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, w̄, Mu, Av, u, v, err_vec, stats)
- return solver
- end
+function LslqSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ w̄ = S(undef, n)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = LSLQStats(0, false, false, T[], T[], T[], false, T[], T[], "unknown")
+ solver = LslqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w̄, Mu, Av, u, v, err_vec, stats)
+ return solver
+end
- function LslqSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- LslqSolver(n, m, S, window=window)
- end
+function LslqSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ LslqSolver(m, n, S, window=window)
end
"""
@@ -1214,15 +1266,17 @@ Type for storing the vectors required by the in-place version of LSQR.
The outer constructors
- solver = LsqrSolver(n, m, S)
+ solver = LsqrSolver(m, n, S)
solver = LsqrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
w :: S
Mu :: S
Av :: S
@@ -1230,29 +1284,29 @@ mutable struct LsqrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
err_vec :: Vector{T}
stats :: SimpleStats{T}
+end
- function LsqrSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- w = S(undef, m)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, w, Mu, Av, u, v, err_vec, stats)
- return solver
- end
+function LsqrSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ w = S(undef, n)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = LsqrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, w, Mu, Av, u, v, err_vec, stats)
+ return solver
+end
- function LsqrSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- LsqrSolver(n, m, S, window=window)
- end
+function LsqrSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ LsqrSolver(m, n, S, window=window)
end
"""
@@ -1260,15 +1314,17 @@ Type for storing the vectors required by the in-place version of LSMR.
The outer constructors
- solver = LsmrSolver(n, m, S)
+ solver = LsmrSolver(m, n, S)
solver = LsmrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
h :: S
hbar :: S
Mu :: S
@@ -1277,30 +1333,30 @@ mutable struct LsmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
err_vec :: Vector{T}
stats :: LsmrStats{T}
+end
- function LsmrSolver(n, m, S; window :: Int=5)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- h = S(undef, m)
- hbar = S(undef, m)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- err_vec = zeros(T, window)
- stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, h, hbar, Mu, Av, u, v, err_vec, stats)
- return solver
- end
+function LsmrSolver(m, n, S; window :: Int=5)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ h = S(undef, n)
+ hbar = S(undef, n)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ err_vec = zeros(T, window)
+ stats = LsmrStats(0, false, false, T[], T[], zero(T), zero(T), zero(T), zero(T), zero(T), "unknown")
+ solver = LsmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, h, hbar, Mu, Av, u, v, err_vec, stats)
+ return solver
+end
- function LsmrSolver(A, b; window :: Int=5)
- n, m = size(A)
- S = ktypeof(b)
- LsmrSolver(n, m, S, window=window)
- end
+function LsmrSolver(A, b; window :: Int=5)
+ m, n = size(A)
+ S = ktypeof(b)
+ LsmrSolver(m, n, S, window=window)
end
"""
@@ -1308,15 +1364,17 @@ Type for storing the vectors required by the in-place version of LNLQ.
The outer constructors
- solver = LnlqSolver(n, m, S)
+ solver = LnlqSolver(m, n, S)
solver = LnlqSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
y :: S
w̄ :: S
Mu :: S
@@ -1325,30 +1383,30 @@ mutable struct LnlqSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
q :: S
stats :: LNLQStats{T}
+end
- function LnlqSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- y = S(undef, n)
- w̄ = S(undef, n)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- q = S(undef, 0)
- stats = LNLQStats(0, false, T[], false, T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, y, w̄, Mu, Av, u, v, q, stats)
- return solver
- end
+function LnlqSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ y = S(undef, m)
+ w̄ = S(undef, m)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ q = S(undef, 0)
+ stats = LNLQStats(0, false, T[], false, T[], T[], "unknown")
+ solver = LnlqSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w̄, Mu, Av, u, v, q, stats)
+ return solver
+end
- function LnlqSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- LnlqSolver(n, m, S)
- end
+function LnlqSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ LnlqSolver(m, n, S)
end
"""
@@ -1356,15 +1414,17 @@ Type for storing the vectors required by the in-place version of CRAIG.
The outer constructors
- solver = CraigSolver(n, m, S)
+ solver = CraigSolver(m, n, S)
solver = CraigSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
y :: S
w :: S
Mu :: S
@@ -1373,30 +1433,30 @@ mutable struct CraigSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
w2 :: S
stats :: SimpleStats{T}
+end
- function CraigSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- y = S(undef, n)
- w = S(undef, n)
- Mu = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- w2 = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, y, w, Mu, Av, u, v, w2, stats)
- return solver
- end
+function CraigSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ y = S(undef, m)
+ w = S(undef, m)
+ Mu = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ w2 = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CraigSolver{T,FC,S}(m, n, x, Nv, Aᴴu, y, w, Mu, Av, u, v, w2, stats)
+ return solver
+end
- function CraigSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CraigSolver(n, m, S)
- end
+function CraigSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CraigSolver(m, n, S)
end
"""
@@ -1404,15 +1464,17 @@ Type for storing the vectors required by the in-place version of CRAIGMR.
The outer constructors
- solver = CraigmrSolver(n, m, S)
+ solver = CraigmrSolver(m, n, S)
solver = CraigmrSolver(A, b)
may be used in order to create these vectors.
"""
mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
x :: S
Nv :: S
- Aᵀu :: S
+ Aᴴu :: S
d :: S
y :: S
Mu :: S
@@ -1423,32 +1485,32 @@ mutable struct CraigmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
v :: S
q :: S
stats :: SimpleStats{T}
+end
- function CraigmrSolver(n, m, S)
- FC = eltype(S)
- T = real(FC)
- x = S(undef, m)
- Nv = S(undef, m)
- Aᵀu = S(undef, m)
- d = S(undef, m)
- y = S(undef, n)
- Mu = S(undef, n)
- w = S(undef, n)
- wbar = S(undef, n)
- Av = S(undef, n)
- u = S(undef, 0)
- v = S(undef, 0)
- q = S(undef, 0)
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(x, Nv, Aᵀu, d, y, Mu, w, wbar, Av, u, v, q, stats)
- return solver
- end
+function CraigmrSolver(m, n, S)
+ FC = eltype(S)
+ T = real(FC)
+ x = S(undef, n)
+ Nv = S(undef, n)
+ Aᴴu = S(undef, n)
+ d = S(undef, n)
+ y = S(undef, m)
+ Mu = S(undef, m)
+ w = S(undef, m)
+ wbar = S(undef, m)
+ Av = S(undef, m)
+ u = S(undef, 0)
+ v = S(undef, 0)
+ q = S(undef, 0)
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = CraigmrSolver{T,FC,S}(m, n, x, Nv, Aᴴu, d, y, Mu, w, wbar, Av, u, v, q, stats)
+ return solver
+end
- function CraigmrSolver(A, b)
- n, m = size(A)
- S = ktypeof(b)
- CraigmrSolver(n, m, S)
- end
+function CraigmrSolver(A, b)
+ m, n = size(A)
+ S = ktypeof(b)
+ CraigmrSolver(m, n, S)
end
"""
@@ -1456,13 +1518,15 @@ Type for storing the vectors required by the in-place version of GMRES.
The outer constructors
- solver = GmresSolver(n, m, memory, S)
+ solver = GmresSolver(m, n, memory, S)
solver = GmresSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n` if the value given is larger than `n`.
"""
mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
w :: S
@@ -1476,31 +1540,85 @@ mutable struct GmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
warm_start :: Bool
inner_iter :: Int
stats :: SimpleStats{T}
+end
- function GmresSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- w = S(undef, n)
- p = S(undef, 0)
- q = S(undef, 0)
- V = [S(undef, n) for i = 1 : memory]
- c = Vector{T}(undef, memory)
- s = Vector{FC}(undef, memory)
- z = Vector{FC}(undef, memory)
- R = Vector{FC}(undef, div(memory * (memory+1), 2))
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, w, p, q, V, c, s, z, R, false, 0, stats)
- return solver
- end
+function GmresSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ w = S(undef, n)
+ p = S(undef, 0)
+ q = S(undef, 0)
+ V = S[S(undef, n) for i = 1 : memory]
+ c = Vector{T}(undef, memory)
+ s = Vector{FC}(undef, memory)
+ z = Vector{FC}(undef, memory)
+ R = Vector{FC}(undef, div(memory * (memory+1), 2))
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = GmresSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, c, s, z, R, false, 0, stats)
+ return solver
+end
- function GmresSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- GmresSolver(n, m, memory, S)
- end
+function GmresSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ GmresSolver(m, n, memory, S)
+end
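
A sketch of the constructor with an explicit `memory`, on illustrative data:

    n = 100
    A = rand(n, n); b = rand(n)
    solver = GmresSolver(A, b, 30)  # store at most 30 Arnoldi vectors
    gmres!(solver, A, b)
    solver.stats.niter
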
+
+"""
+Type for storing the vectors required by the in-place version of FGMRES.
+
+The outer constructors
+
+ solver = FgmresSolver(m, n, memory, S)
+ solver = FgmresSolver(A, b, memory = 20)
+
+may be used in order to create these vectors.
+`memory` is set to `n` if the value given is larger than `n`.
+"""
+mutable struct FgmresSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
+ Δx :: S
+ x :: S
+ w :: S
+ q :: S
+ V :: Vector{S}
+ Z :: Vector{S}
+ c :: Vector{T}
+ s :: Vector{FC}
+ z :: Vector{FC}
+ R :: Vector{FC}
+ warm_start :: Bool
+ inner_iter :: Int
+ stats :: SimpleStats{T}
+end
+
+function FgmresSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ w = S(undef, n)
+ q = S(undef, 0)
+ V = S[S(undef, n) for i = 1 : memory]
+ Z = S[S(undef, n) for i = 1 : memory]
+ c = Vector{T}(undef, memory)
+ s = Vector{FC}(undef, memory)
+ z = Vector{FC}(undef, memory)
+ R = Vector{FC}(undef, div(memory * (memory+1), 2))
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = FgmresSolver{T,FC,S}(m, n, Δx, x, w, q, V, Z, c, s, z, R, false, 0, stats)
+ return solver
+end
+
+function FgmresSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ FgmresSolver(m, n, memory, S)
end
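
FGMRES is introduced by this patch; a hedged sketch of its intended use, where the right preconditioner `N` (a placeholder here) may change between inner iterations, which is what distinguishes FGMRES from GMRES:

    solver = FgmresSolver(A, b, 20)
    fgmres!(solver, A, b, N=N)  # N: hypothetical, possibly iteration-dependent preconditioner
    solution(solver)
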
"""
@@ -1508,13 +1626,15 @@ Type for storing the vectors required by the in-place version of FOM.
The outer constructors
- solver = FomSolver(n, m, memory, S)
+ solver = FomSolver(m, n, memory, S)
solver = FomSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n` if the value given is larger than `n`.
"""
mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
Δx :: S
x :: S
w :: S
@@ -1526,30 +1646,30 @@ mutable struct FomSolver{T,FC,S} <: KrylovSolver{T,FC,S}
U :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function FomSolver(n, m, memory, S)
- memory = min(n, memory)
- FC = eltype(S)
- T = real(FC)
- Δx = S(undef, 0)
- x = S(undef, n)
- w = S(undef, n)
- p = S(undef, 0)
- q = S(undef, 0)
- V = [S(undef, n) for i = 1 : memory]
- l = Vector{FC}(undef, memory)
- z = Vector{FC}(undef, memory)
- U = Vector{FC}(undef, div(memory * (memory+1), 2))
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(Δx, x, w, p, q, V, l, z, U, false, stats)
- return solver
- end
+function FomSolver(m, n, memory, S)
+ memory = min(m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ Δx = S(undef, 0)
+ x = S(undef, n)
+ w = S(undef, n)
+ p = S(undef, 0)
+ q = S(undef, 0)
+ V = S[S(undef, n) for i = 1 : memory]
+ l = Vector{FC}(undef, memory)
+ z = Vector{FC}(undef, memory)
+ U = Vector{FC}(undef, div(memory * (memory+1), 2))
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = FomSolver{T,FC,S}(m, n, Δx, x, w, p, q, V, l, z, U, false, stats)
+ return solver
+end
- function FomSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- FomSolver(n, m, memory, S)
- end
+function FomSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ FomSolver(m, n, memory, S)
end
"""
@@ -1557,13 +1677,15 @@ Type for storing the vectors required by the in-place version of GPMR.
The outer constructors
- solver = GpmrSolver(n, m, memory, S)
+ solver = GpmrSolver(m, n, memory, S)
solver = GpmrSolver(A, b, memory = 20)
may be used in order to create these vectors.
`memory` is set to `n + m` if the value given is larger than `n + m`.
"""
mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
+ m :: Int
+ n :: Int
wA :: S
wB :: S
dA :: S
@@ -1582,37 +1704,37 @@ mutable struct GpmrSolver{T,FC,S} <: KrylovSolver{T,FC,S}
R :: Vector{FC}
warm_start :: Bool
stats :: SimpleStats{T}
+end
- function GpmrSolver(n, m, memory, S)
- memory = min(n + m, memory)
- FC = eltype(S)
- T = real(FC)
- wA = S(undef, 0)
- wB = S(undef, 0)
- dA = S(undef, n)
- dB = S(undef, m)
- Δx = S(undef, 0)
- Δy = S(undef, 0)
- x = S(undef, n)
- y = S(undef, m)
- q = S(undef, 0)
- p = S(undef, 0)
- V = [S(undef, n) for i = 1 : memory]
- U = [S(undef, m) for i = 1 : memory]
- gs = Vector{FC}(undef, 4 * memory)
- gc = Vector{T}(undef, 4 * memory)
- zt = Vector{FC}(undef, 2 * memory)
- R = Vector{FC}(undef, memory * (2memory + 1))
- stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
- solver = new{T,FC,S}(wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats)
- return solver
- end
+function GpmrSolver(m, n, memory, S)
+ memory = min(n + m, memory)
+ FC = eltype(S)
+ T = real(FC)
+ wA = S(undef, 0)
+ wB = S(undef, 0)
+ dA = S(undef, m)
+ dB = S(undef, n)
+ Δx = S(undef, 0)
+ Δy = S(undef, 0)
+ x = S(undef, m)
+ y = S(undef, n)
+ q = S(undef, 0)
+ p = S(undef, 0)
+ V = S[S(undef, m) for i = 1 : memory]
+ U = S[S(undef, n) for i = 1 : memory]
+ gs = Vector{FC}(undef, 4 * memory)
+ gc = Vector{T}(undef, 4 * memory)
+ zt = Vector{FC}(undef, 2 * memory)
+ R = Vector{FC}(undef, memory * (2 * memory + 1))
+ stats = SimpleStats(0, false, false, T[], T[], T[], "unknown")
+ solver = GpmrSolver{T,FC,S}(m, n, wA, wB, dA, dB, Δx, Δy, x, y, q, p, V, U, gs, gc, zt, R, false, stats)
+ return solver
+end
- function GpmrSolver(A, b, memory = 20)
- n, m = size(A)
- S = ktypeof(b)
- GpmrSolver(n, m, memory, S)
- end
+function GpmrSolver(A, b, memory = 20)
+ m, n = size(A)
+ S = ktypeof(b)
+ GpmrSolver(m, n, memory, S)
end
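
A sketch of the two-output workspace in use; `A`, `B`, `b` and `c` stand for the two operators and right-hand sides of a partitioned system:

    solver = GpmrSolver(A, b, 20)
    gpmr!(solver, A, B, b, c)
    x, y = solution(solver)  # GPMR returns two solution blocks
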
"""
@@ -1704,29 +1826,35 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [
(MinresQlpSolver , :minres_qlp! , 1, 1, 0, true )
(QmrSolver , :qmr! , 1, 1, 1, true )
(GmresSolver , :gmres! , 1, 1, 0, true )
+ (FgmresSolver , :fgmres! , 1, 1, 0, true )
(FomSolver , :fom! , 1, 1, 0, true )
(GpmrSolver , :gpmr! , 2, 1, 0, true )
]
@eval begin
- @inline solve!(solver :: $KS, args...; kwargs...) = $(fun)(solver, args...; kwargs...)
- @inline statistics(solver :: $KS) = solver.stats
- @inline niterations(solver :: $KS) = solver.stats.niter
- @inline Aprod(solver :: $KS) = $nA * solver.stats.niter
- @inline Atprod(solver :: $KS) = $nAt * solver.stats.niter
+ size(solver :: $KS) = solver.m, solver.n
+ solve!(solver :: $KS, args...; kwargs...) = $(fun)(solver, args...; kwargs...)
+ statistics(solver :: $KS) = solver.stats
+ niterations(solver :: $KS) = solver.stats.niter
+ Aprod(solver :: $KS) = $nA * solver.stats.niter
+ Atprod(solver :: $KS) = $nAt * solver.stats.niter
if $KS == GpmrSolver
- @inline Bprod(solver :: $KS) = solver.stats.niter
+ Bprod(solver :: $KS) = solver.stats.niter
+ end
+ nsolution(solver :: $KS) = $nsol
+ if $nsol == 1
+ solution(solver :: $KS) = solver.x
+ solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.")
+ end
+ if $nsol == 2
+ solution(solver :: $KS) = solver.x, solver.y
+ solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? solution(solver)[p] : error("solution(solver) has only two outputs.")
end
- @inline nsolution(solver :: $KS) = $nsol
- ($nsol == 1) && @inline solution(solver :: $KS) = solver.x
- ($nsol == 2) && @inline solution(solver :: $KS) = solver.x, solver.y
- ($nsol == 1) && @inline solution(solver :: $KS, p :: Integer) = (p == 1) ? solution(solver) : error("solution(solver) has only one output.")
- ($nsol == 2) && @inline solution(solver :: $KS, p :: Integer) = (1 ≤ p ≤ 2) ? solution(solver)[p] : error("solution(solver) has only two outputs.")
if $KS ∈ (BilqrSolver, TrilqrSolver)
- @inline issolved_primal(solver :: $KS) = solver.stats.solved_primal
- @inline issolved_dual(solver :: $KS) = solver.stats.solved_dual
- @inline issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver)
+ issolved_primal(solver :: $KS) = solver.stats.solved_primal
+ issolved_dual(solver :: $KS) = solver.stats.solved_dual
+ issolved(solver :: $KS) = issolved_primal(solver) && issolved_dual(solver)
else
- @inline issolved(solver :: $KS) = solver.stats.solved
+ issolved(solver :: $KS) = solver.stats.solved
end
if $warm_start
if $KS in (BilqrSolver, TrilqrSolver, TricgSolver, TrimrSolver, GpmrSolver)
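
Taken together, the methods generated by this loop give every workspace a uniform query API; a sketch on illustrative data:

    solver = QmrSolver(A, b)
    qmr!(solver, A, b)
    size(solver)         # (solver.m, solver.n), from the new fields
    niterations(solver)  # solver.stats.niter
    Aprod(solver)        # operator-vector products with A so far
    Atprod(solver)       # products with Aᴴ
    issolved(solver)
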
@@ -1758,6 +1886,29 @@ for (KS, fun, nsol, nA, nAt, warm_start) in [
end
end
+function ksizeof(attribute)
+ if isa(attribute, Vector{<:AbstractVector}) && !isempty(attribute)
+ # A vector of vectors is a vector of pointers in Julia.
+ # All vectors inside a vector have the same size in Krylov.jl
+ size_attribute = sizeof(attribute) + length(attribute) * ksizeof(attribute[1])
+ else
+ size_attribute = sizeof(attribute)
+ end
+ return size_attribute
+end
+
+function sizeof(stats_solver :: Union{KrylovStats, KrylovSolver})
+ type = typeof(stats_solver)
+ nfields = fieldcount(type)
+ storage = 0
+ for i = 1:nfields
+ field_i = getfield(stats_solver, i)
+ size_i = ksizeof(field_i)
+ storage += size_i
+ end
+ return storage
+end
+
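These two functions let the memory footprint of a workspace be audited; a sketch, assuming the new `sizeof` method is reachable through the module (how it interacts with `Base.sizeof` is not shown in this hunk):

    solver = GmresSolver(A, b, 20)
    Krylov.ksizeof(solver.V)  # bytes of the Krylov basis, vector of pointers included
    Krylov.sizeof(solver)     # total bytes over all fields
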
"""
show(io, solver; show_stats=true)
@@ -1765,38 +1916,40 @@ Statistics of `solver` are displayed if `show_stats` is set to true.
"""
function show(io :: IO, solver :: KrylovSolver{T,FC,S}; show_stats :: Bool=true) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
workspace = typeof(solver)
- name_solver = workspace.name.wrapper
- l1 = max(length(string(name_solver)), 10) # length("warm_start") = 10
- l2 = length(string(S)) + 8 # length("Vector{}") = 8
+ name_solver = string(workspace.name.name)
+ name_stats = string(typeof(solver.stats).name.name)
+ nbytes = sizeof(solver)
+ storage = format_bytes(nbytes)
architecture = S <: Vector ? "CPU" : "GPU"
- format = Printf.Format("│%$(l1)s│%$(l2)s│%18s│\n")
- format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%18s│\n")
- @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^18)
- Printf.format(io, format, name_solver, "Precision: $FC", "Architecture: $architecture")
- @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18)
+ l1 = max(length(name_solver), length(string(FC)) + 11) # length("Precision: ") = 11
+ nchar = workspace <: Union{CgLanczosShiftSolver, FomSolver, DiomSolver, DqgmresSolver, GmresSolver, FgmresSolver, GpmrSolver} ? 8 : 0 # length("Vector{}") = 8
+ l2 = max(ndigits(solver.m) + 7, length(architecture) + 14, length(string(S)) + nchar) # length("nrows: ") = 7 and length("Architecture: ") = 14
+ l2 = max(l2, length(name_stats) + 2 + length(string(T))) # length("{}") = 2
+ l3 = max(ndigits(solver.n) + 7, length(storage) + 9) # length("Storage: ") = 9 and length("cols: ") = 7
+ format = Printf.Format("│%$(l1)s│%$(l2)s│%$(l3)s│\n")
+ format2 = Printf.Format("│%$(l1+1)s│%$(l2)s│%$(l3)s│\n")
+ @printf(io, "┌%s┬%s┬%s┐\n", "─"^l1, "─"^l2, "─"^l3)
+ Printf.format(io, format, "$(name_solver)", "nrows: $(solver.m)", "ncols: $(solver.n)")
+ @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3)
+ Printf.format(io, format, "Precision: $FC", "Architecture: $architecture","Storage: $storage")
+ @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3)
Printf.format(io, format, "Attribute", "Type", "Size")
- @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^18)
- for i=1:fieldcount(workspace)-1 # show stats seperately
- type_i = fieldtype(workspace, i)
+ @printf(io, "├%s┼%s┼%s┤\n", "─"^l1, "─"^l2, "─"^l3)
+ for i=1:fieldcount(workspace)
name_i = fieldname(workspace, i)
- len = if type_i <: AbstractVector
- field_i = getfield(solver, name_i)
- ni = length(field_i)
- if eltype(type_i) <: AbstractVector
- "$(ni) x $(length(field_i[1]))"
- else
- length(field_i)
- end
- else
- 0
- end
- if (name_i in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV")
- Printf.format(io, format2, string(name_i), type_i, len)
+ type_i = fieldtype(workspace, i)
+ field_i = getfield(solver, name_i)
+ size_i = ksizeof(field_i)
+ if (name_i::Symbol in [:w̅, :w̄, :d̅]) && (VERSION < v"1.8.0-DEV")
+ (size_i ≠ 0) && Printf.format(io, format2, string(name_i), type_i, format_bytes(size_i))
else
- Printf.format(io, format, string(name_i), type_i, len)
+ (size_i ≠ 0) && Printf.format(io, format, string(name_i), type_i, format_bytes(size_i))
end
end
- @printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^18)
- show_stats && show(io, solver.stats)
+ @printf(io, "└%s┴%s┴%s┘\n","─"^l1,"─"^l2,"─"^l3)
+ if show_stats
+ @printf(io, "\n")
+ show(io, solver.stats)
+ end
return nothing
end
diff --git a/src/krylov_stats.jl b/src/krylov_stats.jl
index a662fa0a0..392912895 100644
--- a/src/krylov_stats.jl
+++ b/src/krylov_stats.jl
@@ -1,3 +1,6 @@
+export KrylovStats, SimpleStats, LsmrStats, LanczosStats, LanczosShiftStats,
+SymmlqStats, AdjointStats, LNLQStats, LSLQStats
+
"Abstract type for statistics returned by a solver"
abstract type KrylovStats{T} end
@@ -21,6 +24,12 @@ mutable struct SimpleStats{T} <: KrylovStats{T}
status :: String
end
+function reset!(stats :: SimpleStats)
+ empty!(stats.residuals)
+ empty!(stats.Aresiduals)
+ empty!(stats.Acond)
+end
+
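These explicit `reset!` methods replace the metaprogrammed versions removed near the bottom of this file; a sketch of the contract:

    stats = SimpleStats(0, false, false, [1.0], Float64[], Float64[], "unknown")
    reset!(stats)             # empties residuals, Aresiduals and Acond in place
    isempty(stats.residuals)  # true
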
"""
Type for statistics returned by LSMR. The attributes are:
- niter
@@ -47,6 +56,11 @@ mutable struct LsmrStats{T} <: KrylovStats{T}
status :: String
end
+function reset!(stats :: LsmrStats)
+ empty!(stats.residuals)
+ empty!(stats.Aresiduals)
+end
+
"""
Type for statistics returned by CG-LANCZOS, the attributes are:
- niter
@@ -67,6 +81,10 @@ mutable struct LanczosStats{T} <: KrylovStats{T}
status :: String
end
+function reset!(stats :: LanczosStats)
+ empty!(stats.residuals)
+end
+
"""
Type for statistics returned by CG-LANCZOS with shifts, the attributes are:
- niter
@@ -117,6 +135,13 @@ mutable struct SymmlqStats{T} <: KrylovStats{T}
status :: String
end
+function reset!(stats :: SymmlqStats)
+ empty!(stats.residuals)
+ empty!(stats.residualscg)
+ empty!(stats.errors)
+ empty!(stats.errorscg)
+end
+
"""
Type for statistics returned by adjoint systems solvers BiLQR and TriLQR, the attributes are:
- niter
@@ -135,6 +160,11 @@ mutable struct AdjointStats{T} <: KrylovStats{T}
status :: String
end
+function reset!(stats :: AdjointStats)
+ empty!(stats.residuals_primal)
+ empty!(stats.residuals_dual)
+end
+
"""
Type for statistics returned by the LNLQ method, the attributes are:
- niter
@@ -155,6 +185,12 @@ mutable struct LNLQStats{T} <: KrylovStats{T}
status :: String
end
+function reset!(stats :: LNLQStats)
+ empty!(stats.residuals)
+ empty!(stats.error_bnd_x)
+ empty!(stats.error_bnd_y)
+end
+
"""
Type for statistics returned by the LSLQ method, the attributes are:
- niter
@@ -181,6 +217,14 @@ mutable struct LSLQStats{T} <: KrylovStats{T}
status :: String
end
+function reset!(stats :: LSLQStats)
+ empty!(stats.residuals)
+ empty!(stats.Aresiduals)
+ empty!(stats.err_lbnds)
+ empty!(stats.err_ubnds_lq)
+ empty!(stats.err_ubnds_cg)
+end
+
import Base.show
special_fields = Dict(
@@ -192,45 +236,24 @@ special_fields = Dict(
:err_ubnds_cg => "error bound CG",
)
-for f in ["Simple", "Lsmr", "Adjoint", "LNLQ", "LSLQ", "Lanczos", "Symmlq"]
- T = Meta.parse("Krylov." * f * "Stats{S}")
-
- @eval function empty_field!(stats :: $T, i, ::Type{Vector{Si}}) where {S, Si}
- statfield = getfield(stats, i)
- empty!(statfield)
- end
- @eval empty_field!(stats :: $T, i, type) where S = stats
-
- @eval function reset!(stats :: $T) where S
- nfield = length($T.types)
- for i = 1 : nfield
- type = fieldtype($T, i)
- empty_field!(stats, i, type)
+function show(io :: IO, stats :: KrylovStats)
+ kst = typeof(stats)
+ s = string(kst.name.name) * "\n"
+ nfield = fieldcount(kst)
+ for i = 1 : nfield
+ field = fieldname(kst, i)
+ field_name = if field ∈ keys(special_fields)
+ special_fields[field]
+ else
+ replace(string(field), "_" => " ")
end
- end
-end
-
-for f in ["Simple", "Lsmr", "Lanczos", "LanczosShift", "Symmlq", "Adjoint", "LNLQ", "LSLQ"]
- T = Meta.parse("Krylov." * f * "Stats{S}")
-
- @eval function show(io :: IO, stats :: $T) where S
- s = $f * " stats\n"
- nfield = length($T.types)
- for i = 1 : nfield
- field = fieldname($T, i)
- field_name = if field ∈ keys(special_fields)
- special_fields[field]
- else
- replace(string(field), "_" => " ")
- end
- s *= " " * field_name * ":"
- statfield = getfield(stats, field)
- if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat}
- s *= @sprintf " %s\n" vec2str(statfield)
- else
- s *= @sprintf " %s\n" statfield
- end
+ s *= " " * field_name * ":"
+ statfield = getfield(stats, field)
+ if isa(statfield, AbstractVector) && eltype(statfield) <: Union{Missing, AbstractFloat}
+ s *= @sprintf " %s\n" vec2str(statfield)
+ else
+ s *= @sprintf " %s\n" statfield
end
- print(io, s)
end
+ print(io, s)
end
diff --git a/src/krylov_utils.jl b/src/krylov_utils.jl
index 6f0c1c382..6049f9c28 100644
--- a/src/krylov_utils.jl
+++ b/src/krylov_utils.jl
@@ -1,3 +1,8 @@
+export kstdout
+
+"Default I/O stream for all Krylov methods."
+const kstdout = Core.stdout
+
"""
FloatOrComplex{T}
Union type of `T` and `Complex{T}` where T is an `AbstractFloat`.
@@ -92,8 +97,8 @@ function sym_givens(a :: Complex{T}, b :: Complex{T}) where T <: AbstractFloat
return (c, s, ρ)
end
-@inline sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b))
-@inline sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b)
+sym_givens(a :: Complex{T}, b :: T) where T <: AbstractFloat = sym_givens(a, Complex{T}(b))
+sym_givens(a :: T, b :: Complex{T}) where T <: AbstractFloat = sym_givens(Complex{T}(a), b)
"""
roots = roots_quadratic(q₂, q₁, q₀; nitref)
@@ -111,68 +116,86 @@ function roots_quadratic(q₂ :: T, q₁ :: T, q₀ :: T;
# Case where q(x) is linear.
if q₂ == zero(T)
if q₁ == zero(T)
- root = [zero(T)]
- q₀ == zero(T) || (root = T[])
+ q₀ == zero(T) || error("The quadratic `q` doesn't have real roots.")
+ root = zero(T)
else
- root = [-q₀ / q₁]
+ root = -q₀ / q₁
end
- return root
+ return (root, root)
end
# Case where q(x) is indeed quadratic.
rhs = √eps(T) * q₁ * q₁
if abs(q₀ * q₂) > rhs
ρ = q₁ * q₁ - 4 * q₂ * q₀
- ρ < 0 && return T[]
+ ρ < 0 && return error("The quadratic `q` doesn't have real roots.")
d = -(q₁ + copysign(sqrt(ρ), q₁)) / 2
- roots = [d / q₂, q₀ / d]
+ root1 = d / q₂
+ root2 = q₀ / d
else
# Ill-conditioned quadratic.
- roots = [-q₁ / q₂, zero(T)]
+ root1 = -q₁ / q₂
+ root2 = zero(T)
end
# Perform a few Newton iterations to improve accuracy.
- for k = 1 : 2
- root = roots[k]
- for it = 1 : nitref
- q = (q₂ * root + q₁) * root + q₀
- dq = 2 * q₂ * root + q₁
- dq == zero(T) && continue
- root = root - q / dq
- end
- roots[k] = root
+ for it = 1 : nitref
+ q = (q₂ * root1 + q₁) * root1 + q₀
+ dq = 2 * q₂ * root1 + q₁
+ dq == zero(T) && continue
+ root1 = root1 - q / dq
end
- return roots
-end
+ for it = 1 : nitref
+ q = (q₂ * root2 + q₁) * root2 + q₀
+ dq = 2 * q₂ * root2 + q₁
+ dq == zero(T) && continue
+ root2 = root2 - q / dq
+ end
+ return (root1, root2)
+end
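
The rewrite returns a 2-tuple instead of a vector and raises an error when no real root exists; a quick check of the new contract on x² - 3x + 2 and x² + 1:

    Krylov.roots_quadratic(1.0, -3.0, 2.0)  # (2.0, 1.0)
    Krylov.roots_quadratic(1.0, 0.0, 1.0)   # throws: no real roots
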
"""
- roots = to_boundary(x, d, radius; flip, xNorm2, dNorm2)
-
-Given a trust-region radius `radius`, a vector `x` lying inside the
-trust-region and a direction `d`, return `σ1` and `σ2` such that
-
- ‖x + σi d‖ = radius, i = 1, 2
+ s = vec2str(x; ndisp)
-in the Euclidean norm. If known, ‖x‖² may be supplied in `xNorm2`.
+Display an array in the form
-If `flip` is set to `true`, `σ1` and `σ2` are computed such that
+ [ -3.0e-01 -5.1e-01 1.9e-01 ... -2.3e-01 -4.4e-01 2.4e-01 ]
- ‖x - σi d‖ = radius, i = 1, 2.
+with (ndisp - 1)/2 elements on each side.
"""
-function to_boundary(x :: Vector{T}, d :: Vector{T},
- radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where T <: Number
- radius > 0 || error("radius must be positive")
-
- # ‖d‖² σ² + 2 xᵀd σ + (‖x‖² - radius²).
- xd = dot(x, d)
- flip && (xd = -xd)
- dNorm2 == zero(T) && (dNorm2 = dot(d, d))
- dNorm2 == zero(T) && error("zero direction")
- xNorm2 == zero(T) && (xNorm2 = dot(x, x))
- (xNorm2 ≤ radius * radius) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius * radius))
- roots = roots_quadratic(dNorm2, 2 * xd, xNorm2 - radius * radius)
- return roots # `σ1` and `σ2`
+function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing}
+ n = length(x)
+ if n ≤ ndisp
+ ndisp = n
+ nside = n
+ else
+ nside = max(1, div(ndisp - 1, 2))
+ end
+ s = "["
+ i = 1
+ while i ≤ nside
+ if x[i] !== missing
+ s *= @sprintf("%8.1e ", x[i])
+ else
+ s *= " ✗✗✗✗ "
+ end
+ i += 1
+ end
+ if i ≤ div(n, 2)
+ s *= "... "
+ end
+ i = max(i, n - nside + 1)
+ while i ≤ n
+ if x[i] !== missing
+ s *= @sprintf("%8.1e ", x[i])
+ else
+ s *= " ✗✗✗✗ "
+ end
+ i += 1
+ end
+ s *= "]"
+ return s
end
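
A sketch of the truncated display this helper produces (spacing approximate):

    Krylov.vec2str(collect(1.0:10.0))
    # "[ 1.0e+00  2.0e+00  3.0e+00 ...  8.0e+00  9.0e+00  1.0e+01 ]"
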
"""
@@ -201,84 +224,125 @@ function ktypeof(v::S) where S <: AbstractVector
end
function ktypeof(v::S) where S <: SubArray
- return ktypeof(v.parent)
+ vp = v.parent
+ if isa(vp, DenseMatrix)
+ M = typeof(vp)
+ return matrix_to_vector(M) # view of a row or a column of a matrix
+ else
+ return ktypeof(vp) # view of a vector
+ end
+end
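
With this branch, a view of a matrix row or column now maps to the matching vector storage type instead of recursing on the matrix type; a sketch:

    A = rand(4, 3); b = rand(6)
    Krylov.ktypeof(view(A, :, 2))  # Vector{Float64}, via matrix_to_vector
    Krylov.ktypeof(view(b, 1:3))   # Vector{Float64}, via ktypeof(v.parent)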
+
+"""
+ M = vector_to_matrix(S)
+
+Return the dense matrix storage type `M` related to the dense vector storage type `S`.
+"""
+function vector_to_matrix(::Type{S}) where S <: DenseVector
+ T = hasproperty(S, :body) ? S.body : S
+ par = T.parameters
+ npar = length(par)
+ (2 ≤ npar ≤ 3) || error("Type $S is not supported.")
+ if npar == 2
+ M = T.name.wrapper{par[1], 2}
+ else
+ M = T.name.wrapper{par[1], 2, par[3]}
+ end
+ return M
+end
+
+"""
+ S = matrix_to_vector(M)
+
+Return the dense vector storage type `S` related to the dense matrix storage type `M`.
+"""
+function matrix_to_vector(::Type{M}) where M <: DenseMatrix
+ T = hasproperty(M, :body) ? M.body : M
+ par = T.parameters
+ npar = length(par)
+ (2 ≤ npar ≤ 3) || error("Type $M is not supported.")
+ if npar == 2
+ S = T.name.wrapper{par[1], 1}
+ else
+ S = T.name.wrapper{par[1], 1, par[3]}
+ end
+ return S
end
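
The two conversions are inverses on the dense storage types they accept; the 3-parameter branch targets types such as GPU arrays whose third parameter is a buffer type. A sketch:

    vector_to_matrix(Vector{Float64})  # Matrix{Float64}
    matrix_to_vector(Matrix{Float32})  # Vector{Float32}
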
"""
v = kzeros(S, n)
-Create an AbstractVector of storage type `S` of length `n` only composed of zero.
+Create a vector of storage type `S` of length `n` only composed of zero.
"""
-@inline kzeros(S, n) = fill!(S(undef, n), zero(eltype(S)))
+kzeros(S, n) = fill!(S(undef, n), zero(eltype(S)))
"""
v = kones(S, n)
-Create an AbstractVector of storage type `S` of length `n` only composed of one.
+Create a vector of storage type `S` of length `n` only composed of one.
"""
-@inline kones(S, n) = fill!(S(undef, n), one(eltype(S)))
+kones(S, n) = fill!(S(undef, n), one(eltype(S)))
-@inline allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)) && (solver.:($v) = S(undef, n))
+allocate_if(bool, solver, v, S, n) = bool && isempty(solver.:($v)::S) && (solver.:($v)::S = S(undef, n))
-@inline kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0)
+kdisplay(iter, verbose) = (verbose > 0) && (mod(iter, verbose) == 0)
-@inline mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x)
+mulorldiv!(y, P, x, ldiv::Bool) = ldiv ? ldiv!(y, P, x) : mul!(y, P, x)
-@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy)
-@inline krylov_dot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy)
-@inline krylov_dot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = dot(x, y)
+kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasReal = BLAS.dot(n, x, dx, y, dy)
+kdot(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasComplex = BLAS.dotc(n, x, dx, y, dy)
+kdot(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = dot(x, y)
-@inline krylov_dotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = krylov_dot(n, x, dx, y, dy)
-@inline krylov_dotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(krylov_dot(n, x, dx, y, dy))
+kdotr(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: AbstractFloat = kdot(n, x, dx, y, dy)
+kdotr(n :: Integer, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = real(kdot(n, x, dx, y, dy))
-@inline krylov_norm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx)
-@inline krylov_norm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: Number = norm(x)
+knrm2(n :: Integer, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.nrm2(n, x, dx)
+knrm2(n :: Integer, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = norm(x)
-@inline krylov_scal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx)
-@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: Number = (x .*= s)
-@inline krylov_scal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = krylov_scal!(n, Complex{T}(s), x, dx)
+kscal!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer) where T <: BLAS.BlasFloat = BLAS.scal!(n, s, x, dx)
+kscal!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer) where T <: FloatOrComplex = (x .*= s)
+kscal!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer) where T <: AbstractFloat = kscal!(n, Complex{T}(s), x, dx)
-@inline krylov_axpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy)
-@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpy!(s, x, y)
-@inline krylov_axpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpy!(n, Complex{T}(s), x, dx, y, dy)
+kaxpy!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpy!(n, s, x, dx, y, dy)
+kaxpy!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpy!(s, x, y)
+kaxpy!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpy!(n, Complex{T}(s), x, dx, y, dy)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: Number = axpby!(s, x, t, y)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, t, y, dy)
-@inline krylov_axpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, s, x, dx, Complex{T}(t), y, dy)
-@inline krylov_axpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = krylov_axpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy)
+kaxpby!(n :: Integer, s :: T, x :: Vector{T}, dx :: Integer, t :: T, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.axpby!(n, s, x, dx, t, y, dy)
+kaxpby!(n :: Integer, s :: T, x :: AbstractVector{T}, dx :: Integer, t :: T, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = axpby!(s, x, t, y)
+kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: Complex{T}, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, t, y, dy)
+kaxpby!(n :: Integer, s :: Complex{T}, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, s, x, dx, Complex{T}(t), y, dy)
+kaxpby!(n :: Integer, s :: T, x :: AbstractVector{Complex{T}}, dx :: Integer, t :: T, y :: AbstractVector{Complex{T}}, dy :: Integer) where T <: AbstractFloat = kaxpby!(n, Complex{T}(s), x, dx, Complex{T}(t), y, dy)
-@inline krylov_copy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy)
-@inline krylov_copy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: Number = copyto!(y, x)
+kcopy!(n :: Integer, x :: Vector{T}, dx :: Integer, y :: Vector{T}, dy :: Integer) where T <: BLAS.BlasFloat = BLAS.blascopy!(n, x, dx, y, dy)
+kcopy!(n :: Integer, x :: AbstractVector{T}, dx :: Integer, y :: AbstractVector{T}, dy :: Integer) where T <: FloatOrComplex = copyto!(y, x)
# the macros are just for readability, so we don't have to write the increments (always equal to 1)
-
macro kdot(n, x, y)
- return esc(:(krylov_dot($n, $x, 1, $y, 1)))
+ return esc(:(Krylov.kdot($n, $x, 1, $y, 1)))
end
macro kdotr(n, x, y)
- return esc(:(krylov_dotr($n, $x, 1, $y, 1)))
+ return esc(:(Krylov.kdotr($n, $x, 1, $y, 1)))
end
macro knrm2(n, x)
- return esc(:(krylov_norm2($n, $x, 1)))
+ return esc(:(Krylov.knrm2($n, $x, 1)))
end
macro kscal!(n, s, x)
- return esc(:(krylov_scal!($n, $s, $x, 1)))
+ return esc(:(Krylov.kscal!($n, $s, $x, 1)))
end
macro kaxpy!(n, s, x, y)
- return esc(:(krylov_axpy!($n, $s, $x, 1, $y, 1)))
+ return esc(:(Krylov.kaxpy!($n, $s, $x, 1, $y, 1)))
end
macro kaxpby!(n, s, x, t, y)
- return esc(:(krylov_axpby!($n, $s, $x, 1, $t, $y, 1)))
+ return esc(:(Krylov.kaxpby!($n, $s, $x, 1, $t, $y, 1)))
end
macro kcopy!(n, x, y)
- return esc(:(krylov_copy!($n, $x, 1, $y, 1)))
+ return esc(:(Krylov.kcopy!($n, $x, 1, $y, 1)))
end
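
A sketch of a call site after the renaming; the macros only hide the unit strides:

    x = rand(5); y = rand(5)
    Krylov.@kaxpy!(5, 2.0, x, y)  # Krylov.kaxpy!(5, 2.0, x, 1, y, 1), i.e. y ← 2x + y
    Krylov.@kdot(5, x, y)         # Krylov.kdot(5, x, 1, y, 1)
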
macro kswap(x, y)
@@ -294,44 +358,35 @@ macro kref!(n, x, y, c, s)
end
"""
- s = vec2str(x; ndisp)
+ roots = to_boundary(n, x, d, radius; flip, xNorm2, dNorm2)
-Display an array in the form
+Given a trust-region radius `radius`, a vector `x` lying inside the
+trust-region and a direction `d`, return `σ1` and `σ2` such that
- [ -3.0e-01 -5.1e-01 1.9e-01 ... -2.3e-01 -4.4e-01 2.4e-01 ]
+ ‖x + σi d‖ = radius, i = 1, 2
-with (ndisp - 1)/2 elements on each side.
+in the Euclidean norm.
+`n` is the length of vectors `x` and `d`.
+If known, ‖x‖² and ‖d‖² may be supplied with `xNorm2` and `dNorm2`.
+
+If `flip` is set to `true`, `σ1` and `σ2` are computed such that
+
+ ‖x - σi d‖ = radius, i = 1, 2.
"""
-function vec2str(x :: AbstractVector{T}; ndisp :: Int=7) where T <: Union{AbstractFloat, Missing}
- n = length(x)
- if n ≤ ndisp
- ndisp = n
- nside = n
- else
- nside = max(1, div(ndisp - 1, 2))
- end
- s = "["
- i = 1
- while i ≤ nside
- if x[i] !== missing
- s *= @sprintf("%8.1e ", x[i])
- else
- s *= " ✗✗✗✗ "
- end
- i += 1
- end
- if i ≤ div(n, 2)
- s *= "... "
- end
- i = max(i, n - nside + 1)
- while i ≤ n
- if x[i] !== missing
- s *= @sprintf("%8.1e ", x[i])
- else
- s *= " ✗✗✗✗ "
- end
- i += 1
- end
- s *= "]"
- return s
+function to_boundary(n :: Int, x :: AbstractVector{FC}, d :: AbstractVector{FC}, radius :: T; flip :: Bool=false, xNorm2 :: T=zero(T), dNorm2 :: T=zero(T)) where {T <: AbstractFloat, FC <: FloatOrComplex{T}}
+ radius > 0 || error("radius must be positive")
+
+ # ‖d‖² σ² + (xᴴd + dᴴx) σ + (‖x‖² - Δ²).
+ rxd = @kdotr(n, x, d)
+ flip && (rxd = -rxd)
+ dNorm2 == zero(T) && (dNorm2 = @kdotr(n, d, d))
+ dNorm2 == zero(T) && error("zero direction")
+ xNorm2 == zero(T) && (xNorm2 = @kdotr(n, x, x))
+ radius2 = radius * radius
+ (xNorm2 ≤ radius2) || error(@sprintf("outside of the trust region: ‖x‖²=%7.1e, Δ²=%7.1e", xNorm2, radius2))
+
+ # q₂ = ‖d‖², q₁ = xᴴd + dᴴx, q₀ = ‖x‖² - Δ²
+ # ‖x‖² ≤ Δ² ⟹ (q₁)² - 4 * q₂ * q₀ ≥ 0
+ roots = roots_quadratic(dNorm2, 2 * rxd, xNorm2 - radius2)
+ return roots # `σ1` and `σ2`
end
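A quick way to exercise the new `to_boundary` signature (a minimal sketch; the vectors and radius are illustrative, and the ordering of the two roots depends on `roots_quadratic`):

    using Krylov
    n = 2
    x = [0.5, 0.0]      # strictly inside the trust region of radius 1
    d = [1.0, 0.0]
    σ1, σ2 = Krylov.to_boundary(n, x, d, 1.0)
    # roots of σ² + σ - 0.75 = 0, i.e. 0.5 and -1.5:
    # ‖x + 0.5d‖ = ‖x - 1.5d‖ = 1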
diff --git a/src/lnlq.jl b/src/lnlq.jl
index a1f890de2..deda7336f 100644
--- a/src/lnlq.jl
+++ b/src/lnlq.jl
@@ -9,9 +9,9 @@
# and is equivalent to applying the SYMMLQ method
# to the linear system
#
-# AAᵀy = b with x = Aᵀy and can be reformulated as
+# AAᴴy = b with x = Aᴴy and can be reformulated as
#
-# [ -I Aᵀ ][ x ] = [ 0 ]
+# [ -I Aᴴ ][ x ] = [ 0 ]
# [ A ][ y ] [ b ].
#
# This method is based on the Golub-Kahan bidiagonalization process and is described in
@@ -26,10 +26,14 @@ export lnlq, lnlq!
"""
(x, y, stats) = lnlq(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T), σ::T=zero(T),
- atol::T=√eps(T), rtol::T=√eps(T), etolx::T=√eps(T), etoly::T=√eps(T), itmax::Int=0,
- transfer_to_craig::Bool=true, verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ transfer_to_craig::Bool=true,
+ sqd::Bool=false, λ::T=zero(T),
+ σ::T=zero(T), utolx::T=√eps(T),
+ utoly::T=√eps(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -38,17 +42,17 @@ Find the least-norm solution of the consistent linear system
Ax + λ²y = b
-using the LNLQ method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the LNLQ method, where λ ≥ 0 is a regularization parameter.
For a system in the form Ax = b, the LNLQ method is equivalent to applying
-SYMMLQ to AAᵀy = b and recovering x = Aᵀy but is more stable.
+SYMMLQ to AAᴴy = b and recovering x = Aᴴy but is more stable.
Note that y are the Lagrange multipliers of the least-norm problem
minimize ‖x‖ s.t. Ax = b.
If `λ > 0`, LNLQ solves the symmetric and quasi-definite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A λ²E ] [ y ] = [ b ],
where E and F are symmetric and positive definite.
@@ -59,12 +63,12 @@ The system above represents the optimality conditions of
min ‖x‖²_F + λ²‖y‖²_E s.t. Ax + λ²Ey = b.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᵀ + λ²E)y = b` with `Fx = Aᵀy`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LNLQ is then equivalent to applying SYMMLQ to `(AF⁻¹Aᴴ + λ²E)y = b` with `Fx = Aᴴy`.
If `λ = 0`, LNLQ solves the symmetric and indefinite system
- [ -F Aᵀ ] [ x ] [ 0 ]
+ [ -F Aᴴ ] [ x ] [ 0 ]
[ A 0 ] [ y ] = [ b ].
The system above represents the optimality conditions of
@@ -75,12 +79,39 @@ In this case, `M` can still be specified and indicates the weighted norm in whic
In this implementation, both the x and y-parts of the solution are returned.
-`etolx` and `etoly` are tolerances on the upper bound of the distance to the solution ‖x-xₛ‖ and ‖y-yₛ‖, respectively.
+`utolx` and `utoly` are tolerances on the upper bound of the distance to the solution ‖x-x*‖ and ‖y-y*‖, respectively.
The bound is valid if λ>0 or σ>0, where σ should be strictly smaller than the smallest positive singular value.
For instance, σ := (1-1e-7)σₘᵢₙ.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `transfer_to_craig`: transfer from the LNLQ point to the CRAIG point, when it exists. The transfer is based on the residual norm;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`;
+* `utolx`: tolerance on the upper bound on the distance to the solution `‖x-x*‖`;
+* `utoly`: tolerance on the upper bound on the distance to the solution `‖y-y*‖`;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length m;
+* `stats`: statistics collected on the run in a [`LNLQStats`](@ref) structure.
#### Reference
@@ -104,14 +135,18 @@ See [`LnlqSolver`](@ref) for more details about the `solver`.
function lnlq! end
function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T), σ :: T=zero(T),
- atol :: T=√eps(T), rtol :: T=√eps(T), etolx :: T=√eps(T), etoly :: T=√eps(T), itmax :: Int=0,
- transfer_to_craig :: Bool=true, verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ M=I, N=I, ldiv :: Bool=false,
+ transfer_to_craig :: Bool=true,
+ sqd :: Bool=false, λ :: T=zero(T),
+ σ :: T=zero(T), utolx :: T=√eps(T),
+ utoly :: T=√eps(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("LNLQ: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "LNLQ: system of %d equations in %d variables\n", m, n)
# Check sqd and λ parameters
sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
@@ -123,16 +158,16 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
allocate_if(λ > 0, solver, :q, S, n)
- x, Nv, Aᵀu, y, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.y, solver.w̄
+ x, Nv, Aᴴu, y, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.y, solver.w̄
Mu, Av, q, stats = solver.Mu, solver.Av, solver.q, solver.stats
rNorms, xNorms, yNorms = stats.residuals, stats.error_bnd_x, stats.error_bnd_y
reset!(stats)
@@ -163,8 +198,8 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = m + n)
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, bNorm)
# Update iteration index
iter = iter + 1
@@ -179,9 +214,9 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
MisI || @kscal!(m, one(FC) / βₖ, Mu)
end
- # α₁Nv₁ = Aᵀu₁.
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ # α₁Nv₁ = Aᴴu₁.
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv) # v₁ = N⁻¹ * Nv₁
αₖ = sqrt(@kdotr(n, v, Nv)) # α₁ = ‖v₁‖_N
if αₖ ≠ 0
@@ -190,8 +225,8 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
w̄ .= u # Direction w̄₁
- cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᵀ
- sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᵀ
+ cₖ = zero(T) # Givens cosines used for the LQ factorization of (Lₖ)ᴴ
+ sₖ = zero(FC) # Givens sines used for the LQ factorization of (Lₖ)ᴴ
ζₖ₋₁ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ
ηₖ = zero(FC) # Coefficient of M̅ₖ
@@ -214,7 +249,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
αhatₖ = αₖ
end
- # Begin the LQ factorization of (Lₖ)ᵀ = M̅ₖQₖ.
+ # Begin the LQ factorization of (Lₖ)ᴴ = M̅ₖQₖ.
# [ α₁ β₂ 0 • • • 0 ] [ ϵ₁ 0 • • • • 0 ]
# [ 0 α₂ • • • ] [ η₂ ϵ₂ • • ]
# [ • • • • • • ] [ 0 • • • • ]
@@ -225,7 +260,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
ϵbarₖ = αhatₖ # ϵbar₁ = αhat₁
- # Hₖ = Bₖ(Lₖ)ᵀ = [ Lₖ(Lₖ)ᵀ ] ⟹ (Hₖ₋₁)ᵀ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ
+ # Hₖ = Bₖ(Lₖ)ᴴ = [ Lₖ(Lₖ)ᴴ ] ⟹ (Hₖ₋₁)ᴴ = [Lₖ₋₁Mₖ₋₁ 0] Qₖ
# [ αₖβₖ₊₁(eₖ)ᵀ ]
#
# Solve Lₖtₖ = β₁e₁ and M̅ₖz̅ₖ = tₖ
@@ -247,7 +282,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
err_x = τtildeₖ
err_y = ζtildeₖ
- solved_lq = err_x ≤ etolx || err_y ≤ etoly
+ solved_lq = err_x ≤ utolx || err_y ≤ utoly
history && push!(xNorms, err_x)
history && push!(yNorms, err_y)
@@ -273,7 +308,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Continue the generalized Golub-Kahan bidiagonalization.
# AVₖ = MUₖ₊₁Bₖ
- # AᵀUₖ₊₁ = NVₖ(Bₖ)ᵀ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᵀ = NVₖ₊₁(Lₖ₊₁)ᵀ
+ # AᴴUₖ₊₁ = NVₖ(Bₖ)ᴴ + αₖ₊₁Nvₖ₊₁(eₖ₊₁)ᴴ = NVₖ₊₁(Lₖ₊₁)ᴴ
#
# [ α₁ 0 • • • • 0 ]
# [ β₂ α₂ • • ]
@@ -296,9 +331,9 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
MisI || @kscal!(m, one(FC) / βₖ₊₁, Mu)
end
- # αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -βₖ₊₁, Nv)
+ # αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -βₖ₊₁, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv) # vₖ₊₁ = N⁻¹ * Nvₖ₊₁
αₖ₊₁ = sqrt(@kdotr(n, v, Nv)) # αₖ₊₁ = ‖vₖ₊₁‖_N
if αₖ₊₁ ≠ 0
@@ -353,7 +388,7 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
ρbar = ssig * μbar + csig * σₑₛₜ
end
- # Continue the LQ factorization of (Lₖ₊₁)ᵀ.
+ # Continue the LQ factorization of (Lₖ₊₁)ᴴ.
# [ηₖ ϵbarₖ βₖ₊₁] [1 0 0 ] = [ηₖ ϵₖ 0 ]
# [0 0 αₖ₊₁] [0 cₖ₊₁ sₖ₊₁] [0 ηₖ₊₁ ϵbarₖ₊₁]
# [0 sₖ₊₁ -cₖ₊₁]
@@ -438,18 +473,15 @@ function lnlq!(solver :: LnlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
solved_lq = rNorm_lq ≤ ε
solved_cg = transfer_to_craig && rNorm_cg ≤ ε
if σₑₛₜ > 0
- if transfer_to_craig
- solved_cg = solved_cg || err_x ≤ etolx || err_y ≤ etoly
- else
- solved_lq = solved_lq || err_x ≤ etolx || err_y ≤ etoly
- end
+ solved_lq = solved_lq || err_x ≤ utolx || err_y ≤ utoly
+ solved_cg = transfer_to_craig && (solved_cg || err_x ≤ utolx || err_y ≤ utoly)
end
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm_lq)
# Update iteration index.
iter = iter + 1
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
if solved_cg
if λ > 0
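A minimal call against the revised `lnlq` keyword list (a sketch; the matrix and right-hand side are made up):

    using Krylov, LinearAlgebra
    A = [1.0 2.0 0.0; 0.0 1.0 1.0]   # 2 × 3 with full row rank, so Ax = b is consistent
    b = [3.0, 2.0]
    x, y, stats = lnlq(A, b; utolx=1e-10, utoly=1e-10, verbose=1, iostream=stdout)
    norm(A * x - b)                   # ≈ 0: x is the least-norm solution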
diff --git a/src/lslq.jl b/src/lslq.jl
index 908de19c5..4e26fb67a 100644
--- a/src/lslq.jl
+++ b/src/lslq.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# LSLQ is formally equivalent to applying SYMMLQ to the normal equations
# but should be more stable.
@@ -21,15 +21,17 @@
export lslq, lslq!
-
"""
(x, stats) = lslq(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T),
- atol::T=√eps(T), btol::T=√eps(T), etol::T=√eps(T),
- window::Int=5, utol::T=√eps(T), itmax::Int=0,
- σ::T=zero(T), transfer_to_lsqr::Bool=false,
- conlim::T=1/√eps(T), verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ M=I, N=I, ldiv::Bool=false,
+ window::Int=5, transfer_to_lsqr::Bool=false,
+ sqd::Bool=false, λ::T=zero(T),
+ σ::T=zero(T), etol::T=√eps(T),
+ utol::T=√eps(T), btol::T=√eps(T),
+ conlim::T=1/√eps(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -38,31 +40,17 @@ Solve the regularized linear least-squares problem
minimize ‖b - Ax‖₂² + λ²‖x‖₂²
-using the LSLQ method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the LSLQ method, where λ ≥ 0 is a regularization parameter.
LSLQ is formally equivalent to applying SYMMLQ to the normal equations
- (AᵀA + λ²I) x = Aᵀb
+ (AᴴA + λ²I) x = Aᴴb
but is more stable.
-#### Main features
-
-* the solution estimate is updated along orthogonal directions
-* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing
-* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing
-* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error)
-* if `A` is rank deficient, identify the minimum least-squares solution
-
-#### Optional arguments
-
-* `M`: a symmetric and positive definite dual preconditioner
-* `N`: a symmetric and positive definite primal preconditioner
-* `sqd` indicates that we are solving a symmetric and quasi-definite system with `λ=1`
-
If `λ > 0`, we solve the symmetric and quasi-definite system
[ E A ] [ r ] [ b ]
- [ Aᵀ -λ²F ] [ x ] = [ 0 ],
+ [ Aᴴ -λ²F ] [ x ] = [ 0 ],
where E and F are symmetric and positive definite.
Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators.
@@ -72,39 +60,60 @@ The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LSLQ is then equivalent to applying SYMMLQ to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LSLQ is then equivalent to applying SYMMLQ to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`.
If `λ = 0`, we solve the symmetric and indefinite system
[ E A ] [ r ] [ b ]
- [ Aᵀ 0 ] [ x ] = [ 0 ].
+ [ Aᴴ 0 ] [ x ] = [ 0 ].
The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹.
-In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured.
+In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured.
`r` can be recovered by computing `E⁻¹(b - Ax)`.
-* `λ` is a regularization parameter (see the problem statement above)
-* `σ` is an underestimate of the smallest nonzero singular value of `A`---setting `σ` too large will result in an error in the course of the iterations
-* `atol` is a stopping tolerance based on the residual
-* `btol` is a stopping tolerance used to detect zero-residual problems
-* `etol` is a stopping tolerance based on the lower bound on the error
-* `window` is the number of iterations used to accumulate a lower bound on the error
-* `utol` is a stopping tolerance based on the upper bound on the error
-* `transfer_to_lsqr` return the CG solution estimate (i.e., the LSQR point) instead of the LQ estimate
-* `itmax` is the maximum number of iterations (0 means no imposed limit)
-* `conlim` is the limit on the estimated condition number of `A` beyond which the solution will be abandoned
-* `verbose` determines verbosity.
-
-#### Return values
+#### Main features
-`lslq` returns the tuple `(x, stats)` where
+* the solution estimate is updated along orthogonal directions
+* the norm of the solution estimate ‖xᴸₖ‖₂ is increasing
+* the error ‖eₖ‖₂ := ‖xᴸₖ - x*‖₂ is decreasing
+* it is possible to transition cheaply from the LSLQ iterate to the LSQR iterate if there is an advantage (there always is in terms of error)
+* if `A` is rank deficient, identify the minimum least-squares solution
-* `x` is the LQ solution estimate
-* `stats` collects other statistics on the run in a LSLQStats
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `transfer_to_lsqr`: transfer from the LSLQ point to the LSQR point, when it exists. The transfer is based on the residual norm;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `σ`: strict lower bound on the smallest positive singular value `σₘᵢₙ` such as `σ = (1-10⁻⁷)σₘᵢₙ`;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `utol`: stopping tolerance based on the upper bound on the error;
+* `btol`: stopping tolerance used to detect zero-residual problems;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`LSLQStats`](@ref) structure.
* `stats.err_lbnds` is a vector of lower bounds on the LQ error---the vector is empty if `window` is set to zero
* `stats.err_ubnds_lq` is a vector of upper bounds on the LQ error---the vector is empty if `σ` is left at zero
@@ -116,8 +125,8 @@ In this case, `N` can still be specified and indicates the weighted norm in whic
The iterations stop as soon as one of the following conditions holds true:
* the optimality residual is sufficiently small (`stats.status = "found approximate minimum least-squares solution"`) in the sense that either
- * ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ atol, or
- * 1 + ‖Aᵀr‖ / (‖A‖ ‖r‖) ≤ 1
+ * ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ atol, or
+ * 1 + ‖Aᴴr‖ / (‖A‖ ‖r‖) ≤ 1
* an approximate zero-residual solution has been found (`stats.status = "found approximate zero-residual solution"`) in the sense that either
* ‖r‖ / ‖b‖ ≤ btol + atol ‖A‖ * ‖xᴸ‖ / ‖b‖, or
* 1 + ‖r‖ / ‖b‖ ≤ 1
@@ -127,9 +136,6 @@ The iterations stop as soon as one of the following conditions holds true:
* the lower bound on the LQ forward error is less than etol * ‖xᴸ‖
* the upper bound on the CG forward error is less than utol * ‖xᶜ‖
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
-
#### References
* R. Estrin, D. Orban and M. A. Saunders, [*Euclidean-norm error bounds for SYMMLQ and CG*](https://doi.org/10.1137/16M1094816), SIAM Journal on Matrix Analysis and Applications, 40(1), pp. 235--253, 2019.
@@ -153,16 +159,19 @@ See [`LslqSolver`](@ref) for more details about the `solver`.
function lslq! end
function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T),
- atol :: T=√eps(T), btol :: T=√eps(T), etol :: T=√eps(T),
- utol :: T=√eps(T), itmax :: Int=0, σ :: T=zero(T),
- transfer_to_lsqr :: Bool=false, conlim :: T=1/√eps(T),
+ M=I, N=I, ldiv :: Bool=false,
+ transfer_to_lsqr :: Bool=false,
+ sqd :: Bool=false, λ :: T=zero(T),
+ σ :: T=zero(T), etol :: T=√eps(T),
+ utol :: T=√eps(T), btol :: T=√eps(T),
+ conlim :: T=1/√eps(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback=solver->false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("LSLQ: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "LSLQ: system of %d equations in %d variables\n", m, n)
# Check sqd and λ parameters
sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
@@ -174,15 +183,15 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
- x, Nv, Aᵀu, w̄ = solver.x, solver.Nv, solver.Aᵀu, solver.w̄
+ x, Nv, Aᴴu, w̄ = solver.x, solver.Nv, solver.Aᴴu, solver.w̄
Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
rNorms, ArNorms, err_lbnds = stats.residuals, stats.Aresiduals, stats.err_lbnds
err_ubnds_lq, err_ubnds_cg = stats.err_ubnds_lq, stats.err_ubnds_cg
@@ -213,12 +222,12 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β₁, u)
MisI || @kscal!(m, one(FC)/β₁, Mu)
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv)) # = α₁
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
if α == 0
stats.niter = 0
stats.solved, stats.inconsistent = true, false
@@ -274,11 +283,12 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = m + n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²", "κ(A)", "‖xL‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm², Acond, xlqNorm)
status = "unknown"
- solved = solved_mach = solved_lim = (rNorm ≤ atol)
+ ε = atol + rtol * β₁
+ solved = solved_mach = solved_lim = (rNorm ≤ ε)
tired = iter ≥ itmax
ill_cond = ill_cond_mach = ill_cond_lim = false
zero_resid = zero_resid_mach = zero_resid_lim = false
@@ -298,9 +308,9 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β, u)
MisI || @kscal!(m, one(FC)/β, Mu)
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
if α ≠ 0
@@ -388,11 +398,11 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
end
end
- test1 = rNorm / β₁
+ test1 = rNorm
test2 = ArNorm / (Anorm * rNorm)
test3 = 1 / Acond
- t1 = test1 / (one(T) + Anorm * xlqNorm / β₁)
- rtol = btol + atol * Anorm * xlqNorm / β₁
+ t1 = test1 / (one(T) + Anorm * xlqNorm)
+ tol = btol + atol * Anorm * xlqNorm / β₁
# update LSLQ point for next iteration
@kaxpy!(n, c * ζ, w̄, x)
@@ -407,7 +417,7 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
# check stopping condition based on forward error lower bound
err_vec[mod(iter, window) + 1] = ζ
if iter ≥ window
- err_lbnd = norm(err_vec)
+ err_lbnd = @knrm2(window, err_vec)
history && push!(err_lbnds, err_lbnd)
fwd_err_lbnd = err_lbnd ≤ etol * xlqNorm
end
@@ -432,16 +442,16 @@ function lslq!(solver :: LslqSolver{T,FC,S}, A, b :: AbstractVector{FC};
tired = iter ≥ itmax
ill_cond_lim = (test3 ≤ ctol)
solved_lim = (test2 ≤ atol)
- zero_resid_lim = (test1 ≤ rtol)
+ zero_resid_lim = (test1 ≤ ε)
ill_cond = ill_cond_mach || ill_cond_lim
zero_resid = zero_resid_mach || zero_resid_lim
solved = solved_mach || solved_lim || zero_resid || fwd_err_lbnd || fwd_err_ubnd
iter = iter + 1
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm, Acond, xlqNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm, Acond, xlqNorm)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
if transfer_to_lsqr # compute LSQR point
@kaxpy!(n, ζ̄ , w̄, x)
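A sketch of the reworked `lslq` interface on a made-up overdetermined system, including the new `rtol` keyword:

    using Krylov
    A = [1.0 0.0; 0.0 2.0; 1.0 1.0]
    b = [1.0, 1.0, 1.0]
    x, stats = lslq(A, b; transfer_to_lsqr=true, window=5, rtol=1e-10)
    stats.err_lbnds                   # lower bounds accumulated over `window` iterations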
diff --git a/src/lsmr.jl b/src/lsmr.jl
index f4d8349d1..781d9448a 100644
--- a/src/lsmr.jl
+++ b/src/lsmr.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# LSMR is formally equivalent to applying MINRES to the normal equations
# but should be more stable. It is also formally equivalent to CRLS though
@@ -24,17 +24,16 @@
export lsmr, lsmr!
-
"""
(x, stats) = lsmr(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T),
+ M=I, N=I, ldiv::Bool=false,
+ window::Int=5, sqd::Bool=false, λ::T=zero(T),
+ radius::T=zero(T), etol::T=√eps(T),
axtol::T=√eps(T), btol::T=√eps(T),
- atol::T=zero(T), rtol::T=zero(T),
- etol::T=√eps(T), window::Int=5,
- itmax::Int=0, conlim::T=1/√eps(T),
- radius::T=zero(T), verbose::Int=0,
- history::Bool=false, ldiv::Bool=false,
- callback=solver->false)
+ conlim::T=1/√eps(T), atol::T=zero(T),
+ rtol::T=zero(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -43,24 +42,24 @@ Solve the regularized linear least-squares problem
minimize ‖b - Ax‖₂² + λ²‖x‖₂²
-using the LSMR method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the LSMR method, where λ ≥ 0 is a regularization parameter.
LSMR is formally equivalent to applying MINRES to the normal equations
- (AᵀA + λ²I) x = Aᵀb
+ (AᴴA + λ²I) x = Aᴴb
(and therefore to CRLS) but is more stable.
-LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+LSMR produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to CRLS, though can be substantially more accurate.
LSMR can be also used to find a null vector of a singular matrix A
-by solving the problem `min ‖Aᵀx - b‖` with any nonzero vector `b`.
-At a minimizer, the residual vector `r = b - Aᵀx` will satisfy `Ar = 0`.
+by solving the problem `min ‖Aᴴx - b‖` with any nonzero vector `b`.
+At a minimizer, the residual vector `r = b - Aᴴx` will satisfy `Ar = 0`.
If `λ > 0`, we solve the symmetric and quasi-definite system
[ E A ] [ r ] [ b ]
- [ Aᵀ -λ²F ] [ x ] = [ 0 ],
+ [ Aᴴ -λ²F ] [ x ] = [ 0 ],
where E and F are symmetric and positive definite.
Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators.
@@ -70,23 +69,51 @@ The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LSMR is then equivalent to applying MINRES to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LSMR is then equivalent to applying MINRES to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`.
If `λ = 0`, we solve the symmetric and indefinite system
[ E A ] [ r ] [ b ]
- [ Aᵀ 0 ] [ x ] = [ 0 ].
+ [ Aᴴ 0 ] [ x ] = [ 0 ].
The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹.
-In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured.
+In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured.
`r` can be recovered by computing `E⁻¹(b - Ax)`.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `axtol`: tolerance on the backward error;
+* `btol`: stopping tolerance used to detect zero-residual problems;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`LsmrStats`](@ref) structure.
#### Reference
@@ -110,16 +137,18 @@ See [`LsmrSolver`](@ref) for more details about the `solver`.
function lsmr! end
function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T),
+ M=I, N=I, ldiv :: Bool=false,
+ sqd :: Bool=false, λ :: T=zero(T),
+ radius :: T=zero(T), etol :: T=√eps(T),
axtol :: T=√eps(T), btol :: T=√eps(T),
- atol :: T=zero(T), rtol :: T=zero(T),
- etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T),
- radius :: T=zero(T), verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ conlim :: T=1/√eps(T), atol :: T=zero(T),
+ rtol :: T=zero(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("LSMR: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "LSMR: system of %d equations in %d variables\n", m, n)
# Check sqd and λ parameters
sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
@@ -131,15 +160,15 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
- x, Nv, Aᵀu, h, hbar = solver.x, solver.Nv, solver.Aᵀu, solver.h, solver.hbar
+ x, Nv, Aᴴu, h, hbar = solver.x, solver.Nv, solver.Aᴴu, solver.h, solver.hbar
Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
@@ -166,8 +195,8 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β₁, u)
MisI || @kscal!(m, one(FC)/β₁, Mu)
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
@@ -210,10 +239,10 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = m + n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "α", "cos", "sin", "‖A‖²")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm²)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %8s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "α", "cos", "sin", "‖A‖²")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm²)
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
if α == 0
stats.niter = 0
stats.solved, stats.inconsistent = true, false
@@ -248,9 +277,9 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β, u)
MisI || @kscal!(m, one(FC)/β, Mu)
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
if α ≠ 0
@@ -287,7 +316,7 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# the step ϕ/ρ is not necessarily positive
σ = ζ / (ρ * ρbar)
if radius > 0
- t1, t2 = to_boundary(x, hbar, radius)
+ t1, t2 = to_boundary(n, x, hbar, radius)
tmax, tmin = max(t1, t2), min(t1, t2)
on_boundary = σ > tmax || σ < tmin
σ = σ > 0 ? min(σ, tmax) : max(σ, tmin)
@@ -336,7 +365,7 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
t1 = test1 / (one(T) + Anorm * xNorm / β₁)
rNormtol = btol + axtol * Anorm * xNorm / β₁
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e\n", iter, rNorm, ArNorm, β, α, c, s, Anorm²)
# Stopping conditions that do not depend on user input.
# This is to guard against tolerances that are unreasonably small.
@@ -357,7 +386,7 @@ function lsmr!(solver :: LsmrSolver{T,FC,S}, A, b :: AbstractVector{FC};
zero_resid = zero_resid_mach | zero_resid_lim
solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
ill_cond_mach && (status = "condition number seems too large for this machine")
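The trust-region branch above now calls `to_boundary(n, x, hbar, radius)`; a hedged sketch with random data exercises it:

    using Krylov, LinearAlgebra
    A = rand(10, 4); b = rand(10)
    x, stats = lsmr(A, b; radius=0.1)  # constrain the step to ‖x‖ ≤ 0.1
    norm(x)                            # stays within the radius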
diff --git a/src/lsqr.jl b/src/lsqr.jl
index dd3779dce..0351b75e1 100644
--- a/src/lsqr.jl
+++ b/src/lsqr.jl
@@ -5,7 +5,7 @@
#
# equivalently, of the normal equations
#
-# AᵀAx = Aᵀb.
+# AᴴAx = Aᴴb.
#
# LSQR is formally equivalent to applying the conjugate gradient method
# to the normal equations but should be more stable. It is also formally
@@ -24,16 +24,16 @@
export lsqr, lsqr!
-
"""
(x, stats) = lsqr(A, b::AbstractVector{FC};
- M=I, N=I, sqd::Bool=false, λ::T=zero(T),
+ M=I, N=I, ldiv::Bool=false,
+ window::Int=5, sqd::Bool=false, λ::T=zero(T),
+ radius::T=zero(T), etol::T=√eps(T),
axtol::T=√eps(T), btol::T=√eps(T),
- atol::T=zero(T), rtol::T=zero(T),
- etol::T=√eps(T), window::Int=5,
- itmax::Int=0, conlim::T=1/√eps(T),
- radius::T=zero(T), verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ conlim::T=1/√eps(T), atol::T=zero(T),
+ rtol::T=zero(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
@@ -42,20 +42,20 @@ Solve the regularized linear least-squares problem
minimize ‖b - Ax‖₂² + λ²‖x‖₂²
-using the LSQR method, where λ ≥ 0 is a regularization parameter.
+of size m × n using the LSQR method, where λ ≥ 0 is a regularization parameter.
LSQR is formally equivalent to applying CG to the normal equations
- (AᵀA + λ²I) x = Aᵀb
+ (AᴴA + λ²I) x = Aᴴb
(and therefore to CGLS) but is more stable.
-LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᵀr‖₂.
+LSQR produces monotonic residuals ‖r‖₂ but not optimality residuals ‖Aᴴr‖₂.
It is formally equivalent to CGLS, though can be slightly more accurate.
If `λ > 0`, LSQR solves the symmetric and quasi-definite system
[ E A ] [ r ] [ b ]
- [ Aᵀ -λ²F ] [ x ] = [ 0 ],
+ [ Aᴴ -λ²F ] [ x ] = [ 0 ],
where E and F are symmetric and positive definite.
Preconditioners M = E⁻¹ ≻ 0 and N = F⁻¹ ≻ 0 may be provided in the form of linear operators.
@@ -65,23 +65,51 @@ The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹ + λ²‖x‖²_F.
-For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᵀKx`.
-LSQR is then equivalent to applying CG to `(AᵀE⁻¹A + λ²F)x = AᵀE⁻¹b` with `r = E⁻¹(b - Ax)`.
+For a symmetric and positive definite matrix `K`, the K-norm of a vector `x` is `‖x‖²_K = xᴴKx`.
+LSQR is then equivalent to applying CG to `(AᴴE⁻¹A + λ²F)x = AᴴE⁻¹b` with `r = E⁻¹(b - Ax)`.
If `λ = 0`, we solve the symmetric and indefinite system
[ E A ] [ r ] [ b ]
- [ Aᵀ 0 ] [ x ] = [ 0 ].
+ [ Aᴴ 0 ] [ x ] = [ 0 ].
The system above represents the optimality conditions of
minimize ‖b - Ax‖²_E⁻¹.
-In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᵀr` should be measured.
+In this case, `N` can still be specified and indicates the weighted norm in which `x` and `Aᴴr` should be measured.
`r` can be recovered by computing `E⁻¹(b - Ax)`.
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m.
+
+#### Keyword arguments
+
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the augmented system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the augmented system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `sqd`: if `true`, set `λ=1` for Hermitian quasi-definite systems;
+* `λ`: regularization parameter;
+* `radius`: add the trust-region constraint ‖x‖ ≤ `radius` if `radius > 0`. Useful to compute a step in a trust-region method for optimization;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `axtol`: tolerance on the backward error;
+* `btol`: stopping tolerance used to detect zero-residual problems;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -105,16 +133,18 @@ See [`LsqrSolver`](@ref) for more details about the `solver`.
function lsqr! end
function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, N=I, sqd :: Bool=false, λ :: T=zero(T),
+ M=I, N=I, ldiv :: Bool=false,
+ sqd :: Bool=false, λ :: T=zero(T),
+ radius :: T=zero(T), etol :: T=√eps(T),
axtol :: T=√eps(T), btol :: T=√eps(T),
- atol :: T=zero(T), rtol :: T=zero(T),
- etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T),
- radius :: T=zero(T), verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ conlim :: T=1/√eps(T), atol :: T=zero(T),
+ rtol :: T=zero(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("LSQR: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "LSQR: system of %d equations in %d variables\n", m, n)
# Check sqd and λ parameters
sqd && (λ ≠ 0) && error("sqd cannot be set to true if λ ≠ 0 !")
@@ -126,15 +156,15 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :u, S, m)
allocate_if(!NisI, solver, :v, S, n)
- x, Nv, Aᵀu, w = solver.x, solver.Nv, solver.Aᵀu, solver.w
+ x, Nv, Aᴴu, w = solver.x, solver.Nv, solver.Aᴴu, solver.w
Mu, Av, err_vec, stats = solver.Mu, solver.Av, solver.err_vec, solver.stats
rNorms, ArNorms = stats.residuals, stats.Aresiduals
reset!(stats)
@@ -162,8 +192,8 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
@kscal!(m, one(FC)/β₁, u)
MisI || @kscal!(m, one(FC)/β₁, Mu)
- mul!(Aᵀu, Aᵀ, u)
- Nv .= Aᵀu
+ mul!(Aᴴu, Aᴴ, u)
+ Nv .= Aᴴu
NisI || mulorldiv!(v, N, Nv, ldiv)
Anorm² = @kdotr(n, v, Nv)
Anorm = sqrt(Anorm²)
@@ -184,8 +214,8 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = m + n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %7s %7s %7s %7s\n", "k", "α", "β", "‖r‖", "‖Aᵀr‖", "compat", "backwrd", "‖A‖", "κ(A)")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %7s %7s %7s %7s\n", "k", "α", "β", "‖r‖", "‖Aᴴr‖", "compat", "backwrd", "‖A‖", "κ(A)")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, β₁, α, β₁, α, 0, 1, Anorm, Acond)
rNorm = β₁
r1Norm = rNorm
@@ -194,7 +224,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
history && push!(rNorms, r2Norm)
ArNorm = ArNorm0 = α * β
history && push!(ArNorms, ArNorm)
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
if α == 0
stats.niter = 0
stats.solved, stats.inconsistent = true, false
@@ -237,9 +267,9 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
Anorm² = Anorm² + α * α + β * β # = ‖B_{k-1}‖²
λ > 0 && (Anorm² += λ²)
- # 2. αₖ₊₁Nvₖ₊₁ = Aᵀuₖ₊₁ - βₖ₊₁Nvₖ
- mul!(Aᵀu, Aᵀ, u)
- @kaxpby!(n, one(FC), Aᵀu, -β, Nv)
+ # 2. αₖ₊₁Nvₖ₊₁ = Aᴴuₖ₊₁ - βₖ₊₁Nvₖ
+ mul!(Aᴴu, Aᴴ, u)
+ @kaxpby!(n, one(FC), Aᴴu, -β, Nv)
NisI || mulorldiv!(v, N, Nv, ldiv)
α = sqrt(@kdotr(n, v, Nv))
if α ≠ 0
@@ -272,7 +302,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
xENorm² = xENorm² + ϕ * ϕ
err_vec[mod(iter, window) + 1] = ϕ
- iter ≥ window && (err_lbnd = norm(err_vec))
+ iter ≥ window && (err_lbnd = @knrm2(window, err_vec))
τ = s * ϕ
θ = s * α
@@ -283,7 +313,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
# the step ϕ/ρ is not necessarily positive
σ = ϕ / ρ
if radius > 0
- t1, t2 = to_boundary(x, w, radius)
+ t1, t2 = to_boundary(n, x, w, radius)
tmax, tmin = max(t1, t2), min(t1, t2)
on_boundary = σ > tmax || σ < tmin
σ = σ > 0 ? min(σ, tmax) : max(σ, tmin)
@@ -325,7 +355,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
t1 = test1 / (one(T) + Anorm * xNorm / β₁)
rNormtol = btol + axtol * Anorm * xNorm / β₁
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e %7.1e\n", iter, α, β, rNorm, ArNorm, test1, test2, Anorm, Acond)
# Stopping conditions that do not depend on user input.
# This is to guard against tolerances that are unreasonably small.
@@ -346,7 +376,7 @@ function lsqr!(solver :: LsqrSolver{T,FC,S}, A, b :: AbstractVector{FC};
zero_resid = zero_resid_mach | zero_resid_lim
solved = solved_mach | solved_lim | solved_opt | zero_resid | fwd_err | on_boundary
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
ill_cond_mach && (status = "condition number seems too large for this machine")
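For completeness, a sketch of the regularized `lsqr` variants (the data is made up):

    using Krylov
    A = rand(8, 8); b = rand(8)
    x, stats = lsqr(A, b; λ=1.0e-2)    # explicit regularization parameter
    x, stats = lsqr(A, b; sqd=true)    # Hermitian quasi-definite case, sets λ = 1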
diff --git a/src/minres.jl b/src/minres.jl
index cbaefee9f..f82bbc350 100644
--- a/src/minres.jl
+++ b/src/minres.jl
@@ -3,7 +3,7 @@
#
# minimize ‖Ax - b‖₂
#
-# where A is square and symmetric.
+# where A is Hermitian.
#
# MINRES is formally equivalent to applying the conjugate residuals method
# to Ax = b when A is positive definite, but is more general and also applies
@@ -21,20 +21,22 @@
export minres, minres!
-
"""
(x, stats) = minres(A, b::AbstractVector{FC};
- M=I, λ::T=zero(T), atol::T=√eps(T)/100,
- rtol::T=√eps(T)/100, ratol :: T=zero(T),
- rrtol :: T=zero(T), etol::T=√eps(T),
- window::Int=5, itmax::Int=0,
- conlim::T=1/√eps(T), verbose::Int=0,
- history::Bool=false, ldiv::Bool=false,
- callback=solver->false)
+ M=I, ldiv::Bool=false, window::Int=5,
+ λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), etol::T=√eps(T),
+ conlim::T=1/√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, stats) = minres(A, b, x0::AbstractVector; kwargs...)
+
+MINRES can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
Solve the shifted linear least-squares problem
minimize ‖b - (A + λI)x‖₂²
@@ -43,26 +45,44 @@ or the shifted linear system
(A + λI) x = b
-using the MINRES method, where λ ≥ 0 is a shift parameter,
-where A is square and symmetric.
+of size n using the MINRES method, where λ ≥ 0 is a shift parameter,
+where A is Hermitian.
MINRES is formally equivalent to applying CR to Ax=b when A is positive
definite, but is typically more stable and also applies to the case where
A is indefinite.
-MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᵀr‖₂.
+MINRES produces monotonic residuals ‖r‖₂ and optimality residuals ‖Aᴴr‖₂.
+
+#### Input arguments
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be symmetric and positive definite.
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n.
-MINRES can be warm-started from an initial guess `x0` with the method
+#### Optional argument
- (x, stats) = minres(A, b, x0; kwargs...)
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-where `kwargs` are the same keyword arguments as above.
+#### Keyword arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms, or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
+
+#### Output arguments
+
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -99,22 +119,24 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0
end
function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), atol :: T=√eps(T)/100, rtol :: T=√eps(T)/100,
- ratol :: T=zero(T), rrtol :: T=zero(T), etol :: T=√eps(T),
- itmax :: Int=0, conlim :: T=1/√eps(T), verbose :: Int=0,
- history :: Bool=false, ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
-
- n, m = size(A)
+ M=I, ldiv :: Bool=false,
+ λ :: T=zero(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), etol :: T=√eps(T),
+ conlim :: T=1/√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+
+ m, n = size(A)
m == n || error("System must be square")
length(b) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("MINRES: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "MINRES: system of size %d\n", n)
# Tests M = Iₙ
MisI = (M === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :v, S, n)
@@ -189,16 +211,15 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = 2*n)
- (verbose > 0) && @printf("%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s\n", "k", "‖r‖", "‖Aᵀr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %8s %8s %7s %7s %7s %7s\n", "k", "‖r‖", "‖Aᴴr‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1", "test2")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond)
- tol = atol + rtol * β₁
- rNormtol = ratol + rrtol * β₁
+ ε = atol + rtol * β₁
stats.status = "unknown"
solved = solved_mach = solved_lim = (rNorm ≤ rtol)
tired = iter ≥ itmax
ill_cond = ill_cond_mach = ill_cond_lim = false
- zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ tol)
+ zero_resid = zero_resid_mach = zero_resid_lim = (rNorm ≤ ε)
fwd_err = false
user_requested_exit = false
@@ -241,7 +262,7 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
ϵ = sn * β
δbar = -cs * β
root = sqrt(γbar * γbar + δbar * δbar)
- ArNorm = ϕbar * root # = ‖Aᵀrₖ₋₁‖
+ ArNorm = ϕbar * root # = ‖Aᴴrₖ₋₁‖
history && push!(ArNorms, ArNorm)
# Compute the next plane rotation.
@@ -266,7 +287,7 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Compute lower bound on forward error.
err_vec[mod(iter, window) + 1] = ϕ
- iter ≥ window && (err_lbnd = norm(err_vec))
+ iter ≥ window && (err_lbnd = @knrm2(window, err_vec))
γmax = max(γmax, γ)
γmin = min(γmin, γ)
@@ -292,11 +313,11 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
Acond = γmax / γmin
history && push!(Aconds, Acond)
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e %7.1e\n", iter, rNorm, ArNorm, β, cs, sn, ANorm, Acond, test1, test2)
if iter == 1 && β / β₁ ≤ 10 * ϵM
- # Aᵀb = 0 so x = 0 is a minimum least-squares solution
- stats.niter = 0
+ # Aᴴb = 0 so x = 0 is a minimum least-squares solution
+ stats.niter = 1
stats.solved, stats.inconsistent = true, true
stats.status = "x is a minimum least-squares solution"
solver.warm_start = false
@@ -314,18 +335,18 @@ function minres!(solver :: MinresSolver{T,FC,S}, A, b :: AbstractVector{FC};
# Stopping conditions based on user-provided tolerances.
tired = iter ≥ itmax
ill_cond_lim = (one(T) / Acond ≤ ctol)
- solved_lim = (test2 ≤ tol)
- zero_resid_lim = (test1 ≤ tol)
- resid_decrease_lim = (rNorm ≤ rNormtol)
+ solved_lim = (test2 ≤ ε)
+ zero_resid_lim = MisI && (test1 ≤ eps(T))
+ resid_decrease_lim = (rNorm ≤ ε)
iter ≥ window && (fwd_err = err_lbnd ≤ etol * sqrt(xENorm²))
user_requested_exit = callback(solver) :: Bool
- zero_resid = zero_resid_mach | zero_resid_lim
- resid_decrease = resid_decrease_mach | resid_decrease_lim
- ill_cond = ill_cond_mach | ill_cond_lim
- solved = solved_mach | solved_lim | zero_resid | fwd_err | resid_decrease
+ zero_resid = zero_resid_mach || zero_resid_lim
+ resid_decrease = resid_decrease_mach || resid_decrease_lim
+ ill_cond = ill_cond_mach || ill_cond_lim
+ solved = solved_mach || solved_lim || zero_resid || fwd_err || resid_decrease
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
ill_cond_mach && (status = "condition number seems too large for this machine")
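A sketch of the documented warm start for `minres` on a small Hermitian system (matrix and guess are illustrative):

    using Krylov, LinearAlgebra
    n = 50
    A = SymTridiagonal(2 * ones(n), -ones(n - 1))   # Hermitian positive definite
    b = ones(n)
    x0 = ones(n)                                     # initial guess
    x, stats = minres(A, b, x0; rtol=1e-8, itmax=2n)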
diff --git a/src/minres_qlp.jl b/src/minres_qlp.jl
index bbfbf856b..72662f97e 100644
--- a/src/minres_qlp.jl
+++ b/src/minres_qlp.jl
@@ -18,30 +18,52 @@ export minres_qlp, minres_qlp!
"""
(x, stats) = minres_qlp(A, b::AbstractVector{FC};
- M=I, atol::T=√eps(T), rtol::T=√eps(T),
- ctol::T=√eps(T), λ::T=zero(T), itmax::Int=0,
+ M=I, ldiv::Bool=false, Artol::T=√eps(T),
+ λ::T=zero(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, stats) = minres_qlp(A, b, x0::AbstractVector; kwargs...)
+
+MINRES-QLP can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
MINRES-QLP is the only method based on the Lanczos process that returns the minimum-norm
-solution on singular inconsistent systems (A + λI)x = b, where λ is a shift parameter.
+solution on singular inconsistent systems (A + λI)x = b of size n, where λ is a shift parameter.
It is significantly more complex but can be more reliable than MINRES when A is ill-conditioned.
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be symmetric and positive definite.
M also indicates the weighted norm in which residuals are measured.
-MINRES-QLP can be warm-started from an initial guess `x0` with the method
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
+
+* `x0`: a vector of length n that represents an initial guess of the solution x.
+
+#### Keyword arguments
- (x, stats) = minres_qlp(A, b, x0; kwargs...)
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `Artol`: relative stopping tolerance based on the Aᴴ-residual norm;
+* `λ`: regularization parameter;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -80,22 +102,23 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F
end
function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- ctol :: T=√eps(T), λ ::T=zero(T), itmax :: Int=0,
+ M=I, ldiv :: Bool=false, Artol :: T=√eps(T),
+ λ ::T=zero(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- n, m = size(A)
+ m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("MINRES-QLP: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "MINRES-QLP: system of size %d\n", n)
# Tests M = Iₙ
MisI = (M === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :vₖ, S, n)
@@ -147,8 +170,8 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F
ε = atol + rtol * rNorm
κ = zero(T)
- (verbose > 0) && @printf("%5s %7s %7s %7s %7s %8s %7s %8s %7s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗")
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s %7s %8s %7s %8s %7s\n", "k", "‖rₖ‖", "‖Arₖ₋₁‖", "βₖ₊₁", "Rₖ.ₖ", "Lₖ.ₖ", "‖A‖", "κ(A)", "backward")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s %7.1e %7s %8s %7.1e %7.1e %8s\n", iter, rNorm, "✗ ✗ ✗ ✗", βₖ, "✗ ✗ ✗ ✗", " ✗ ✗ ✗ ✗", ANorm, Acond, " ✗ ✗ ✗ ✗")
# Set up workspace.
M⁻¹vₖ₋₁ .= zero(FC)
@@ -246,7 +269,7 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F
# [sₖ -cₖ] [βₖ₊₁ ] [0 ]
(cₖ, sₖ, λₖ) = sym_givens(λbarₖ, βₖ₊₁)
- # Compute [ zₖ ] = (Qₖ)ᵀβ₁e₁
+ # Compute [ zₖ ] = (Qₖ)ᴴβ₁e₁
# [ζbarₖ₊₁]
#
# [cₖ sₖ] [ζbarₖ] = [ ζₖ ]
@@ -312,7 +335,7 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F
τₖ = (ξₖ - ψbarₖ₋₁ * τₖ₋₁) / μbarₖ
end
- # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᵀ
+ # Compute directions wₖ₋₂, ẘₖ₋₁ and w̄ₖ, last columns of Wₖ = Vₖ(Pₖ)ᴴ
if iter == 1
# w̅₁ = v₁
@. wₖ = vₖ
@@ -352,7 +375,7 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F
# Update ‖Arₖ₋₁‖ estimate
# ‖ Arₖ₋₁ ‖ = |ζbarₖ| * √(|λbarₖ|² + |γbarₖ|²)
ArNorm = abs(ζbarₖ) * √(abs2(λbarₖ) + abs2(cₖ₋₁ * βₖ₊₁))
- iter == 1 && (κ = atol + ctol * ArNorm)
+ iter == 1 && (κ = atol + Artol * ArNorm)
history && push!(ArNorms, ArNorm)
ANorm = sqrt(ANorm²)
@@ -383,14 +406,14 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F
# Stopping conditions based on user-provided tolerances.
tired = iter ≥ itmax
resid_decrease_lim = (rNorm ≤ ε)
- zero_resid_lim = (backward ≤ ε)
+ zero_resid_lim = MisI && (backward ≤ eps(T))
breakdown = βₖ₊₁ ≤ btol
user_requested_exit = callback(solver) :: Bool
zero_resid = zero_resid_mach | zero_resid_lim
resid_decrease = resid_decrease_mach | resid_decrease_lim
solved = resid_decrease | zero_resid
- inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ ctol) || (breakdown && !solved)
+ inconsistent = (ArNorm ≤ κ && abs(μbarₖ) ≤ Artol) || (breakdown && !solved)
# Update variables
if iter ≥ 2
@@ -405,9 +428,9 @@ function minres_qlp!(solver :: MinresQlpSolver{T,FC,S}, A, b :: AbstractVector{F
μbarₖ₋₁ = μbarₖ
ζbarₖ = ζbarₖ₊₁
βₖ = βₖ₊₁
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e %7.1e %8.1e %7.1e %7.1e %8.1e\n", iter, rNorm, ArNorm, βₖ₊₁, λₖ, μbarₖ, ANorm, Acond, backward)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
# Finalize the update of x
if iter ≥ 2
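
A usage sketch for the keywords documented above: a singular, inconsistent Hermitian system on which MINRES-QLP should return the minimum-norm least-squares solution (the 3×3 matrix and the `Artol` value are illustrative):

    using Krylov, LinearAlgebra

    A = diagm(0 => [1.0, 2.0, 0.0])      # singular Hermitian matrix
    b = [1.0, 1.0, 1.0]                  # b has a component outside range(A)

    x, stats = minres_qlp(A, b, Artol=1e-10)
    # Expected: x ≈ [1.0, 0.5, 0.0], the minimum-norm solution of min ‖Ax - b‖,
    # with stats.inconsistent set to true.
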
diff --git a/src/qmr.jl b/src/qmr.jl
index eb4a4eb46..e24fba79a 100644
--- a/src/qmr.jl
+++ b/src/qmr.jl
@@ -21,28 +21,48 @@
export qmr, qmr!
"""
- (x, stats) = qmr(A, b::AbstractVector{FC}; c::AbstractVector{FC}=b,
- atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ (x, stats) = qmr(A, b::AbstractVector{FC};
+ c::AbstractVector{FC}=b, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0, verbose::Int=0,
+ history::Bool=false, callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the square linear system Ax = b using the QMR method.
+ (x, stats) = qmr(A, b, x0::AbstractVector; kwargs...)
+
+QMR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+Solve the square linear system Ax = b of size n using QMR.
QMR is based on the Lanczos biorthogonalization process and requires two initial vectors `b` and `c`.
-The relation `bᵀc ≠ 0` must be satisfied and by default `c = b`.
-When `A` is symmetric and `b = c`, QMR is equivalent to MINRES.
+The relation `bᴴc ≠ 0` must be satisfied and by default `c = b`.
+When `A` is Hermitian and `b = c`, QMR is equivalent to MINRES.
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension n;
+* `b`: a vector of length n.
+
+#### Optional argument
+
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-QMR can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = qmr(A, b, x0; kwargs...)
+* `c`: the second initial vector of length `n` required by the Lanczos biorthogonalization process;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -83,20 +103,20 @@ end
function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: AbstractVector{FC}=b,
atol :: T=√eps(T), rtol :: T=√eps(T),
itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
- n, m = size(A)
+ m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("QMR: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "QMR: system of size %d\n", n)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, q, vₖ₋₁, vₖ, p = solver.uₖ₋₁, solver.uₖ, solver.q, solver.vₖ₋₁, solver.vₖ, solver.p
@@ -129,22 +149,22 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
itmax == 0 && (itmax = 2*n)
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
# Initialize the Lanczos biorthogonalization process.
- cᵗb = @kdot(n, c, r₀) # ⟨c,r₀⟩
- if cᵗb == 0
+ cᴴb = @kdot(n, c, r₀) # ⟨c,r₀⟩
+ if cᴴb == 0
stats.niter = 0
stats.solved = false
stats.inconsistent = false
- stats.status = "Breakdown bᵀc = 0"
+ stats.status = "Breakdown bᴴc = 0"
solver.warm_start = false
return solver
end
- βₖ = √(abs(cᵗb)) # β₁γ₁ = cᵀ(b - Ax₀)
- γₖ = cᵗb / βₖ # β₁γ₁ = cᵀ(b - Ax₀)
+ βₖ = √(abs(cᴴb)) # β₁γ₁ = cᴴ(b - Ax₀)
+ γₖ = cᴴb / βₖ # β₁γ₁ = cᴴ(b - Ax₀)
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
@@ -153,7 +173,7 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Rₖ)⁻¹
wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Vₖ(Rₖ)⁻¹
- ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁
+ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁
τₖ = @kdotr(n, vₖ, vₖ) # τₖ is used for the residual norm estimate
# Stopping criterion.
@@ -169,10 +189,10 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
# Continue the Lanczos biorthogonalization process.
# AVₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀUₖ = Uₖ(Tₖ)ᵀ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴUₖ = Uₖ(Tₖ)ᴴ + γ̄ₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , vₖ) # Forms vₖ₊₁ : q ← Avₖ
- mul!(p, Aᵀ, uₖ) # Forms uₖ₊₁ : p ← Aᵀuₖ
+ mul!(p, Aᴴ, uₖ) # Forms uₖ₊₁ : p ← Aᴴuₖ
@kaxpy!(n, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - β̄ₖ * uₖ₋₁
@@ -182,9 +202,9 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
@kaxpy!(n, - αₖ , vₖ, q) # q ← q - αₖ * vₖ
@kaxpy!(n, -conj(αₖ), uₖ, p) # p ← p - ᾱₖ * uₖ
- pᵗq = @kdot(n, p, q) # pᵗq = ⟨p,q⟩
- βₖ₊₁ = √(abs(pᵗq)) # βₖ₊₁ = √(|pᵗq|)
- γₖ₊₁ = pᵗq / βₖ₊₁ # γₖ₊₁ = pᵗq / βₖ₊₁
+ pᴴq = @kdot(n, p, q) # pᴴq = ⟨p,q⟩
+ βₖ₊₁ = √(abs(pᴴq)) # βₖ₊₁ = √(|pᴴq|)
+ γₖ₊₁ = pᴴq / βₖ₊₁ # γₖ₊₁ = pᴴq / βₖ₊₁
# Update the QR factorization of Tₖ₊₁.ₖ = Qₖ [ Rₖ ].
# [ Oᵀ ]
@@ -271,7 +291,7 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@. uₖ₋₁ = uₖ # uₖ₋₁ ← uₖ
- if pᵗq ≠ zero(FC)
+ if pᴴq ≠ zero(FC)
@. vₖ = q / βₖ₊₁ # βₖ₊₁vₖ₊₁ = q
@. uₖ = p / conj(γₖ₊₁) # γ̄ₖ₊₁uₖ₊₁ = p
end
@@ -303,10 +323,10 @@ function qmr!(solver :: QmrSolver{T,FC,S}, A, b :: AbstractVector{FC}; c :: Abst
resid_decrease_lim = rNorm ≤ ε
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
- breakdown = !solved && (pᵗq == 0)
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm)
+ breakdown = !solved && (pᴴq == 0)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
breakdown && (status = "Breakdown ⟨uₖ₊₁,vₖ₊₁⟩ = 0")
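
A sketch of the two call forms documented above, the default `c = b` and a warm start (the sparse test matrix is illustrative; the `10I` shift keeps it well conditioned):

    using Krylov, LinearAlgebra, SparseArrays

    n = 50
    A = sprandn(n, n, 0.2) + 10 * I      # square, nonsymmetric
    b = randn(n)

    x, stats = qmr(A, b)                 # default c = b, so bᴴc = ‖b‖² ≠ 0
    x, stats = qmr(A, b, x)              # warm start from the previous solution
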
diff --git a/src/symmlq.jl b/src/symmlq.jl
index 7b889c715..81477fc66 100644
--- a/src/symmlq.jl
+++ b/src/symmlq.jl
@@ -1,5 +1,5 @@
# An implementation of SYMMLQ for the solution of the
-# linear system Ax = b, where A is square and symmetric.
+# linear system Ax = b, where A is Hermitian.
#
# This implementation follows the original implementation by
# Michael Saunders described in
@@ -11,38 +11,62 @@
export symmlq, symmlq!
-
"""
- (x, stats) = symmlq(A, b::AbstractVector{FC}; window::Int=0,
- M=I, λ::T=zero(T), transfer_to_cg::Bool=true,
- λest::T=zero(T), atol::T=√eps(T), rtol::T=√eps(T),
- etol::T=√eps(T), itmax::Int=0, conlim::T=1/√eps(T),
+ (x, stats) = symmlq(A, b::AbstractVector{FC};
+ M=I, ldiv::Bool=false, window::Int=5,
+ transfer_to_cg::Bool=true, λ::T=zero(T),
+ λest::T=zero(T), etol::T=√eps(T),
+ conlim::T=1/√eps(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, stats) = symmlq(A, b, x0::AbstractVector; kwargs...)
+
+SYMMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
Solve the shifted linear system
(A + λI) x = b
-using the SYMMLQ method, where λ is a shift parameter,
-and A is square and symmetric.
+of size n using the SYMMLQ method, where λ is a shift parameter, and A is Hermitian.
+
+SYMMLQ produces monotonic errors ‖x* - x‖₂.
+
+#### Input arguments
+
+* `A`: a linear operator that models a Hermitian matrix of dimension n;
+* `b`: a vector of length n.
-SYMMLQ produces monotonic errors ‖x*-x‖₂.
+#### Optional argument
-A preconditioner M may be provided in the form of a linear operator and is
-assumed to be symmetric and positive definite.
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-SYMMLQ can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = symmlq(A, b, x0; kwargs...)
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning;
+* `ldiv`: define whether the preconditioner uses `ldiv!` or `mul!`;
+* `window`: number of iterations used to accumulate a lower bound on the error;
+* `transfer_to_cg`: transfer from the SYMMLQ point to the CG point, when it exists. The transfer is based on the residual norm;
+* `λ`: regularization parameter;
+* `λest`: positive strict lower bound on the smallest eigenvalue `λₘᵢₙ` when solving a positive-definite system, such as `λest = (1-10⁻⁷)λₘᵢₙ`;
+* `etol`: stopping tolerance based on the lower bound on the error;
+* `conlim`: limit on the estimated condition number of `A` beyond which the solution will be abandoned;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `2n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SymmlqStats`](@ref) structure.
#### Reference
@@ -79,23 +103,25 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, x0
end
function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
- M=I, λ :: T=zero(T), transfer_to_cg :: Bool=true,
- λest :: T=zero(T), atol :: T=√eps(T), rtol :: T=√eps(T),
- etol :: T=√eps(T), itmax :: Int=0, conlim :: T=1/√eps(T),
+ M=I, ldiv :: Bool=false,
+ transfer_to_cg :: Bool=true, λ :: T=zero(T),
+ λest :: T=zero(T), etol :: T=√eps(T),
+ conlim :: T=1/√eps(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
m == n || error("System must be square")
length(b) == m || error("Inconsistent problem size")
- (verbose > 0) && @printf("SYMMLQ: system of size %d\n", n)
+ (verbose > 0) && @printf(iostream, "SYMMLQ: system of size %d\n", n)
# Tests M = Iₙ
MisI = (M === I)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
# Set up workspace.
allocate_if(!MisI, solver, :v, S, n)
@@ -213,8 +239,8 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
iter = 0
itmax == 0 && (itmax = 2 * n)
- (verbose > 0) && @printf("%5s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, β, cold, sold, ANorm, Acond)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %8s %8s %7s %7s %7s\n", "k", "‖r‖", "β", "cos", "sin", "‖A‖", "κ(A)", "test1")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e\n", iter, rNorm, β, cold, sold, ANorm, Acond)
tol = atol + rtol * β₁
status = "unknown"
@@ -301,8 +327,11 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
zetabark = zlist[jx] / clist[jx]
if γbar ≠ 0
- theta = abs(sum(clist[i] * sprod[i] * zlist[i] for i = 1 : window))
- theta = zetabark * theta + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2
+ theta = zero(T)
+ for i = 1 : window
+ theta += clist[i] * sprod[i] * zlist[i]
+ end
+ theta = zetabark * abs(theta) + abs(zetabark * ζbar * sprod[ix] * s) - zetabark^2
history && (errorscg[iter-window+1] = sqrt(abs(errorscg[iter-window+1]^2 - 2*theta)))
else
history && (errorscg[iter-window+1] = missing)
@@ -345,7 +374,7 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
ANorm = sqrt(ANorm²)
test1 = rNorm / (ANorm * xNorm)
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, β, c, s, ANorm, Acond, test1)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n", iter, rNorm, β, c, s, ANorm, Acond, test1)
# Reset variables
ϵold = ϵ
@@ -372,7 +401,7 @@ function symmlq!(solver :: SymmlqSolver{T,FC,S}, A, b :: AbstractVector{FC};
ill_cond = ill_cond_mach || ill_cond_lim
solved = solved_mach || zero_resid || zero_resid_mach || zero_resid_lim || fwd_err || resid_decrease_mach
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
# Compute CG point
# (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * w̅ₖ
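
A sketch of the `λest` and `window` keywords documented above on a positive-definite system (matrix and values illustrative):

    using Krylov, LinearAlgebra

    A = diagm(0 => 1.0:10.0)             # Hermitian positive definite, λₘᵢₙ = 1
    b = ones(10)

    # λest must be a strict lower bound on λₘᵢₙ, e.g. (1 - 10⁻⁷)λₘᵢₙ.
    x, stats = symmlq(A, b, λest=(1 - 1e-7), window=5, history=true)
    # stats is a SymmlqStats; with history=true it also records error estimates.
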
diff --git a/src/tricg.jl b/src/tricg.jl
index 5acff2d52..4096a9ffe 100644
--- a/src/tricg.jl
+++ b/src/tricg.jl
@@ -13,30 +13,32 @@ export tricg, tricg!
"""
(x, y, stats) = tricg(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- spd::Bool=false, snd::Bool=false, flip::Bool=false,
- τ::T=one(T), ν::T=-one(T), itmax::Int=0,
+ M=I, N=I, ldiv::Bool=false,
+ spd::Bool=false, snd::Bool=false,
+ flip::Bool=false, τ::T=one(T),
+ ν::T=-one(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-TriCG solves the symmetric linear system
+ (x, y, stats) = tricg(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
+
+TriCG can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above.
+
+Given a matrix `A` of dimension m × n, TriCG solves the Hermitian linear system
[ τE A ] [ x ] = [ b ]
- [ Aᵀ νF ] [ y ] [ c ],
+ [ Aᴴ νF ] [ y ] [ c ],
-where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0.
+of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0 and F = N⁻¹ ≻ 0.
`b` and `c` must both be nonzero.
TriCG could break down if `τ = 0` or `ν = 0`.
It's recommended to use TriMR in these cases.
-By default, TriCG solves symmetric and quasi-definite linear systems with τ = 1 and ν = -1.
-If `flip = true`, TriCG solves another known variant of SQD systems where τ = -1 and ν = 1.
-If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved.
-If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved.
-`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems.
+By default, TriCG solves Hermitian and quasi-definite linear systems with τ = 1 and ν = -1.
TriCG is based on the preconditioned orthogonal tridiagonalization process
and its relation with the preconditioned block-Lanczos process.
@@ -50,17 +52,39 @@ It's the Euclidean norm when `M` and `N` are identity operators.
TriCG stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`.
`atol` is an absolute tolerance and `rtol` is a relative tolerance.
-Additional details can be displayed if verbose mode is enabled (verbose > 0).
-Information will be displayed every `verbose` iterations.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
+
+#### Optional arguments
+
+* `x0`: a vector of length m that represents an initial guess of the solution x;
+* `y0`: a vector of length n that represents an initial guess of the solution y.
-TriCG can be warm-started from initial guesses `x0` and `y0` with the method
+#### Keyword arguments
- (x, y, stats) = tricg(A, b, c, x0, y0; kwargs...)
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear systems;
+* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems;
+* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems;
+* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length m;
+* `y`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -98,16 +122,18 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
end
function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- spd :: Bool=false, snd :: Bool=false, flip :: Bool=false,
- τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0,
+ M=I, N=I, ldiv :: Bool=false,
+ spd :: Bool=false, snd :: Bool=false,
+ flip :: Bool=false, τ :: T=one(T),
+ ν :: T=-one(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("TriCG: system of %d equations in %d variables\n", m+n, m+n)
+ (verbose > 0) && @printf(iostream, "TriCG: system of %d equations in %d variables\n", m+n, m+n)
# Check flip, spd and snd parameters
spd && flip && error("The matrix cannot be SPD and SQD")
@@ -120,8 +146,8 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Determine τ and ν associated to SQD, SPD or SND systems.
flip && (τ = -one(T) ; ν = one(T))
@@ -133,7 +159,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :vₖ, S, m)
@@ -164,12 +190,12 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0
# [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ]
- # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ]
+ # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ]
if warm_start
mul!(b₀, A, Δy)
(τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀)
@kaxpby!(m, one(FC), b, -one(FC), b₀)
- mul!(c₀, Aᵀ, Δx)
+ mul!(c₀, Aᴴ, Δx)
(ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀)
@kaxpby!(n, one(FC), c, -one(FC), c₀)
end
@@ -196,7 +222,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
error("c must be nonzero")
end
- # Initialize directions Gₖ such that Lₖ(Gₖ)ᵀ = (Wₖ)ᵀ
+ # Initialize directions Gₖ such that L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ
gx₂ₖ₋₁ .= zero(FC)
gy₂ₖ₋₁ .= zero(FC)
gx₂ₖ .= zero(FC)
@@ -207,8 +233,8 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
history && push!(rNorms, rNorm)
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ)
# Set up workspace.
d₂ₖ₋₃ = d₂ₖ₋₂ = zero(T)
@@ -231,10 +257,10 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Continue the orthogonal tridiagonalization process.
# AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ
if iter ≥ 2
@kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁
@@ -254,14 +280,14 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# [0 u₁ ••• 0 uₖ]
#
# rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ
- # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ]
+ # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ]
#
# block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ
- # [ Aᵀ νF ] [ 0 F ]
+ # [ Aᴴ νF ] [ 0 F ]
#
- # TriCG subproblem : (Wₖ)ᵀ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂
+ # TriCG subproblem : (Wₖ)ᴴ * rₖ = 0 ↔ Sₖ.ₖzₖ = β₁e₁ + γ₁e₂
#
- # Update the LDLᵀ factorization of Sₖ.ₖ.
+ # Update the LDLᴴ factorization of Sₖ.ₖ.
#
# [ τ α₁ γ₂ 0 • • • • 0 ]
# [ ᾱ₁ ν β₂ • • ]
@@ -306,7 +332,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
π₂ₖ = -(δₖ * d₂ₖ₋₁ * π₂ₖ₋₁ + λₖ * d₂ₖ₋₂ * π₂ₖ₋₂ + ηₖ * d₂ₖ₋₃ * π₂ₖ₋₃) / d₂ₖ
end
- # Solve Gₖ = Wₖ(Lₖ)⁻ᵀ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ.
+ # Solve Gₖ = Wₖ(Lₖ)⁻ᴴ ⟷ L̄ₖ(Gₖ)ᵀ = (Wₖ)ᵀ.
if iter == 1
# [ 1 0 ] [ gx₁ gy₁ ] = [ v₁ 0 ]
# [ δ̄₁ 1 ] [ gx₂ gy₂ ] [ 0 u₁ ]
@@ -342,7 +368,7 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Compute vₖ₊₁ and uₖ₊₁
MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ
- NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ
+ NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ
βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E
γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F
@@ -388,9 +414,9 @@ function tricg!(solver :: TricgSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
breakdown && (status = "inconsistent linear system")
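
A sketch of the partitioned-system interface documented above; the scaling of `A` is illustrative and keeps σₘₐₓ(A) < 1, so the `spd = true` variant below is genuinely positive definite:

    using Krylov, LinearAlgebra

    m, n = 6, 4
    A = randn(m, n) / 10
    b = randn(m)
    c = randn(n)

    # Default τ = 1, ν = -1: the Hermitian quasi-definite system [I A; Aᴴ -I].
    x, y, stats = tricg(A, b, c)

    # τ = ν = 1: positive definite here because ‖A‖ < 1.
    x, y, stats = tricg(A, b, c, spd=true)
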
diff --git a/src/trilqr.jl b/src/trilqr.jl
index edcb4c9b9..e11a8a6c6 100644
--- a/src/trilqr.jl
+++ b/src/trilqr.jl
@@ -1,5 +1,5 @@
# An implementation of TRILQR for the solution of square or
-# rectangular consistent linear adjoint systems Ax = b and Aᵀy = c.
+# rectangular consistent linear adjoint systems Ax = b and Aᴴy = c.
#
# This method is described in
#
@@ -14,32 +14,53 @@ export trilqr, trilqr!
"""
(x, y, stats) = trilqr(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true,
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ transfer_to_usymcg::Bool=true, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
+ (x, y, stats) = trilqr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
+
+TriLQR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above.
+
Combine USYMLQ and USYMQR to solve adjoint systems.
[0 A] [y] = [b]
- [Aᵀ 0] [x] [c]
+ [Aᴴ 0] [x] [c]
+
+USYMLQ is used for solving the primal system `Ax = b` of size m × n.
+USYMQR is used for solving the dual system `Aᴴy = c` of size n × m.
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
-USYMLQ is used for solving primal system `Ax = b`.
-USYMQR is used for solving dual system `Aᵀy = c`.
+#### Optional arguments
-An option gives the possibility of transferring from the USYMLQ point to the
-USYMCG point, when it exists. The transfer is based on the residual norm.
+* `x0`: a vector of length n that represents an initial guess of the solution x;
+* `y0`: a vector of length m that represents an initial guess of the solution y.
-TriLQR can be warm-started from initial guesses `x0` and `y0` with the method
+#### Keyword arguments
- (x, y, stats) = trilqr(A, b, c, x0, y0; kwargs...)
+* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. The transfer is based on the residual norm;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `y`: a dense vector of length m;
+* `stats`: statistics collected on the run in an [`AdjointStats`](@ref) structure.
#### Reference
@@ -77,23 +98,24 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
end
function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true,
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ transfer_to_usymcg :: Bool=true, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("TRILQR: primal system of %d equations in %d variables\n", m, n)
- (verbose > 0) && @printf("TRILQR: dual system of %d equations in %d variables\n", n, m)
+ (verbose > 0) && @printf(iostream, "TRILQR: primal system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "TRILQR: dual system of %d equations in %d variables\n", n, m)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, p, d̅, x, stats = solver.uₖ₋₁, solver.uₖ, solver.p, solver.d̅, solver.x, solver.stats
@@ -107,7 +129,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
if warm_start
mul!(r₀, A, Δx)
@kaxpby!(n, one(FC), b, -one(FC), r₀)
- mul!(s₀, Aᵀ, Δy)
+ mul!(s₀, Aᴴ, Δy)
@kaxpby!(n, one(FC), c, -one(FC), s₀)
end
@@ -115,7 +137,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
x .= zero(FC) # x₀
bNorm = @knrm2(m, r₀) # rNorm = ‖r₀‖
- # Initial solution y₀ and residual s₀ = c - Aᵀy₀.
+ # Initial solution y₀ and residual s₀ = c - Aᴴy₀.
t .= zero(FC) # t₀
cNorm = @knrm2(n, s₀) # sNorm = ‖s₀‖
@@ -127,8 +149,8 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
εL = atol + rtol * bNorm
εQ = atol + rtol * cNorm
ξ = zero(T)
- (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, bNorm, cNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s\n", "k", "‖rₖ‖", "‖sₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e\n", iter, bNorm, cNorm)
# Set up workspace.
βₖ = @knrm2(m, r₀) # β₁ = ‖r₀‖ = ‖v₁‖
@@ -136,17 +158,17 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
vₖ₋₁ .= zero(FC) # v₀ = 0
uₖ₋₁ .= zero(FC) # u₀ = 0
vₖ .= r₀ ./ βₖ # v₁ = (b - Ax₀) / β₁
- uₖ .= s₀ ./ γₖ # u₁ = (c - Aᵀy₀) / γ₁
+ uₖ .= s₀ ./ γₖ # u₁ = (c - Aᴴy₀) / γ₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and L̅ₖ modified over the course of two iterations
ψbarₖ₋₁ = ψₖ₋₁ = zero(FC) # ψₖ₋₁ and ψbarₖ are the last components of h̅ₖ = Qₖγ₁e₁
ϵₖ₋₃ = λₖ₋₂ = zero(FC) # Components of Lₖ₋₁
- wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᵀ
- wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᵀ
+ wₖ₋₃ .= zero(FC) # Column k-3 of Wₖ = Vₖ(Lₖ)⁻ᴴ
+ wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Vₖ(Lₖ)⁻ᴴ
# Stopping criterion.
inconsistent = false
@@ -166,10 +188,10 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
# Continue the SSY tridiagonalization process.
# AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ
@kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
@@ -236,7 +258,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ.
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ.
# [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ
# [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
if iter ≥ 2
@@ -295,7 +317,7 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ψbarₖ = sₖ * ψbarₖ₋₁
end
- # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᵀ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ.
+ # Compute the direction wₖ₋₁, the last column of Wₖ₋₁ = (Vₖ₋₁)(Lₖ₋₁)⁻ᴴ ⟷ (L̄ₖ₋₁)(Wₖ₋₁)ᵀ = (Vₖ₋₁)ᵀ.
# w₁ = v₁ / δ̄₁
if iter == 2
wₖ₋₁ = wₖ₋₂
@@ -374,11 +396,11 @@ function trilqr!(solver :: TrilqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
user_requested_exit = callback(solver) :: Bool
tired = iter ≥ itmax
- kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf("%5d %7s %7.1e\n", iter, "", sNorm)
- kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf("%5d %7.1e %7s\n", iter, rNorm_lq, "")
- kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf("%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm)
+ kdisplay(iter, verbose) && solved_primal && !solved_dual && @printf(iostream, "%5d %7s %7.1e\n", iter, "", sNorm)
+ kdisplay(iter, verbose) && !solved_primal && solved_dual && @printf(iostream, "%5d %7.1e %7s\n", iter, rNorm_lq, "")
+ kdisplay(iter, verbose) && !solved_primal && !solved_dual && @printf(iostream, "%5d %7.1e %7.1e\n", iter, rNorm_lq, sNorm)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
# Compute USYMCG point
# (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
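
A sketch of the adjoint-systems interface documented above; the square matrix is illustrative and guarantees that both systems are consistent:

    using Krylov, LinearAlgebra

    n = 10
    A = randn(n, n)                      # nonsingular almost surely
    b = randn(n)                         # primal right-hand side: Ax = b
    c = randn(n)                         # dual right-hand side: Aᴴy = c

    x, y, stats = trilqr(A, b, c)
    # stats is an AdjointStats; stats.solved_primal and stats.solved_dual
    # report the convergence of each system separately.
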
diff --git a/src/trimr.jl b/src/trimr.jl
index bc53633c2..9da4dfa92 100644
--- a/src/trimr.jl
+++ b/src/trimr.jl
@@ -13,30 +13,31 @@ export trimr, trimr!
"""
(x, y, stats) = trimr(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- M=I, N=I, atol::T=√eps(T), rtol::T=√eps(T),
- spd::Bool=false, snd::Bool=false, flip::Bool=false, sp::Bool=false,
- τ::T=one(T), ν::T=-one(T), itmax::Int=0,
+ M=I, N=I, ldiv::Bool=false,
+ spd::Bool=false, snd::Bool=false,
+ flip::Bool=false, sp::Bool=false,
+ τ::T=one(T), ν::T=-one(T), atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
verbose::Int=0, history::Bool=false,
- ldiv::Bool=false, callback=solver->false)
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-TriMR solves the symmetric linear system
+ (x, y, stats) = trimr(A, b, c, x0::AbstractVector, y0::AbstractVector; kwargs...)
+
+TriMR can be warm-started from initial guesses `x0` and `y0` where `kwargs` are the same keyword arguments as above.
+
+Given a matrix `A` of dimension m × n, TriMR solves the Hermitian linear system
[ τE A ] [ x ] = [ b ]
- [ Aᵀ νF ] [ y ] [ c ],
+ [ Aᴴ νF ] [ y ] [ c ],
-where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0.
+of size (n+m) × (n+m) where τ and ν are real numbers, E = M⁻¹ ≻ 0, F = N⁻¹ ≻ 0.
`b` and `c` must both be nonzero.
TriMR handles saddle-point systems (`τ = 0` or `ν = 0`) and adjoint systems (`τ = 0` and `ν = 0`) without any risk of breakdown.
By default, TriMR solves Hermitian and quasi-definite linear systems with τ = 1 and ν = -1.
-If `flip = true`, TriMR solves another known variant of SQD systems where τ = -1 and ν = 1.
-If `spd = true`, τ = ν = 1 and the associated symmetric and positive definite linear system is solved.
-If `snd = true`, τ = ν = -1 and the associated symmetric and negative definite linear system is solved.
-If `sp = true`, τ = 1, ν = 0 and the associated saddle-point linear system is solved.
-`τ` and `ν` are also keyword arguments that can be directly modified for more specific problems.
TriMR is based on the preconditioned orthogonal tridiagonalization process
and its relation with the preconditioned block-Lanczos process.
@@ -50,17 +51,40 @@ It's the Euclidean norm when `M` and `N` are identity operators.
TriMR stops when `itmax` iterations are reached or when `‖rₖ‖ ≤ atol + ‖r₀‖ * rtol`.
`atol` is an absolute tolerance and `rtol` is a relative tolerance.
-Additional details can be displayed if verbose mode is enabled (verbose > 0).
-Information will be displayed every `verbose` iterations.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
+
+#### Optional arguments
+
+* `x0`: a vector of length m that represents an initial guess of the solution x;
+* `y0`: a vector of length n that represents an initial guess of the solution y.
-TriMR can be warm-started from initial guesses `x0` and `y0` with the method
+#### Keyword arguments
- (x, y, stats) = trimr(A, b, c, x0, y0; kwargs...)
+* `M`: linear operator that models a Hermitian positive-definite matrix of size `m` used for centered preconditioning of the partitioned system;
+* `N`: linear operator that models a Hermitian positive-definite matrix of size `n` used for centered preconditioning of the partitioned system;
+* `ldiv`: define whether the preconditioners use `ldiv!` or `mul!`;
+* `spd`: if `true`, set `τ = 1` and `ν = 1` for Hermitian and positive-definite linear systems;
+* `snd`: if `true`, set `τ = -1` and `ν = -1` for Hermitian and negative-definite linear systems;
+* `flip`: if `true`, set `τ = -1` and `ν = 1` for another known variant of Hermitian quasi-definite systems;
+* `sp`: if `true`, set `τ = 1` and `ν = 0` for saddle-point systems;
+* `τ` and `ν`: diagonal scaling factors of the partitioned Hermitian linear system;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length m;
+* `y`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### Reference
@@ -98,16 +122,18 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
end
function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- M=I, N=I, atol :: T=√eps(T), rtol :: T=√eps(T),
- spd :: Bool=false, snd :: Bool=false, flip :: Bool=false, sp :: Bool=false,
- τ :: T=one(T), ν :: T=-one(T), itmax :: Int=0,
+ M=I, N=I, ldiv :: Bool=false,
+ spd :: Bool=false, snd :: Bool=false,
+ flip :: Bool=false, sp :: Bool=false,
+ τ :: T=one(T), ν :: T=-one(T), atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
verbose :: Int=0, history :: Bool=false,
- ldiv :: Bool=false, callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("TriMR: system of %d equations in %d variables\n", m+n, m+n)
+ (verbose > 0) && @printf(iostream, "TriMR: system of %d equations in %d variables\n", m+n, m+n)
# Check flip, sp, spd and snd parameters
spd && flip && error("The matrix cannot be symmetric positive definite and symmetric quasi-definite !")
@@ -123,8 +149,8 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Determine τ and ν associated to SQD, SPD or SND systems.
flip && (τ = -one(T) ; ν = one(T))
@@ -137,7 +163,7 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
warm_start && (ν ≠ 0) && !NisI && error("Warm-start with preconditioners is not supported.")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
allocate_if(!MisI, solver, :vₖ, S, m)
@@ -169,12 +195,12 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
N⁻¹uₖ₋₁ .= zero(FC) # u₀ = 0
# [ τI A ] [ xₖ ] = [ b - τΔx - AΔy ] = [ b₀ ]
- # [ Aᵀ νI ] [ yₖ ] [ c - AᵀΔx - νΔy ] [ c₀ ]
+ # [ Aᴴ νI ] [ yₖ ] [ c - AᴴΔx - νΔy ] [ c₀ ]
if warm_start
mul!(b₀, A, Δy)
(τ ≠ 0) && @kaxpy!(m, τ, Δx, b₀)
@kaxpby!(m, one(FC), b, -one(FC), b₀)
- mul!(c₀, Aᵀ, Δx)
+ mul!(c₀, Aᴴ, Δx)
(ν ≠ 0) && @kaxpy!(n, ν, Δy, c₀)
@kaxpby!(n, one(FC), c, -one(FC), c₀)
end
@@ -216,8 +242,8 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
history && push!(rNorms, rNorm)
ε = atol + rtol * rNorm
- (verbose > 0) && @printf("%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ)
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s %7s\n", "k", "‖rₖ‖", "βₖ₊₁", "γₖ₊₁")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ, γₖ)
# Set up workspace.
old_c₁ₖ = old_c₂ₖ = old_c₃ₖ = old_c₄ₖ = zero(T)
@@ -244,10 +270,10 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Continue the orthogonal tridiagonalization process.
# AUₖ = EVₖTₖ + βₖ₊₁Evₖ₊₁(eₖ)ᵀ = EVₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = FUₖ(Tₖ)ᵀ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = FUₖ(Tₖ)ᴴ + γₖ₊₁Fuₖ₊₁(eₖ)ᵀ = FUₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms Evₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms Fuₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms Fuₖ₊₁ : p ← Aᴴvₖ
if iter ≥ 2
@kaxpy!(m, -γₖ, M⁻¹vₖ₋₁, q) # q ← q - γₖ * M⁻¹vₖ₋₁
@@ -261,7 +287,7 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# Compute vₖ₊₁ and uₖ₊₁
MisI || mulorldiv!(vₖ₊₁, M, q, ldiv) # βₖ₊₁vₖ₊₁ = MAuₖ - γₖvₖ₋₁ - αₖvₖ
- NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᵀvₖ - βₖuₖ₋₁ - ᾱₖuₖ
+ NisI || mulorldiv!(uₖ₊₁, N, p, ldiv) # γₖ₊₁uₖ₊₁ = NAᴴvₖ - βₖuₖ₋₁ - ᾱₖuₖ
βₖ₊₁ = sqrt(@kdotr(m, vₖ₊₁, q)) # βₖ₊₁ = ‖vₖ₊₁‖_E
γₖ₊₁ = sqrt(@kdotr(n, uₖ₊₁, p)) # γₖ₊₁ = ‖uₖ₊₁‖_F
@@ -282,10 +308,10 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
# [0 u₁ ••• 0 uₖ]
#
# rₖ = [ b ] - [ τE A ] [ xₖ ] = [ b ] - [ τE A ] Wₖzₖ
- # [ c ] [ Aᵀ νF ] [ yₖ ] [ c ] [ Aᵀ νF ]
+ # [ c ] [ Aᴴ νF ] [ yₖ ] [ c ] [ Aᴴ νF ]
#
# block-Lanczos formulation : [ τE A ] Wₖ = [ E 0 ] Wₖ₊₁Sₖ₊₁.ₖ
- # [ Aᵀ νF ] [ 0 F ]
+ # [ Aᴴ νF ] [ 0 F ]
#
# TriMR subproblem : min ‖ rₖ ‖ ↔ min ‖ Sₖ₊₁.ₖzₖ - β₁e₁ - γ₁e₂ ‖
#
@@ -419,7 +445,7 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
@kswap(gy₂ₖ₋₂, gy₂ₖ)
end
- # Update p̅ₖ = (Qₖ)ᵀ * (β₁e₁ + γ₁e₂)
+ # Update p̅ₖ = (Qₖ)ᴴ * (β₁e₁ + γ₁e₂)
πbis₂ₖ = c₁ₖ * πbar₂ₖ
πbis₂ₖ₊₂ = conj(s₁ₖ) * πbar₂ₖ
#
@@ -490,9 +516,9 @@ function trimr!(solver :: TrimrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c ::
breakdown = βₖ₊₁ ≤ btol && γₖ₊₁ ≤ btol
solved = resid_decrease_lim || resid_decrease_mach
tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e %7.1e\n", iter, rNorm, βₖ₊₁, γₖ₊₁)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
breakdown && (status = "inconsistent linear system")
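
A sketch of the `sp = true` saddle-point mode documented above (matrix and sizes illustrative; `A` has full column rank almost surely, so the system is nonsingular):

    using Krylov, LinearAlgebra

    m, n = 6, 4
    A = randn(m, n)
    b = randn(m)
    c = randn(n)

    # τ = 1, ν = 0: the saddle-point system [I A; Aᴴ 0] [x; y] = [b; c].
    x, y, stats = trimr(A, b, c, sp=true)
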
diff --git a/src/usymlq.jl b/src/usymlq.jl
index 71670c80f..53aef51a3 100644
--- a/src/usymlq.jl
+++ b/src/usymlq.jl
@@ -21,34 +21,53 @@ export usymlq, usymlq!
"""
(x, stats) = usymlq(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- atol::T=√eps(T), rtol::T=√eps(T), transfer_to_usymcg::Bool=true,
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ transfer_to_usymcg::Bool=true, atol::T=√eps(T),
+ rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using the USYMLQ method.
+ (x, stats) = usymlq(A, b, c, x0::AbstractVector; kwargs...)
+
+USYMLQ can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+USYMLQ determines the least-norm solution of the consistent linear system Ax = b of size m × n.
USYMLQ is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`.
-The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`.
+The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`.
The error norm ‖x - x*‖ decreases monotonically in USYMLQ.
It's considered a generalization of SYMMLQ.
It can also be applied to under-determined and over-determined problems.
In all cases, problems must be consistent.
-An option gives the possibility of transferring to the USYMCG point,
-when it exists. The transfer is based on the residual norm.
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
+
+#### Optional argument
+
+* `x0`: a vector of length n that represents an initial guess of the solution x.
-USYMLQ can be warm-started from an initial guess `x0` with the method
+#### Keyword arguments
- (x, stats) = usymlq(A, b, c, x0; kwargs...)
+* `transfer_to_usymcg`: transfer from the USYMLQ point to the USYMCG point, when it exists. The transfer is based on the residual norm;
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -88,22 +107,23 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
end
function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- atol :: T=√eps(T), rtol :: T=√eps(T), transfer_to_usymcg :: Bool=true,
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ transfer_to_usymcg :: Bool=true, atol :: T=√eps(T),
+ rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("USYMLQ: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "USYMLQ: system of %d equations in %d variables\n", m, n)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
uₖ₋₁, uₖ, p, Δx, x = solver.uₖ₋₁, solver.uₖ, solver.p, solver.Δx, solver.x
@@ -135,8 +155,8 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
itmax == 0 && (itmax = m+n)
ε = atol + rtol * bNorm
- (verbose > 0) && @printf("%5s %7s\n", "k", "‖rₖ‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, bNorm)
+ (verbose > 0) && @printf(iostream, "%5s %7s\n", "k", "‖rₖ‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, bNorm)
βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖
γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖
@@ -146,7 +166,7 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
uₖ .= c ./ γₖ # u₁ = c / γ₁
cₖ₋₁ = cₖ = -one(T) # Givens cosines used for the LQ factorization of Tₖ
sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the LQ factorization of Tₖ
- d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᵀ
+ d̅ .= zero(FC) # Last column of D̅ₖ = Uₖ(Qₖ)ᴴ
ζₖ₋₁ = ζbarₖ = zero(FC) # ζₖ₋₁ and ζbarₖ are the last components of z̅ₖ = (L̅ₖ)⁻¹β₁e₁
ζₖ₋₂ = ηₖ = zero(FC) # ζₖ₋₂ and ηₖ are used to update ζₖ₋₁ and ζbarₖ
δbarₖ₋₁ = δbarₖ = zero(FC) # Coefficients of Lₖ₋₁ and Lₖ modified over the course of two iterations
@@ -164,10 +184,10 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
# Continue the SSY tridiagonalization process.
# AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ
@kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
@@ -233,7 +253,7 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ηₖ = -ϵₖ₋₂ * ζₖ₋₂ - λₖ₋₁ * ζₖ₋₁
end
- # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᵀ.
+ # Relations for the directions dₖ₋₁ and d̅ₖ, the last two columns of D̅ₖ = Uₖ(Qₖ)ᴴ.
# [d̅ₖ₋₁ uₖ] [cₖ s̄ₖ] = [dₖ₋₁ d̅ₖ] ⟷ dₖ₋₁ = cₖ * d̅ₖ₋₁ + sₖ * uₖ
# [sₖ -cₖ] ⟷ d̅ₖ = s̄ₖ * d̅ₖ₋₁ - cₖ * uₖ
if iter ≥ 2
@@ -294,9 +314,9 @@ function usymlq!(solver :: UsymlqSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
solved_lq = rNorm_lq ≤ ε
solved_cg = transfer_to_usymcg && (abs(δbarₖ) > eps(T)) && (rNorm_cg ≤ ε)
tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e\n", iter, rNorm_lq)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e\n", iter, rNorm_lq)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
# Compute USYMCG point
# (xᶜ)ₖ ← (xᴸ)ₖ₋₁ + ζbarₖ * d̅ₖ
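
A minimal usage sketch of the warm-started API documented above; the operator `A`, the vectors `b` and `c`, and the guess `x0` are illustrative placeholders:

    using Krylov, LinearAlgebra
    n = 10
    A = randn(n, n) + 5I           # generically nonsingular, so Ax = b is consistent
    b = randn(n)
    c = A' * b                     # a common default for the second initial vector
    x0 = zeros(n)                  # warm-start guess
    x, stats = usymlq(A, b, c, x0; transfer_to_usymcg=true)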
diff --git a/src/usymqr.jl b/src/usymqr.jl
index 863390c3f..3876499b5 100644
--- a/src/usymqr.jl
+++ b/src/usymqr.jl
@@ -21,31 +21,52 @@ export usymqr, usymqr!
"""
(x, stats) = usymqr(A, b::AbstractVector{FC}, c::AbstractVector{FC};
- atol::T=√eps(T), rtol::T=√eps(T),
- itmax::Int=0, verbose::Int=0, history::Bool=false,
- callback=solver->false)
+ atol::T=√eps(T), rtol::T=√eps(T), itmax::Int=0,
+ verbose::Int=0, history::Bool=false,
+ callback=solver->false, iostream::IO=kstdout)
`T` is an `AbstractFloat` such as `Float32`, `Float64` or `BigFloat`.
`FC` is `T` or `Complex{T}`.
-Solve the linear system Ax = b using the USYMQR method.
+ (x, stats) = usymqr(A, b, c, x0::AbstractVector; kwargs...)
+
+USYMQR can be warm-started from an initial guess `x0` where `kwargs` are the same keyword arguments as above.
+
+USYMQR solves the linear least-squares problem min ‖b - Ax‖² of size m × n.
+USYMQR solves Ax = b if it is consistent.
USYMQR is based on the orthogonal tridiagonalization process and requires two initial nonzero vectors `b` and `c`.
-The vector `c` is only used to initialize the process and a default value can be `b` or `Aᵀb` depending on the shape of `A`.
+The vector `c` is only used to initialize the process and a default value can be `b` or `Aᴴb` depending on the shape of `A`.
The residual norm ‖b - Ax‖ decreases monotonically in USYMQR.
It can be seen as a generalization of MINRES.
It can also be applied to under-determined and over-determined problems.
USYMQR finds the minimum-norm solution when the problem is inconsistent.
-USYMQR can be warm-started from an initial guess `x0` with the method
+
+#### Input arguments
+
+* `A`: a linear operator that models a matrix of dimension m × n;
+* `b`: a vector of length m;
+* `c`: a vector of length n.
+
+#### Optional argument
+
+* `x0`: a vector of length n that represents an initial guess of the solution x.
+
+#### Keyword arguments
- (x, stats) = usymqr(A, b, c, x0; kwargs...)
+* `atol`: absolute stopping tolerance based on the residual norm;
+* `rtol`: relative stopping tolerance based on the residual norm;
+* `itmax`: the maximum number of iterations. If `itmax=0`, the default number of iterations is set to `m+n`;
+* `verbose`: additional details can be displayed if verbose mode is enabled (verbose > 0). Information will be displayed every `verbose` iterations;
+* `history`: collect additional statistics on the run such as residual norms or Aᴴ-residual norms;
+* `callback`: function or functor called as `callback(solver)` that returns `true` if the Krylov method should terminate, and `false` otherwise;
+* `iostream`: stream to which output is logged.
-where `kwargs` are the same keyword arguments as above.
+
+#### Output arguments
-The callback is called as `callback(solver)` and should return `true` if the main loop should terminate,
-and `false` otherwise.
+* `x`: a dense vector of length n;
+* `stats`: statistics collected on the run in a [`SimpleStats`](@ref) structure.
#### References
@@ -85,28 +106,28 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
end
function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :: AbstractVector{FC};
- atol :: T=√eps(T), rtol :: T=√eps(T),
- itmax :: Int=0, verbose :: Int=0, history :: Bool=false,
- callback = solver -> false) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
+ atol :: T=√eps(T), rtol :: T=√eps(T), itmax :: Int=0,
+ verbose :: Int=0, history :: Bool=false,
+ callback = solver -> false, iostream :: IO=kstdout) where {T <: AbstractFloat, FC <: FloatOrComplex{T}, S <: DenseVector{FC}}
m, n = size(A)
length(b) == m || error("Inconsistent problem size")
length(c) == n || error("Inconsistent problem size")
- (verbose > 0) && @printf("USYMQR: system of %d equations in %d variables\n", m, n)
+ (verbose > 0) && @printf(iostream, "USYMQR: system of %d equations in %d variables\n", m, n)
# Check type consistency
eltype(A) == FC || error("eltype(A) ≠ $FC")
- ktypeof(b) == S || error("ktypeof(b) ≠ $S")
- ktypeof(c) == S || error("ktypeof(c) ≠ $S")
+ ktypeof(b) <: S || error("ktypeof(b) is not a subtype of $S")
+ ktypeof(c) <: S || error("ktypeof(c) is not a subtype of $S")
# Compute the adjoint of A
- Aᵀ = A'
+ Aᴴ = A'
# Set up workspace.
vₖ₋₁, vₖ, q, Δx, x, p = solver.vₖ₋₁, solver.vₖ, solver.q, solver.Δx, solver.x, solver.p
wₖ₋₂, wₖ₋₁, uₖ₋₁, uₖ, stats = solver.wₖ₋₂, solver.wₖ₋₁, solver.uₖ₋₁, solver.uₖ, solver.stats
warm_start = solver.warm_start
- rNorms, AᵀrNorms = stats.residuals, stats.Aresiduals
+ rNorms, AᴴrNorms = stats.residuals, stats.Aresiduals
reset!(stats)
r₀ = warm_start ? q : b
@@ -133,8 +154,8 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
ε = atol + rtol * rNorm
κ = zero(T)
- (verbose > 0) && @printf("%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᵀrₖ₋₁‖")
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗")
+ (verbose > 0) && @printf(iostream, "%5s %7s %7s\n", "k", "‖rₖ‖", "‖Aᴴrₖ₋₁‖")
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7s\n", iter, rNorm, "✗ ✗ ✗ ✗")
βₖ = @knrm2(m, r₀) # β₁ = ‖v₁‖ = ‖r₀‖
γₖ = @knrm2(n, c) # γ₁ = ‖u₁‖ = ‖c‖
@@ -146,7 +167,7 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
sₖ₋₂ = sₖ₋₁ = sₖ = zero(FC) # Givens sines used for the QR factorization of Tₖ₊₁.ₖ
wₖ₋₂ .= zero(FC) # Column k-2 of Wₖ = Uₖ(Rₖ)⁻¹
wₖ₋₁ .= zero(FC) # Column k-1 of Wₖ = Uₖ(Rₖ)⁻¹
- ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᵀβ₁e₁
+ ζbarₖ = βₖ # ζbarₖ is the last component of z̅ₖ = (Qₖ)ᴴβ₁e₁
# Stopping criterion.
solved = rNorm ≤ ε
@@ -161,10 +182,10 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
# Continue the SSY tridiagonalization process.
# AUₖ = VₖTₖ + βₖ₊₁vₖ₊₁(eₖ)ᵀ = Vₖ₊₁Tₖ₊₁.ₖ
- # AᵀVₖ = Uₖ(Tₖ)ᵀ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᵀ
+ # AᴴVₖ = Uₖ(Tₖ)ᴴ + γₖ₊₁uₖ₊₁(eₖ)ᵀ = Uₖ₊₁(Tₖ.ₖ₊₁)ᴴ
mul!(q, A , uₖ) # Forms vₖ₊₁ : q ← Auₖ
- mul!(p, Aᵀ, vₖ) # Forms uₖ₊₁ : p ← Aᵀvₖ
+ mul!(p, Aᴴ, vₖ) # Forms uₖ₊₁ : p ← Aᴴvₖ
@kaxpy!(m, -γₖ, vₖ₋₁, q) # q ← q - γₖ * vₖ₋₁
@kaxpy!(n, -βₖ, uₖ₋₁, p) # p ← p - βₖ * uₖ₋₁
@@ -254,9 +275,9 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
rNorm = abs(ζbarₖ₊₁)
history && push!(rNorms, rNorm)
- # Compute ‖Aᵀrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²).
- AᵀrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁))
- history && push!(AᵀrNorms, AᵀrNorm)
+ # Compute ‖Aᴴrₖ₋₁‖ = |ζbarₖ| * √(|δbarₖ|² + |λbarₖ|²).
+ AᴴrNorm = abs(ζbarₖ) * √(abs2(δbarₖ) + abs2(cₖ₋₁ * γₖ₊₁))
+ history && push!(AᴴrNorms, AᴴrNorm)
# Compute vₖ₊₁ and uₖ₊₁.
@. vₖ₋₁ = vₖ # vₖ₋₁ ← vₖ
@@ -286,14 +307,14 @@ function usymqr!(solver :: UsymqrSolver{T,FC,S}, A, b :: AbstractVector{FC}, c :
βₖ = βₖ₊₁
# Update stopping criterion.
- iter == 1 && (κ = atol + rtol * AᵀrNorm)
+ iter == 1 && (κ = atol + rtol * AᴴrNorm)
user_requested_exit = callback(solver) :: Bool
solved = rNorm ≤ ε
- inconsistent = !solved && AᵀrNorm ≤ κ
+ inconsistent = !solved && AᴴrNorm ≤ κ
tired = iter ≥ itmax
- kdisplay(iter, verbose) && @printf("%5d %7.1e %7.1e\n", iter, rNorm, AᵀrNorm)
+ kdisplay(iter, verbose) && @printf(iostream, "%5d %7.1e %7.1e\n", iter, rNorm, AᴴrNorm)
end
- (verbose > 0) && @printf("\n")
+ (verbose > 0) && @printf(iostream, "\n")
tired && (status = "maximum number of iterations exceeded")
solved && (status = "solution good enough given atol and rtol")
user_requested_exit && (status = "user-requested exit")
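
USYMQR takes the same triplet `(A, b, c)`. A sketch on an over-determined system with illustrative data, where the minimum-residual property described above applies:

    using Krylov
    m, n = 20, 10
    A = randn(m, n)
    b = randn(m)       # generically inconsistent when m > n
    c = A' * b         # only used to initialize the orthogonal tridiagonalization
    x, stats = usymqr(A, b, c)
    # norm(b - A * x) is the least-squares residual; stats.status gives the stopping reason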
diff --git a/test/callback_utils.jl b/test/callback_utils.jl
new file mode 100644
index 000000000..f88f01848
--- /dev/null
+++ b/test/callback_utils.jl
@@ -0,0 +1,152 @@
+mutable struct StorageGetxRestartedGmres{S}
+ x::S
+ y::S
+ p::S
+end
+StorageGetxRestartedGmres(solver::GmresSolver; N = I) =
+ StorageGetxRestartedGmres(similar(solver.x), similar(solver.z), (N === I) ? similar(solver.p) : similar(solver.x))
+
+function get_x_restarted_gmres!(solver::GmresSolver{T,FC,S}, A,
+ stor::StorageGetxRestartedGmres{S}, N) where {T,FC,S}
+ NisI = (N === I)
+ x2, y2, p2 = stor.x, stor.y, stor.p
+ n = size(A, 2)
+ # Compute yₖ by solving Rₖyₖ = zₖ with backward substitution.
+ nr = sum(1:solver.inner_iter)
+ y = solver.z # yᵢ = zᵢ
+ y2 .= y
+ R = solver.R
+ V = solver.V
+ x2 .= solver.Δx
+ for i = solver.inner_iter : -1 : 1
+ pos = nr + i - solver.inner_iter # position of rᵢ.ₖ
+ for j = solver.inner_iter : -1 : i+1
+ y2[i] = y2[i] - R[pos] * y2[j] # yᵢ ← yᵢ - rᵢⱼyⱼ
+ pos = pos - j + 1 # position of rᵢ.ⱼ₋₁
+ end
+ # Rₖ can be singular if the system is inconsistent
+ if abs(R[pos]) ≤ eps(T)^(3/4)
+ y2[i] = zero(FC)
+ inconsistent = true
+ else
+ y2[i] = y2[i] / R[pos] # yᵢ ← yᵢ / rᵢᵢ
+ end
+ end
+
+ # Form xₖ = N⁻¹Vₖyₖ
+ for i = 1 : solver.inner_iter
+ Krylov.@kaxpy!(n, y2[i], V[i], x2)
+ end
+ if !NisI
+ p2 .= solver.p
+ p2 .= x2
+ mul!(x2, N, p2)
+ end
+ x2 .+= solver.x
+end
+
+mutable struct TestCallbackN2{T, S, M}
+ A::M
+ b::S
+ storage_vec::S
+ tol::T
+end
+TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol)
+
+function (cb_n2::TestCallbackN2)(solver)
+ mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
+ cb_n2.storage_vec .-= cb_n2.b
+ return norm(cb_n2.storage_vec) ≤ cb_n2.tol
+end
+
+mutable struct TestCallbackN2Adjoint{T, S, M}
+ A::M
+ b::S
+ c::S
+ storage_vec1::S
+ storage_vec2::S
+ tol::T
+end
+TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol)
+
+function (cb_n2::TestCallbackN2Adjoint)(solver)
+ mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
+ cb_n2.storage_vec1 .-= cb_n2.b
+ mul!(cb_n2.storage_vec2, cb_n2.A', solver.y)
+ cb_n2.storage_vec2 .-= cb_n2.c
+ return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
+end
+
+mutable struct TestCallbackN2Shifts{T, S, M}
+ A::M
+ b::S
+ shifts::Vector{T}
+ tol::T
+end
+TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol)
+
+function (cb_n2::TestCallbackN2Shifts)(solver)
+ r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x)
+ return all(map(norm, r) .≤ cb_n2.tol)
+end
+
+mutable struct TestCallbackN2LS{T, S, M}
+ A::M
+ b::S
+ λ::T
+ storage_vec1::S
+ storage_vec2::S
+ tol::T
+end
+TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol)
+
+function (cb_n2::TestCallbackN2LS)(solver)
+ mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
+ cb_n2.storage_vec1 .-= cb_n2.b
+ mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1)
+ cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x
+ return norm(cb_n2.storage_vec2) ≤ cb_n2.tol
+end
+
+mutable struct TestCallbackN2LN{T, S, M}
+ A::M
+ b::S
+ λ::T
+ storage_vec::S
+ tol::T
+end
+TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol)
+
+function (cb_n2::TestCallbackN2LN)(solver)
+ mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
+ cb_n2.storage_vec .-= cb_n2.b
+ cb_n2.λ != 0 && (cb_n2.storage_vec .+= cb_n2.λ .* solver.x)
+ return norm(cb_n2.storage_vec) ≤ cb_n2.tol
+end
+
+mutable struct TestCallbackN2SaddlePts{T, S, M}
+ A::M
+ b::S
+ c::S
+ storage_vec1::S
+ storage_vec2::S
+ tol::T
+end
+TestCallbackN2SaddlePts(A, b, c; tol = 0.1) =
+ TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol)
+
+function (cb_n2::TestCallbackN2SaddlePts)(solver)
+ mul!(cb_n2.storage_vec1, cb_n2.A, solver.y)
+ cb_n2.storage_vec1 .+= solver.x .- cb_n2.b
+ mul!(cb_n2.storage_vec2, cb_n2.A', solver.x)
+ cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c
+ return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
+end
+
+function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol)
+ get_x_restarted_gmres!(solver, A, stor, N)
+ x = stor.x
+ mul!(storage_vec, A, x)
+ storage_vec .-= b
+ return (norm(storage_vec) ≤ tol)
+end
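
These functors implement the `callback(solver)` protocol described in the docstrings: the solve stops as soon as the callback returns `true`. A sketch of wiring `TestCallbackN2` into a solve, with illustrative data:

    using Krylov, LinearAlgebra
    B = randn(10, 10)
    A = B' * B + 10I                          # symmetric positive definite
    b = randn(10)
    cb = TestCallbackN2(A, b, tol = 1.0e-8)   # returns true once ‖b - Ax‖ ≤ tol
    x, stats = cg(A, b, callback = cb)
    # stats.status == "user-requested exit" if the callback triggered the stop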
diff --git a/test/get_div_grad.jl b/test/get_div_grad.jl
index 6d6bf012e..ae27e5061 100644
--- a/test/get_div_grad.jl
+++ b/test/get_div_grad.jl
@@ -1,8 +1,8 @@
# Identity matrix.
eye(n::Int; FC=Float64) = sparse(one(FC) * I, n, n)
-# Compute the energy norm ‖r‖ₚ = √(rᵀPr) where P is a symmetric and positive definite matrix.
-metric(r, P) = sqrt(dot(r, P * r))
+# Compute the energy norm ‖r‖ₚ = √(rᴴPr) where P is a symmetric and positive definite matrix.
+metric(r, P) = sqrt(real(dot(r, P * r)))
# Based on Lars Ruthotto's initial implementation.
function get_div_grad(n1 :: Int, n2 :: Int, n3 :: Int)
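
The `real` added to `metric` matters in complex arithmetic: `dot(r, P * r)` is mathematically real for a Hermitian positive-definite `P`, but round-off can leave a tiny imaginary part, and `sqrt` would then return a complex number instead of a norm. A small check with illustrative data:

    using LinearAlgebra
    r = rand(ComplexF64, 5)
    P = Matrix(4.0I, 5, 5)          # Hermitian positive definite
    metric(r, P) ≈ 2 * norm(r)      # ‖r‖ₚ = √(rᴴPr) = 2‖r‖ here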
diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl
new file mode 100644
index 000000000..9fb6cdffd
--- /dev/null
+++ b/test/gpu/amd.jl
@@ -0,0 +1,111 @@
+using AMDGPU
+
+include("gpu.jl")
+
+@testset "AMD -- AMDGPU.jl" begin
+
+ @test AMDGPU.functional()
+ AMDGPU.allowscalar(false)
+
+ @testset "documentation" begin
+ A_cpu = rand(ComplexF64, 20, 20)
+ A_cpu = A_cpu + A_cpu'
+ b_cpu = rand(ComplexF64, 20)
+ A_gpu = ROCMatrix(A_cpu)
+ b_gpu = ROCVector(b_cpu)
+ x, stats = minres(A_gpu, b_gpu)
+ end
+
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ S = ROCVector{FC}
+ M = ROCMatrix{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ @testset "kref! -- $FC" begin
+ Krylov.@kref!(n, x, y, c, s)
+ end
+
+ @testset "conversion -- $FC" begin
+ test_conversion(S, M)
+ end
+
+ ε = eps(T)
+ atol = √ε
+ rtol = √ε
+
+ @testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "CG -- $FC" begin
+ A, b = symmetric_definite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "MINRES-QLP -- $FC" begin
+ A, b = symmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = minres_qlp(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ # @testset "processes -- $FC" begin
+ # test_processes(S, M)
+ # end
+
+ @testset "solver -- $FC" begin
+ test_solver(S, M)
+ end
+ end
+end
diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
new file mode 100644
index 000000000..09036ecac
--- /dev/null
+++ b/test/gpu/gpu.jl
@@ -0,0 +1,52 @@
+using LinearAlgebra, SparseArrays, Test
+using Krylov
+
+include("../test_utils.jl")
+
+function test_processes(S, M)
+ m = 250
+ n = 500
+ k = 20
+ FC = eltype(S)
+
+ cpu_A, cpu_b = symmetric_indefinite(n, FC=FC)
+ gpu_A, gpu_b = M(cpu_A), S(cpu_b)
+ V, T = hermitian_lanczos(gpu_A, gpu_b, k)
+
+ cpu_A, cpu_b = nonsymmetric_definite(n, FC=FC)
+ cpu_c = -cpu_b
+ gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c)
+ V, T, U, Tᴴ = nonhermitian_lanczos(gpu_A, gpu_b, gpu_c, k)
+
+ cpu_A, cpu_b = nonsymmetric_indefinite(n, FC=FC)
+ gpu_A, gpu_b = M(cpu_A), S(cpu_b)
+ V, H = arnoldi(gpu_A, gpu_b, k)
+
+ cpu_A, cpu_b = under_consistent(m, n, FC=FC)
+ gpu_A, gpu_b = M(cpu_A), S(cpu_b)
+ V, U, L = golub_kahan(gpu_A, gpu_b, k)
+
+ cpu_A, cpu_b = under_consistent(m, n, FC=FC)
+ _, cpu_c = over_consistent(n, m, FC=FC)
+ gpu_A, gpu_b, gpu_c = M(cpu_A), S(cpu_b), S(cpu_c)
+ V, T, U, Tᴴ = saunders_simon_yip(gpu_A, gpu_b, gpu_c, k)
+
+ cpu_A, cpu_b = under_consistent(m, n, FC=FC)
+ cpu_B, cpu_c = over_consistent(n, m, FC=FC)
+ gpu_A, gpu_B, gpu_b, gpu_c = M(cpu_A), M(cpu_B), S(cpu_b), S(cpu_c)
+ V, H, U, F = montoison_orban(gpu_A, gpu_B, gpu_b, gpu_c, k)
+end
+
+function test_solver(S, M)
+ n = 10
+ memory = 5
+ A = M(undef, n, n)
+ b = S(undef, n)
+ solver = GmresSolver(n, n, memory, S)
+ solve!(solver, A, b) # Test that we don't have errors
+end
+
+function test_conversion(S, M)
+ @test Krylov.vector_to_matrix(S) == M
+ @test Krylov.matrix_to_vector(M) == S
+end
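
`test_solver` exercises the in-place API: a `GmresSolver` is allocated once for a given storage type `S` and reused across solves. The same pattern on the CPU, with illustrative sizes:

    using Krylov, LinearAlgebra
    n, memory = 100, 20
    A = rand(n, n) + n * I       # diagonally dominant, hence nonsingular
    b = rand(n)
    solver = GmresSolver(n, n, memory, Vector{Float64})
    solve!(solver, A, b)         # later calls with this solver reuse its workspace
    x = solver.x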
diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl
new file mode 100644
index 000000000..f03176199
--- /dev/null
+++ b/test/gpu/intel.jl
@@ -0,0 +1,113 @@
+using oneAPI
+
+include("gpu.jl")
+
+@testset "Intel -- oneAPI.jl" begin
+
+ @test oneAPI.functional()
+ oneAPI.allowscalar(false)
+
+ @testset "documentation" begin
+ T = Float32
+ m = 20
+ n = 10
+ A_cpu = rand(T, m, n)
+ b_cpu = rand(T, m)
+ A_gpu = oneMatrix(A_cpu)
+ b_gpu = oneVector(b_cpu)
+ x, stats = lsqr(A_gpu, b_gpu)
+ end
+
+ for FC ∈ (Float32, ComplexF32)
+ S = oneVector{FC}
+ M = oneMatrix{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ @testset "kref! -- $FC" begin
+ Krylov.@kref!(n, x, y, c, s)
+ end
+
+ @testset "conversion -- $FC" begin
+ test_conversion(S, M)
+ end
+
+ ε = eps(T)
+ atol = √ε
+ rtol = √ε
+
+ @testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "CG -- $FC" begin
+ A, b = symmetric_definite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "MINRES-QLP -- $FC" begin
+ A, b = symmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = minres_qlp(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ # @testset "processes -- $FC" begin
+ # test_processes(S, M)
+ # end
+
+ @testset "solver -- $FC" begin
+ test_solver(S, M)
+ end
+ end
+end
diff --git a/test/gpu/metal.jl b/test/gpu/metal.jl
new file mode 100644
index 000000000..2e684e21f
--- /dev/null
+++ b/test/gpu/metal.jl
@@ -0,0 +1,113 @@
+using Metal
+
+include("gpu.jl")
+
+@testset "Apple M1 GPUs -- Metal.jl" begin
+
+ # @test Metal.functional()
+ Metal.allowscalar(false)
+
+ @testset "documentation" begin
+ T = Float32
+ n = 10
+ m = 20
+ A_cpu = rand(T, n, m)
+ b_cpu = rand(T, n)
+ A_gpu = MtlMatrix(A_cpu)
+ b_gpu = MtlVector(b_cpu)
+ x, stats = craig(A_gpu, b_gpu)
+ end
+
+ for FC in (Float32, ComplexF32)
+ S = MtlVector{FC}
+ M = MtlMatrix{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ @testset "kref! -- $FC" begin
+ Krylov.@kref!(n, x, y, c, s)
+ end
+
+ @testset "conversion -- $FC" begin
+ test_conversion(S, M)
+ end
+
+ ε = eps(T)
+ atol = √ε
+ rtol = √ε
+
+ @testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "CG -- $FC" begin
+ A, b = symmetric_definite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "MINRES-QLP -- $FC" begin
+ A, b = symmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = minres_qlp(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ # @testset "processes -- $FC" begin
+ # test_processes(S, M)
+ # end
+
+ @testset "solver -- $FC" begin
+ test_solver(S, M)
+ end
+ end
+end
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
new file mode 100644
index 000000000..908a2819c
--- /dev/null
+++ b/test/gpu/nvidia.jl
@@ -0,0 +1,204 @@
+using LinearOperators, CUDA, CUDA.CUSPARSE, CUDA.CUSOLVER
+
+include("gpu.jl")
+
+@testset "Nvidia -- CUDA.jl" begin
+
+ @test CUDA.functional()
+ CUDA.allowscalar(false)
+
+ @testset "documentation" begin
+ A_cpu = rand(20, 20)
+ b_cpu = rand(20)
+ A_gpu = CuMatrix(A_cpu)
+ b_gpu = CuVector(b_cpu)
+ x, stats = bilq(A_gpu, b_gpu)
+
+ A_cpu = sprand(200, 100, 0.3)
+ b_cpu = rand(200)
+ A_gpu = CuSparseMatrixCSC(A_cpu)
+ b_gpu = CuVector(b_cpu)
+ x, stats = lsmr(A_gpu, b_gpu)
+
+ @testset "ic0" begin
+ A_cpu, b_cpu = sparse_laplacian()
+
+ b_gpu = CuVector(b_cpu)
+ n = length(b_gpu)
+ T = eltype(b_gpu)
+ symmetric = hermitian = true
+
+ A_gpu = CuSparseMatrixCSC(A_cpu)
+ P = ic02(A_gpu, 'O')
+ function ldiv_csc_ic0!(y, P, x)
+ copyto!(y, x)
+ sv2!('T', 'U', 'N', 1.0, P, y, 'O')
+ sv2!('N', 'U', 'N', 1.0, P, y, 'O')
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csc_ic0!(y, P, x))
+ x, stats = cg(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+
+ A_gpu = CuSparseMatrixCSR(A_cpu)
+ P = ic02(A_gpu, 'O')
+ function ldiv_csr_ic0!(y, P, x)
+ copyto!(y, x)
+ sv2!('N', 'L', 'N', 1.0, P, y, 'O')
+ sv2!('T', 'L', 'N', 1.0, P, y, 'O')
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csr_ic0!(y, P, x))
+ x, stats = cg(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+ end
+
+ @testset "ilu0" begin
+ A_cpu, b_cpu = polar_poisson()
+
+ p = zfd(A_cpu, 'O')
+ p .+= 1
+ A_cpu = A_cpu[p,:]
+ b_cpu = b_cpu[p]
+
+ b_gpu = CuVector(b_cpu)
+ n = length(b_gpu)
+ T = eltype(b_gpu)
+ symmetric = hermitian = false
+
+ A_gpu = CuSparseMatrixCSC(A_cpu)
+ P = ilu02(A_gpu, 'O')
+ function ldiv_csc_ilu0!(y, P, x)
+ copyto!(y, x)
+ sv2!('N', 'L', 'N', 1.0, P, y, 'O')
+ sv2!('N', 'U', 'U', 1.0, P, y, 'O')
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csc_ilu0!(y, P, x))
+ x, stats = bicgstab(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+
+ A_gpu = CuSparseMatrixCSR(A_cpu)
+ P = ilu02(A_gpu, 'O')
+ function ldiv_csr_ilu0!(y, P, x)
+ copyto!(y, x)
+ sv2!('N', 'L', 'U', 1.0, P, y, 'O')
+ sv2!('N', 'U', 'N', 1.0, P, y, 'O')
+ return y
+ end
+ opM = LinearOperator(T, n, n, symmetric, hermitian, (y, x) -> ldiv_csr_ilu0!(y, P, x))
+ x, stats = bicgstab(A_gpu, b_gpu, M=opM)
+ @test norm(b_gpu - A_gpu * x) ≤ 1e-6
+ end
+ end
+
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ S = CuVector{FC}
+ V = CuSparseVector{FC}
+ M = CuMatrix{FC}
+ T = real(FC)
+ n = 10
+ x = rand(FC, n)
+ x = S(x)
+ y = rand(FC, n)
+ y = S(y)
+ a = rand(FC)
+ b = rand(FC)
+ s = rand(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+ c = rand(T)
+
+ @testset "kdot -- $FC" begin
+ Krylov.@kdot(n, x, y)
+ end
+
+ @testset "kdotr -- $FC" begin
+ Krylov.@kdotr(n, x, y)
+ end
+
+ @testset "knrm2 -- $FC" begin
+ Krylov.@knrm2(n, x)
+ end
+
+ @testset "kaxpy! -- $FC" begin
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+ end
+
+ @testset "kaxpby! -- $FC" begin
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+ end
+
+ @testset "kcopy! -- $FC" begin
+ Krylov.@kcopy!(n, x, y)
+ end
+
+ @testset "kswap -- $FC" begin
+ Krylov.@kswap(x, y)
+ end
+
+ @testset "kref! -- $FC" begin
+ Krylov.@kref!(n, x, y, c, s)
+ end
+
+ @testset "conversion -- $FC" begin
+ test_conversion(S, M)
+ end
+
+ ε = eps(T)
+ atol = √ε
+ rtol = √ε
+
+ @testset "GMRES -- $FC" begin
+ A, b = nonsymmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = gmres(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "CG -- $FC" begin
+ A, b = symmetric_definite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = cg(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "MINRES-QLP -- $FC" begin
+ A, b = symmetric_indefinite(FC=FC)
+ A = M(A)
+ b = S(b)
+ x, stats = minres_qlp(A, b)
+ @test norm(b - A * x) ≤ atol + rtol * norm(b)
+ end
+
+ @testset "processes -- $FC" begin
+ test_processes(S, M)
+ end
+
+ @testset "solver -- $FC" begin
+ test_solver(S, M)
+ end
+
+ @testset "ktypeof -- $FC" begin
+ dv = S(rand(FC, 10))
+ b = view(dv, 4:8)
+ @test Krylov.ktypeof(dv) <: S
+ @test Krylov.ktypeof(b) <: S
+
+ dm = M(rand(FC, 10, 10))
+ b = view(dm, :, 3)
+ @test Krylov.ktypeof(b) <: S
+
+ sv = V(sprand(FC, 10, 0.5))
+ b = view(sv, 4:8)
+ @test Krylov.ktypeof(sv) <: S
+ @test Krylov.ktypeof(b) <: S
+ end
+ end
+end
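
The `ktypeof` testset above encodes the rule that workspaces are always dense: views of dense GPU vectors and of matrix columns map back to the dense vector type, and so do sparse vectors. The CPU analogue, also checked in test_aux.jl:

    using SparseArrays
    dv = rand(Float64, 10)
    Krylov.ktypeof(view(dv, 4:8))   # Vector{Float64}
    sv = sprand(Float64, 10, 0.5)
    Krylov.ktypeof(sv)              # Vector{Float64}: workspaces stay dense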
diff --git a/test/runtests.jl b/test/runtests.jl
index 99ab25fda..b69865f61 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,7 +4,9 @@ import Krylov.KRYLOV_SOLVERS
include("test_utils.jl")
include("test_aux.jl")
include("test_stats.jl")
+include("test_processes.jl")
+include("test_fgmres.jl")
include("test_gpmr.jl")
include("test_fom.jl")
include("test_gmres.jl")
diff --git a/test/test_allocations.jl b/test/test_allocations.jl
index 4c6817499..174d0ae55 100644
--- a/test/test_allocations.jl
+++ b/test/test_allocations.jl
@@ -1,26 +1,27 @@
@testset "allocations" begin
- for FC in (Float64, ComplexF64)
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
@testset "Data Type: $FC" begin
- A = FC.(get_div_grad(16, 16, 16)) # Dimension n x n
- n = size(A, 1)
- m = div(n, 2)
- Au = A[1:m,:] # Dimension m x n
- Ao = A[:,1:m] # Dimension n x m
- b = Ao * ones(FC, m) # Dimension n
- c = Au * ones(FC, n) # Dimension m
+ A = FC.(get_div_grad(18, 18, 18)) # Dimension m x n
+ m,n = size(A)
+ k = div(n, 2)
+ Au = A[1:k,:] # Dimension k x n
+ Ao = A[:,1:k] # Dimension m x k
+ b = Ao * ones(FC, k) # Dimension m
+ c = Au * ones(FC, n) # Dimension k
mem = 200
- shifts = [1.0; 2.0; 3.0; 4.0; 5.0]
+ T = real(FC)
+ shifts = T[1; 2; 3; 4; 5]
nshifts = 5
- nbits = sizeof(FC) # 8 bits for Float64 and 16 bits for ComplexF64
+ nbits_FC = sizeof(FC) # 8 bytes for ComplexF32 and 16 bytes for ComplexF64
+ nbits_T = sizeof(T) # 4 bytes for Float32 and 8 bytes for Float64
@testset "SYMMLQ" begin
# SYMMLQ needs:
# 5 n-vectors: x, Mvold, Mv, Mv_next, w̅
- storage_symmlq(n) = 5 * n
- storage_symmlq_bytes(n) = nbits * storage_symmlq(n)
+ storage_symmlq_bytes(n) = nbits_FC * 5 * n
expected_symmlq_bytes = storage_symmlq_bytes(n)
symmlq(A, b) # warmup
@@ -36,8 +37,7 @@
@testset "CG" begin
# CG needs:
# 4 n-vectors: x, r, p, Ap
- storage_cg(n) = 4 * n
- storage_cg_bytes(n) = nbits * storage_cg(n)
+ storage_cg_bytes(n) = nbits_FC * 4 * n
expected_cg_bytes = storage_cg_bytes(n)
cg(A, b) # warmup
@@ -53,8 +53,7 @@
@testset "CG-LANCZOS" begin
# CG-LANCZOS needs:
# 5 n-vectors: x, Mv, Mv_prev, p, Mv_next
- storage_cg_lanczos(n) = 5 * n
- storage_cg_lanczos_bytes(n) = nbits * storage_cg_lanczos(n)
+ storage_cg_lanczos_bytes(n) = nbits_FC * 5 * n
expected_cg_lanczos_bytes = storage_cg_lanczos_bytes(n)
cg_lanczos(A, b) # warmup
@@ -73,9 +72,7 @@
# - 2 (n*nshifts)-matrices: x, p
# - 5 nshifts-vectors: σ, δhat, ω, γ, rNorms
# - 3 nshifts-bitVector: indefinite, converged, not_cv
- storage_cg_lanczos_shift(n, nshifts) = (3 * n) + (2 * n * nshifts) + (5 * nshifts) + (3 * nshifts / 64)
- storage_cg_lanczos_shift_bytes(n, nshifts) = nbits * storage_cg_lanczos_shift(n, nshifts)
-
+ storage_cg_lanczos_shift_bytes(n, nshifts) = nbits_FC * ((3 * n) + (2 * n * nshifts)) + nbits_T * (5 * nshifts) + (3 * nshifts)
expected_cg_lanczos_shift_bytes = storage_cg_lanczos_shift_bytes(n, nshifts)
cg_lanczos_shift(A, b, shifts) # warmup
actual_cg_lanczos_shift_bytes = @allocated cg_lanczos_shift(A, b, shifts)
@@ -90,8 +87,7 @@
@testset "CR" begin
# CR needs:
# 5 n-vectors: x, r, p, q, Ar
- storage_cr(n) = 5 * n
- storage_cr_bytes(n) = nbits * storage_cr(n)
+ storage_cr_bytes(n) = nbits_FC * 5 * n
expected_cr_bytes = storage_cr_bytes(n)
cr(A, b) # warmup
@@ -107,8 +103,7 @@
@testset "MINRES" begin
# MINRES needs:
# 6 n-vectors: x, r1, r2, w1, w2, y
- storage_minres(n) = 6 * n
- storage_minres_bytes(n) = nbits * storage_minres(n)
+ storage_minres_bytes(n) = nbits_FC * 6 * n
expected_minres_bytes = storage_minres_bytes(n)
minres(A, b) # warmup
@@ -124,8 +119,7 @@
@testset "MINRES-QLP" begin
# MINRES-QLP needs:
# - 6 n-vectors: wₖ₋₁, wₖ, vₖ₋₁, vₖ, x, p
- storage_minres_qlp(n) = 6 * n
- storage_minres_qlp_bytes(n) = nbits * storage_minres_qlp(n)
+ storage_minres_qlp_bytes(n) = nbits_FC * 6 * n
expected_minres_qlp_bytes = storage_minres_qlp_bytes(n)
minres_qlp(A, b) # warmup
@@ -141,11 +135,11 @@
@testset "DIOM" begin
# DIOM needs:
# - 2 n-vectors: x, t
- # - 2 (n*mem)-matrices: P, V
- # - 1 mem-vector: L
- # - 1 (mem+2)-vector: H
- storage_diom(mem, n) = (2 * n) + (2 * n * mem) + (mem) + (mem + 2)
- storage_diom_bytes(mem, n) = nbits * storage_diom(mem, n)
+ # - 1 (n*mem)-matrix: V
+ # - 1 n*(mem-1)-matrix: P
+ # - 1 (mem-1)-vector: L
+ # - 1 mem-vector: H
+ storage_diom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (n * (mem-1)) + (mem-1) + (mem))
expected_diom_bytes = storage_diom_bytes(mem, n)
diom(A, b, memory=mem) # warmup
@@ -164,8 +158,7 @@
# - 1 (n*mem)-matrix: V
# - 2 mem-vectors: l, z
# - 1 (mem*(mem+1)/2)-vector: U
- storage_fom(mem, n) = (2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)
- storage_fom_bytes(mem, n) = nbits * storage_fom(mem, n)
+ storage_fom_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2))
expected_fom_bytes = storage_fom_bytes(mem, n)
fom(A, b, memory=mem) # warmup
@@ -183,9 +176,8 @@
# - 2 n-vectors: x, t
# - 2 (n*mem)-matrices: P, V
# - 2 mem-vectors: c, s
- # - 1 (mem+2)-vector: H
- storage_dqgmres(mem, n) = (2 * n) + (2 * n * mem) + (2 * mem) + (mem + 2)
- storage_dqgmres_bytes(mem, n) = nbits * storage_dqgmres(mem, n)
+ # - 1 (mem+1)-vector: H
+ storage_dqgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + mem + (mem + 1)) + nbits_T * mem
expected_dqgmres_bytes = storage_dqgmres_bytes(mem, n)
dqgmres(A, b, memory=mem) # warmup
@@ -204,8 +196,7 @@
# - 1 n*(mem)-matrix: V
# - 3 mem-vectors: c, s, z
# - 1 (mem*(mem+1)/2)-vector: R
- storage_gmres(mem, n) = (2 * n) + (n * mem) + (3 * mem) + (mem * (mem+1) / 2)
- storage_gmres_bytes(mem, n) = nbits * storage_gmres(mem, n)
+ storage_gmres_bytes(mem, n) = nbits_FC * ((2 * n) + (n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem
expected_gmres_bytes = storage_gmres_bytes(mem, n)
gmres(A, b, memory=mem) # warmup
@@ -218,11 +209,29 @@
@test inplace_gmres_bytes == 0
end
+ @testset "FGMRES" begin
+ # FGMRES needs:
+ # - 2 n-vectors: x, w
+ # - 2 n*(mem)-matrix: V, Z
+ # - 3 mem-vectors: c, s, z
+ # - 1 (mem*(mem+1)/2)-vector: R
+ storage_fgmres_bytes(mem, n) = nbits_FC * ((2 * n) + (2 * n * mem) + (2 * mem) + (mem * (mem+1) / 2)) + nbits_T * mem
+
+ expected_fgmres_bytes = storage_fgmres_bytes(mem, n)
+ fgmres(A, b, memory=mem) # warmup
+ actual_fgmres_bytes = @allocated fgmres(A, b, memory=mem)
+ @test expected_fgmres_bytes ≤ actual_fgmres_bytes ≤ 1.02 * expected_fgmres_bytes
+
+ solver = FgmresSolver(A, b, mem)
+ fgmres!(solver, A, b) # warmup
+ inplace_fgmres_bytes = @allocated fgmres!(solver, A, b)
+ @test inplace_fgmres_bytes == 0
+ end
+
@testset "CGS" begin
# CGS needs:
# 6 n-vectors: x, r, u, p, q, ts
- storage_cgs(n) = 6 * n
- storage_cgs_bytes(n) = nbits * storage_cgs(n)
+ storage_cgs_bytes(n) = nbits_FC * 6 * n
expected_cgs_bytes = storage_cgs_bytes(n)
cgs(A, b) # warmup
@@ -238,8 +247,7 @@
@testset "BICGSTAB" begin
# BICGSTAB needs:
# 6 n-vectors: x, r, p, v, s, qd
- storage_bicgstab(n) = 6 * n
- storage_bicgstab_bytes(n) = nbits * storage_bicgstab(n)
+ storage_bicgstab_bytes(n) = nbits_FC * 6 * n
expected_bicgstab_bytes = storage_bicgstab_bytes(n)
bicgstab(A, b) # warmup
@@ -254,12 +262,11 @@
@testset "CGNE" begin
# CGNE needs:
- # - 3 n-vectors: x, p, Aᵀz
+ # - 3 n-vectors: x, p, Aᴴz
# - 2 m-vectors: r, q
- storage_cgne(n, m) = 3 * n + 2 * m
- storage_cgne_bytes(n, m) = nbits * storage_cgne(n, m)
+ storage_cgne_bytes(m, n) = nbits_FC * (3 * n + 2 * m)
- expected_cgne_bytes = storage_cgne_bytes(n, m)
+ expected_cgne_bytes = storage_cgne_bytes(k, n)
(x, stats) = cgne(Au, c) # warmup
actual_cgne_bytes = @allocated cgne(Au, c)
@test expected_cgne_bytes ≤ actual_cgne_bytes ≤ 1.02 * expected_cgne_bytes
@@ -272,12 +279,11 @@
@testset "CRMR" begin
# CRMR needs:
- # - 3 n-vectors: x, p, Aᵀr
+ # - 3 n-vectors: x, p, Aᴴr
# - 2 m-vectors: r, q
- storage_crmr(n, m) = 3 * n + 2 * m
- storage_crmr_bytes(n, m) = nbits * storage_crmr(n, m)
+ storage_crmr_bytes(m, n) = nbits_FC * (3 * n + 2 * m)
- expected_crmr_bytes = storage_crmr_bytes(n, m)
+ expected_crmr_bytes = storage_crmr_bytes(k, n)
(x, stats) = crmr(Au, c) # warmup
actual_crmr_bytes = @allocated crmr(Au, c)
@test expected_crmr_bytes ≤ actual_crmr_bytes ≤ 1.02 * expected_crmr_bytes
@@ -290,12 +296,11 @@
@testset "LNLQ" begin
# LNLQ needs:
- # - 3 n-vectors: x, v, Aᵀu
+ # - 3 n-vectors: x, v, Aᴴu
# - 4 m-vectors: y, w̄, u, Av
- storage_lnlq(n, m) = 3 * n + 4 * m
- storage_lnlq_bytes(n, m) = nbits * storage_lnlq(n, m)
+ storage_lnlq_bytes(m, n) = nbits_FC * (3 * n + 4 * m)
- expected_lnlq_bytes = storage_lnlq_bytes(n, m)
+ expected_lnlq_bytes = storage_lnlq_bytes(k, n)
lnlq(Au, c) # warmup
actual_lnlq_bytes = @allocated lnlq(Au, c)
@test expected_lnlq_bytes ≤ actual_lnlq_bytes ≤ 1.02 * expected_lnlq_bytes
@@ -308,12 +313,11 @@
@testset "CRAIG" begin
# CRAIG needs:
- # - 3 n-vectors: x, v, Aᵀu
+ # - 3 n-vectors: x, v, Aᴴu
# - 4 m-vectors: y, w, u, Av
- storage_craig(n, m) = 3 * n + 4 * m
- storage_craig_bytes(n, m) = nbits * storage_craig(n, m)
+ storage_craig_bytes(m, n) = nbits_FC * (3 * n + 4 * m)
- expected_craig_bytes = storage_craig_bytes(n, m)
+ expected_craig_bytes = storage_craig_bytes(k, n)
craig(Au, c) # warmup
actual_craig_bytes = @allocated craig(Au, c)
@test expected_craig_bytes ≤ actual_craig_bytes ≤ 1.02 * expected_craig_bytes
@@ -326,12 +330,11 @@
@testset "CRAIGMR" begin
# CRAIGMR needs:
- # - 4 n-vectors: x, v, Aᵀu, d
+ # - 4 n-vectors: x, v, Aᴴu, d
# - 5 m-vectors: y, u, w, wbar, Av
- storage_craigmr(n, m) = 4 * n + 5 * m
- storage_craigmr_bytes(n, m) = nbits * storage_craigmr(n, m)
+ storage_craigmr_bytes(m, n) = nbits_FC * (4 * n + 5 * m)
- expected_craigmr_bytes = storage_craigmr_bytes(n, m)
+ expected_craigmr_bytes = storage_craigmr_bytes(k, n)
craigmr(Au, c) # warmup
actual_craigmr_bytes = @allocated craigmr(Au, c)
@test expected_craigmr_bytes ≤ actual_craigmr_bytes ≤ 1.02 * expected_craigmr_bytes
@@ -344,12 +347,11 @@
@testset "CGLS" begin
# CGLS needs:
- # - 3 m-vectors: x, p, s
- # - 2 n-vectors: r, q
- storage_cgls(n, m) = 3 * m + 2 * n
- storage_cgls_bytes(n, m) = nbits * storage_cgls(n, m)
+ # - 3 n-vectors: x, p, s
+ # - 2 m-vectors: r, q
+ storage_cgls_bytes(m, n) = nbits_FC * (3 * n + 2 * m)
- expected_cgls_bytes = storage_cgls_bytes(n, m)
+ expected_cgls_bytes = storage_cgls_bytes(m, k)
(x, stats) = cgls(Ao, b) # warmup
actual_cgls_bytes = @allocated cgls(Ao, b)
@test expected_cgls_bytes ≤ actual_cgls_bytes ≤ 1.02 * expected_cgls_bytes
@@ -362,12 +364,11 @@
@testset "LSLQ" begin
# LSLQ needs:
- # - 4 m-vectors: x_lq, v, Aᵀu, w̄ (= x_cg)
- # - 2 n-vectors: u, Av
- storage_lslq(n, m) = 4 * m + 2 * n
- storage_lslq_bytes(n, m) = nbits * storage_lslq(n, m)
+ # - 4 n-vectors: x_lq, v, Aᴴu, w̄ (= x_cg)
+ # - 2 m-vectors: u, Av
+ storage_lslq_bytes(m, n) = nbits_FC * (4 * n + 2 * m)
- expected_lslq_bytes = storage_lslq_bytes(n, m)
+ expected_lslq_bytes = storage_lslq_bytes(m, k)
(x, stats) = lslq(Ao, b) # warmup
actual_lslq_bytes = @allocated lslq(Ao, b)
@test expected_lslq_bytes ≤ actual_lslq_bytes ≤ 1.02 * expected_lslq_bytes
@@ -380,12 +381,11 @@
@testset "CRLS" begin
# CRLS needs:
- # - 4 m-vectors: x, p, Ar, q
- # - 3 n-vectors: r, Ap, s
- storage_crls(n, m) = 4 * m + 3 * n
- storage_crls_bytes(n, m) = nbits * storage_crls(n, m)
+ # - 4 n-vectors: x, p, Ar, q
+ # - 3 m-vectors: r, Ap, s
+ storage_crls_bytes(m, n) = nbits_FC * (4 * n + 3 * m)
- expected_crls_bytes = storage_crls_bytes(n, m)
+ expected_crls_bytes = storage_crls_bytes(m, k)
(x, stats) = crls(Ao, b) # warmup
actual_crls_bytes = @allocated crls(Ao, b)
@test expected_crls_bytes ≤ actual_crls_bytes ≤ 1.02 * expected_crls_bytes
@@ -398,12 +398,11 @@
@testset "LSQR" begin
# LSQR needs:
- # - 4 m-vectors: x, v, w, Aᵀu
- # - 2 n-vectors: u, Av
- storage_lsqr(n, m) = 4 * m + 2 * n
- storage_lsqr_bytes(n, m) = nbits * storage_lsqr(n, m)
+ # - 4 n-vectors: x, v, w, Aᴴu
+ # - 2 m-vectors: u, Av
+ storage_lsqr_bytes(m, n) = nbits_FC * (4 * n + 2 * m)
- expected_lsqr_bytes = storage_lsqr_bytes(n, m)
+ expected_lsqr_bytes = storage_lsqr_bytes(m, k)
(x, stats) = lsqr(Ao, b) # warmup
actual_lsqr_bytes = @allocated lsqr(Ao, b)
@test expected_lsqr_bytes ≤ actual_lsqr_bytes ≤ 1.02 * expected_lsqr_bytes
@@ -416,12 +415,11 @@
@testset "LSMR" begin
# LSMR needs:
- # - 5 m-vectors: x, v, h, hbar, Aᵀu
- # - 2 n-vectors: u, Av
- storage_lsmr(n, m) = 5 * m + 2 * n
- storage_lsmr_bytes(n, m) = nbits * storage_lsmr(n, m)
+ # - 5 n-vectors: x, v, h, hbar, Aᴴu
+ # - 2 m-vectors: u, Av
+ storage_lsmr_bytes(m, n) = nbits_FC * (5 * n + 2 * m)
- expected_lsmr_bytes = storage_lsmr_bytes(n, m)
+ expected_lsmr_bytes = storage_lsmr_bytes(m, k)
(x, stats) = lsmr(Ao, b) # warmup
actual_lsmr_bytes = @allocated lsmr(Ao, b)
@test expected_lsmr_bytes ≤ actual_lsmr_bytes ≤ 1.02 * expected_lsmr_bytes
@@ -435,8 +433,7 @@
@testset "BiLQ" begin
# BILQ needs:
# - 8 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, d̅, p, q
- storage_bilq(n) = 8 * n
- storage_bilq_bytes(n) = nbits * storage_bilq(n)
+ storage_bilq_bytes(n) = nbits_FC * 8 * n
expected_bilq_bytes = storage_bilq_bytes(n)
bilq(A, b) # warmup
@@ -452,8 +449,7 @@
@testset "QMR" begin
# QMR needs:
# - 9 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p, q
- storage_qmr(n) = 9 * n
- storage_qmr_bytes(n) = nbits * storage_qmr(n)
+ storage_qmr_bytes(n) = nbits_FC * 9 * n
expected_qmr_bytes = storage_qmr_bytes(n)
qmr(A, b) # warmup
@@ -469,8 +465,7 @@
@testset "BiLQR" begin
# BILQR needs:
# - 11 n-vectors: uₖ₋₁, uₖ, vₖ₋₁, vₖ, x, t, d̅, wₖ₋₁, wₖ, p, q
- storage_bilqr(n) = 11 * n
- storage_bilqr_bytes(n) = nbits * storage_bilqr(n)
+ storage_bilqr_bytes(n) = nbits_FC * 11 * n
expected_bilqr_bytes = storage_bilqr_bytes(n)
bilqr(A, b, b) # warmup
@@ -487,10 +482,9 @@
# USYMLQ needs:
# - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p
# - 3 m-vectors: vₖ₋₁, vₖ, q
- storage_usymlq(n, m) = 5 * n + 3 * m
- storage_usymlq_bytes(n, m) = nbits * storage_usymlq(n, m)
+ storage_usymlq_bytes(m, n) = nbits_FC * (5 * n + 3 * m)
- expected_usymlq_bytes = storage_usymlq_bytes(n, m)
+ expected_usymlq_bytes = storage_usymlq_bytes(k, n)
usymlq(Au, c, b) # warmup
actual_usymlq_bytes = @allocated usymlq(Au, c, b)
@test expected_usymlq_bytes ≤ actual_usymlq_bytes ≤ 1.02 * expected_usymlq_bytes
@@ -503,12 +497,11 @@
@testset "USYMQR" begin
# USYMQR needs:
- # - 6 m-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p
- # - 3 n-vectors: uₖ₋₁, uₖ, q
- storage_usymqr(n, m) = 6 * m + 3 * n
- storage_usymqr_bytes(n, m) = nbits * storage_usymqr(n, m)
+ # - 6 n-vectors: vₖ₋₁, vₖ, x, wₖ₋₁, wₖ, p
+ # - 3 m-vectors: uₖ₋₁, uₖ, q
+ storage_usymqr_bytes(m, n) = nbits_FC * (6 * n + 3 * m)
- expected_usymqr_bytes = storage_usymqr_bytes(n, m)
+ expected_usymqr_bytes = storage_usymqr_bytes(m, k)
(x, stats) = usymqr(Ao, b, c) # warmup
actual_usymqr_bytes = @allocated usymqr(Ao, b, c)
@test expected_usymqr_bytes ≤ actual_usymqr_bytes ≤ 1.02 * expected_usymqr_bytes
@@ -523,8 +516,7 @@
# TRILQR needs:
# - 6 m-vectors: vₖ₋₁, vₖ, t, wₖ₋₁, wₖ, q
# - 5 n-vectors: uₖ₋₁, uₖ, x, d̅, p
- storage_trilqr(n, m) = 6 * m + 5 * n
- storage_trilqr_bytes(n, m) = nbits * storage_trilqr(n, m)
+ storage_trilqr_bytes(m, n) = nbits_FC * (6 * m + 5 * n)
expected_trilqr_bytes = storage_trilqr_bytes(n, n)
trilqr(A, b, b) # warmup
@@ -541,10 +533,9 @@
# TriCG needs:
# - 6 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₁, gy₂ₖ, p
# - 6 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₁, gx₂ₖ, q
- storage_tricg(n, m) = 6 * n + 6 * m
- storage_tricg_bytes(n, m) = nbits * storage_tricg(n, m)
+ storage_tricg_bytes(m, n) = nbits_FC * (6 * n + 6 * m)
- expected_tricg_bytes = storage_tricg_bytes(n, m)
+ expected_tricg_bytes = storage_tricg_bytes(k, n)
tricg(Au, c, b) # warmup
actual_tricg_bytes = @allocated tricg(Au, c, b)
@test expected_tricg_bytes ≤ actual_tricg_bytes ≤ 1.02 * expected_tricg_bytes
@@ -559,10 +550,9 @@
# TriMR needs:
# - 8 n-vectors: yₖ, uₖ₋₁, uₖ, gy₂ₖ₋₃, gy₂ₖ₋₂, gy₂ₖ₋₁, gy₂ₖ, p
# - 8 m-vectors: xₖ, vₖ₋₁, vₖ, gx₂ₖ₋₃, gx₂ₖ₋₂, gx₂ₖ₋₁, gx₂ₖ, q
- storage_trimr(n, m) = 8 * n + 8 * m
- storage_trimr_bytes(n, m) = nbits * storage_trimr(n, m)
+ storage_trimr_bytes(m, n) = nbits_FC * (8 * n + 8 * m)
- expected_trimr_bytes = storage_trimr_bytes(n, m)
+ expected_trimr_bytes = storage_trimr_bytes(k, n)
trimr(Au, c, b) # warmup
actual_trimr_bytes = @allocated trimr(Au, c, b)
@test expected_trimr_bytes ≤ actual_trimr_bytes ≤ 1.02 * expected_trimr_bytes
@@ -575,17 +565,16 @@
@testset "GPMR" begin
# GPMR needs:
- # - 2 n-vectors: x, q
- # - 2 m-vectors: y, p
- # - 1 (n*mem)-matrix: V
- # - 1 (m*mem)-matrix: U
+ # - 2 m-vectors: x, q
+ # - 2 n-vectors: y, p
+ # - 1 (m*mem)-matrix: V
+ # - 1 (n*mem)-matrix: U
# - 1 (2*mem)-vector: zt
# - 2 (4*mem)-vectors: gc, gs
# - 1 (mem*(2mem+1))-vector: R
- storage_gpmr(mem, n, m) = (mem + 2) * (n + m) + mem * (2 * mem + 11)
- storage_gpmr_bytes(mem, n, m) = nbits * storage_gpmr(mem, n, m)
+ storage_gpmr_bytes(mem, m, n) = nbits_FC * ((mem + 2) * (n + m) + mem * (2 * mem + 7)) + nbits_T * 4 * mem
- expected_gpmr_bytes = storage_gpmr_bytes(mem, n, m)
+ expected_gpmr_bytes = storage_gpmr_bytes(mem, m, k)
gpmr(Ao, Au, b, c, memory=mem, itmax=mem) # warmup
actual_gpmr_bytes = @allocated gpmr(Ao, Au, b, c, memory=mem, itmax=mem)
@test expected_gpmr_bytes ≤ actual_gpmr_bytes ≤ 1.02 * expected_gpmr_bytes
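
The expected-bytes formulas above simply count workspace entries: complex scalars weigh `sizeof(FC)` bytes and real scalars `sizeof(real(FC))`. A worked check of the CG formula with the sizes used in this file:

    FC = ComplexF64
    n = 18^3                      # size of get_div_grad(18, 18, 18)
    nbits_FC = sizeof(FC)         # 16 bytes per ComplexF64 entry
    storage_cg_bytes(n) = nbits_FC * 4 * n
    storage_cg_bytes(n)           # 4 n-vectors (x, r, p, Ap) → 373_248 bytes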
diff --git a/test/test_aux.jl b/test/test_aux.jl
index 11bdb7c2d..6c43142c0 100644
--- a/test/test_aux.jl
+++ b/test/test_aux.jl
@@ -1,119 +1,203 @@
@testset "aux" begin
- # test Givens reflector corner cases
- (c, s, ρ) = Krylov.sym_givens(0.0, 0.0)
- @test (c == 1.0) && (s == 0.0) && (ρ == 0.0)
-
- a = 3.14
- (c, s, ρ) = Krylov.sym_givens(a, 0.0)
- @test (c == 1.0) && (s == 0.0) && (ρ == a)
- (c, s, ρ) = Krylov.sym_givens(-a, 0.0)
- @test (c == -1.0) && (s == 0.0) && (ρ == a)
-
- b = 3.14
- (c, s, ρ) = Krylov.sym_givens(0.0, b)
- @test (c == 0.0) && (s == 1.0) && (ρ == b)
- (c, s, ρ) = Krylov.sym_givens(0.0, -b)
- @test (c == 0.0) && (s == -1.0) && (ρ == b)
-
- (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0))
- @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0))
-
- a = Complex(1.0, 1.0)
- (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0))
- @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a)
- (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0))
- @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a)
-
- b = Complex(1.0, 1.0)
- (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b)
- @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b)
- (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b)
- @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b)
-
- # test roots of a quadratic
- roots = Krylov.roots_quadratic(0.0, 0.0, 0.0)
- @test length(roots) == 1
- @test roots[1] == 0.0
-
- roots = Krylov.roots_quadratic(0.0, 0.0, 1.0)
- @test length(roots) == 0
-
- roots = Krylov.roots_quadratic(0.0, 3.14, -1.0)
- @test length(roots) == 1
- @test roots[1] == 1.0 / 3.14
-
- roots = Krylov.roots_quadratic(1.0, 0.0, 1.0)
- @test length(roots) == 0
-
- roots = Krylov.roots_quadratic(1.0, 0.0, 0.0)
- @test length(roots) == 2
- @test roots[1] == 0.0
- @test roots[2] == 0.0
-
- roots = Krylov.roots_quadratic(1.0, 3.0, 2.0)
- @test length(roots) == 2
- @test roots[1] ≈ -2.0
- @test roots[2] ≈ -1.0
-
- roots = Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
- @test length(roots) == 0
-
- # ill-conditioned quadratic
- roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
- @test length(roots) == 2
- @test roots[1] == 1.0e+13
- @test roots[2] == 0.0
-
- # iterative refinement is crucial!
- roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
- @test length(roots) == 2
- @test roots[1] == 1.0e+13
- @test roots[2] == -1.0e-05
-
- # not ill-conditioned quadratic
- roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
- @test length(roots) == 2
- @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
- @test isapprox(roots[2], -1.0, rtol=1.0e-6)
-
- roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
- @test length(roots) == 2
- @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
- @test isapprox(roots[2], -1.0, rtol=1.0e-6)
-
- # test trust-region boundary
- x = ones(5)
- d = ones(5); d[1:2:5] .= -1
- @test_throws ErrorException Krylov.to_boundary(x, d, -1.0)
- @test_throws ErrorException Krylov.to_boundary(x, d, 0.5)
- @test_throws ErrorException Krylov.to_boundary(x, zeros(5), 1.0)
- @test maximum(Krylov.to_boundary(x, d, 5.0)) ≈ 2.209975124224178
- @test minimum(Krylov.to_boundary(x, d, 5.0)) ≈ -1.8099751242241782
- @test maximum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ 1.8099751242241782
- @test minimum(Krylov.to_boundary(x, d, 5.0, flip=true)) ≈ -2.209975124224178
-
- # test kzeros and kones
- @test Krylov.kzeros(Vector{Float64}, 10) == zeros(10)
- @test Krylov.kones(Vector{Float64}, 10) == ones(10)
-
- # test ktypeof
- a = rand(Float32, 10)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float32}
- @test Krylov.ktypeof(b) == Vector{Float32}
-
- a = rand(Float64, 10)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float64}
- @test Krylov.ktypeof(b) == Vector{Float64}
-
- a = sprand(Float32, 10, 0.5)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float32}
- @test Krylov.ktypeof(b) == Vector{Float32}
-
- a = sprand(Float64, 10, 0.5)
- b = view(a, 4:8)
- @test Krylov.ktypeof(a) == Vector{Float64}
- @test Krylov.ktypeof(b) == Vector{Float64}
+
+ @testset "sym_givens" begin
+ # test Givens reflector corner cases
+ (c, s, ρ) = Krylov.sym_givens(0.0, 0.0)
+ @test (c == 1.0) && (s == 0.0) && (ρ == 0.0)
+
+ a = 3.14
+ (c, s, ρ) = Krylov.sym_givens(a, 0.0)
+ @test (c == 1.0) && (s == 0.0) && (ρ == a)
+ (c, s, ρ) = Krylov.sym_givens(-a, 0.0)
+ @test (c == -1.0) && (s == 0.0) && (ρ == a)
+
+ b = 3.14
+ (c, s, ρ) = Krylov.sym_givens(0.0, b)
+ @test (c == 0.0) && (s == 1.0) && (ρ == b)
+ (c, s, ρ) = Krylov.sym_givens(0.0, -b)
+ @test (c == 0.0) && (s == -1.0) && (ρ == b)
+
+ (c, s, ρ) = Krylov.sym_givens(Complex(0.0), Complex(0.0))
+ @test (c == 1.0) && (s == Complex(0.0)) && (ρ == Complex(0.0))
+
+ a = Complex(1.0, 1.0)
+ (c, s, ρ) = Krylov.sym_givens(a, Complex(0.0))
+ @test (c == 1.0) && (s == Complex(0.0)) && (ρ == a)
+ (c, s, ρ) = Krylov.sym_givens(-a, Complex(0.0))
+ @test (c == 1.0) && (s == Complex(0.0)) && (ρ == -a)
+
+ b = Complex(1.0, 1.0)
+ (c, s, ρ) = Krylov.sym_givens(Complex(0.0), b)
+ @test (c == 0.0) && (s == Complex(1.0)) && (ρ == b)
+ (c, s, ρ) = Krylov.sym_givens(Complex(0.0), -b)
+ @test (c == 0.0) && (s == Complex(1.0)) && (ρ == -b)
+ end
+
+ @testset "roots_quadratic" begin
+ # test roots of a quadratic
+ roots = Krylov.roots_quadratic(0.0, 0.0, 0.0)
+ @test roots[1] == 0.0
+ @test roots[2] == 0.0
+
+ @test_throws ErrorException Krylov.roots_quadratic(0.0, 0.0, 1.0)
+
+ roots = Krylov.roots_quadratic(0.0, 3.14, -1.0)
+ @test roots[1] == 1.0 / 3.14
+ @test roots[2] == 1.0 / 3.14
+
+ @test_throws ErrorException Krylov.roots_quadratic(1.0, 0.0, 1.0)
+
+ roots = Krylov.roots_quadratic(1.0, 0.0, 0.0)
+ @test roots[1] == 0.0
+ @test roots[2] == 0.0
+
+ roots = Krylov.roots_quadratic(1.0, 3.0, 2.0)
+ @test roots[1] ≈ -2.0
+ @test roots[2] ≈ -1.0
+
+ @test_throws ErrorException Krylov.roots_quadratic(1.0e+8, 1.0, 1.0)
+
+ # ill-conditioned quadratic
+ roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
+ @test roots[1] == 1.0e+13
+ @test roots[2] == 0.0
+
+ # iterative refinement is crucial!
+ roots = Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
+ @test roots[1] == 1.0e+13
+ @test roots[2] == -1.0e-05
+
+ # not ill-conditioned quadratic
+ roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
+ @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
+ @test isapprox(roots[2], -1.0, rtol=1.0e-6)
+
+ roots = Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
+ @test isapprox(roots[1], 1.0e+7, rtol=1.0e-6)
+ @test isapprox(roots[2], -1.0, rtol=1.0e-6)
+
+ allocations = @allocated Krylov.roots_quadratic(0.0, 0.0, 0.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(0.0, 3.14, -1.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(1.0, 0.0, 0.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(1.0, 3.0, 2.0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-8, 1.0e+5, 1.0, nitref=1)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=0)
+ @test allocations == 0
+
+ allocations = @allocated Krylov.roots_quadratic(-1.0e-7, 1.0, 1.0, nitref=1)
+ @test allocations == 0
+ end
+
+ @testset "to_boundary" begin
+ # test trust-region boundary
+ n = 5
+ x = ones(n)
+ d = ones(n); d[1:2:n] .= -1
+ @test_throws ErrorException Krylov.to_boundary(n, x, d, -1.0)
+ @test_throws ErrorException Krylov.to_boundary(n, x, d, 0.5)
+ @test_throws ErrorException Krylov.to_boundary(n, x, zeros(n), 1.0)
+ @test maximum(Krylov.to_boundary(n, x, d, 5.0)) ≈ 2.209975124224178
+ @test minimum(Krylov.to_boundary(n, x, d, 5.0)) ≈ -1.8099751242241782
+ @test maximum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ 1.8099751242241782
+ @test minimum(Krylov.to_boundary(n, x, d, 5.0, flip=true)) ≈ -2.209975124224178
+ end
+
+ @testset "kzeros" begin
+ # test kzeros
+ @test Krylov.kzeros(Vector{Float64}, 10) == zeros(Float64, 10)
+ @test Krylov.kzeros(Vector{ComplexF32}, 10) == zeros(ComplexF32, 10)
+ end
+
+ @testset "kones" begin
+ # test kones
+ @test Krylov.kones(Vector{Float64}, 10) == ones(Float64, 10)
+ @test Krylov.kones(Vector{ComplexF32}, 10) == ones(ComplexF32, 10)
+ end
+
+ @testset "ktypeof" begin
+ # test ktypeof
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ dv = rand(FC, 10)
+ b = view(dv, 4:8)
+ @test Krylov.ktypeof(dv) == Vector{FC}
+ @test Krylov.ktypeof(b) == Vector{FC}
+
+ dm = rand(FC, 10, 10)
+ b = view(dm, :, 3)
+ @test Krylov.ktypeof(b) == Vector{FC}
+
+ sv = sprand(FC, 10, 0.5)
+ b = view(sv, 4:8)
+ @test Krylov.ktypeof(sv) == Vector{FC}
+ @test Krylov.ktypeof(b) == Vector{FC}
+ end
+ end
+
+ @testset "vector_to_matrix" begin
+ # test vector_to_matrix
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ S = Vector{FC}
+ M = Krylov.vector_to_matrix(S)
+ @test M == Matrix{FC}
+ end
+ end
+
+ @testset "matrix_to_vector" begin
+ # test matrix_to_vector
+ for FC in (Float32, Float64, ComplexF32, ComplexF64)
+ M = Matrix{FC}
+ S = Krylov.matrix_to_vector(M)
+ @test S == Vector{FC}
+ end
+ end
+
+ @testset "macros" begin
+ # test macros
+ for FC ∈ (Float16, Float32, Float64, ComplexF16, ComplexF32, ComplexF64)
+ n = 10
+ x = rand(FC, n)
+ y = rand(FC, n)
+ a = rand(FC)
+ b = rand(FC)
+ c = rand(FC)
+ s = rand(FC)
+
+ T = real(FC)
+ a2 = rand(T)
+ b2 = rand(T)
+
+ Krylov.@kdot(n, x, y)
+
+ Krylov.@kdotr(n, x, y)
+
+ Krylov.@knrm2(n, x)
+
+ Krylov.@kaxpy!(n, a, x, y)
+ Krylov.@kaxpy!(n, a2, x, y)
+
+ Krylov.@kaxpby!(n, a, x, b, y)
+ Krylov.@kaxpby!(n, a2, x, b, y)
+ Krylov.@kaxpby!(n, a, x, b2, y)
+ Krylov.@kaxpby!(n, a2, x, b2, y)
+
+ Krylov.@kcopy!(n, x, y)
+
+ Krylov.@kswap(x, y)
+
+ Krylov.@kref!(n, x, y, c, s)
+ end
+ end
end
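
`sym_givens(a, b)` returns `(c, s, ρ)` such that the symmetric reflector maps `(a, b)` to `(ρ, 0)`; the corner cases above pin down its sign conventions. A quick generic check with illustrative values:

    c, s, ρ = Krylov.sym_givens(3.0, 4.0)   # (0.6, 0.8, 5.0)
    c * 3.0 + s * 4.0                       # ≈ ρ = 5.0
    abs(s * 3.0 - c * 4.0) ≤ 1.0e-14        # the second component is annihilated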
diff --git a/test/test_bicgstab.jl b/test/test_bicgstab.jl
index ce4e6dcd4..6817acf3d 100644
--- a/test/test_bicgstab.jl
+++ b/test/test_bicgstab.jl
@@ -82,10 +82,10 @@
@test(resid ≤ bicgstab_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = bicgstab(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
solver = BicgstabSolver(A, b)
diff --git a/test/test_bilq.jl b/test/test_bilq.jl
index 900d1f6e5..40b9872db 100644
--- a/test/test_bilq.jl
+++ b/test/test_bilq.jl
@@ -66,10 +66,10 @@
@test(resid ≤ bilq_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = bilq(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
diff --git a/test/test_bilqr.jl b/test/test_bilqr.jl
index 6dab06ec7..fd46aade4 100644
--- a/test/test_bilqr.jl
+++ b/test/test_bilqr.jl
@@ -46,10 +46,10 @@
@test(resid_dual ≤ bilqr_tol)
@test(stats.solved_dual)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, t, stats) = bilqr(A, b, c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
A, b, c = adjoint_pde(FC=FC)
diff --git a/test/test_cgne.jl b/test/test_cgne.jl
index 64cbc0ea7..c1a3e798b 100644
--- a/test/test_cgne.jl
+++ b/test/test_cgne.jl
@@ -1,6 +1,6 @@
-function test_cgne(A, b; λ=0.0, M=I)
+function test_cgne(A, b; λ=0.0, N=I, history=false)
(nrow, ncol) = size(A)
- (x, stats) = cgne(A, b, λ=λ, M=M)
+ (x, stats) = cgne(A, b, λ=λ, N=N, history=history)
r = b - A * x
if λ > 0
s = r / sqrt(λ)
@@ -69,8 +69,8 @@ end
@test stats.status == "x = 0 is a zero-residual solution"
# Test with Jacobi (or diagonal) preconditioner
- A, b, M = square_preconditioned(FC=FC)
- (x, stats, resid) = test_cgne(A, b, M=M)
+ A, b, N = square_preconditioned(FC=FC)
+ (x, stats, resid) = test_cgne(A, b, N=N)
@test(resid ≤ cgne_tol)
@test(stats.solved)
(xI, xmin, xmin_norm) = check_min_norm(A, b, x)
@@ -81,8 +81,8 @@ end
A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0;
2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0]
b = [1.0; 0.0]
- M = Diagonal(1 ./ (A * A'))
- (x, stats, resid) = test_cgne(A, b, M=M)
+ N = Diagonal(1 ./ (A * A'))
+ (x, stats, resid) = test_cgne(A, b, N=N)
@test(resid ≤ cgne_tol)
@test(stats.solved)
(xI, xmin, xmin_norm) = check_min_norm(A, b, x)
@@ -92,7 +92,7 @@ end
for transpose ∈ (false, true)
A, b, c, D = small_sp(transpose, FC=FC)
D⁻¹ = inv(D)
- (x, stats) = cgne(A, b, M=D⁻¹, λ=1.0)
+ (x, stats) = cgne(A, b, N=D⁻¹, λ=1.0)
end
# test callback function
diff --git a/test/test_cgs.jl b/test/test_cgs.jl
index 5c505bb70..832cd76c3 100644
--- a/test/test_cgs.jl
+++ b/test/test_cgs.jl
@@ -74,10 +74,10 @@
@test(resid ≤ cgs_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = cgs(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
A, b = sparse_laplacian(FC=FC)
diff --git a/test/test_crmr.jl b/test/test_crmr.jl
index 6354f329f..d0f902df6 100644
--- a/test/test_crmr.jl
+++ b/test/test_crmr.jl
@@ -1,6 +1,6 @@
-function test_crmr(A, b; λ=0.0, M=I, history=false)
+function test_crmr(A, b; λ=0.0, N=I, history=false)
(nrow, ncol) = size(A)
- (x, stats) = crmr(A, b, λ=λ, M=M, history=history)
+ (x, stats) = crmr(A, b, λ=λ, N=N, history=history)
r = b - A * x
if λ > 0
s = r / sqrt(λ)
@@ -76,8 +76,8 @@ end
A = 0.5 * [19.0 17.0 15.0 13.0 11.0 9.0 7.0 5.0 3.0 1.0;
2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0]
b = [1.0; 0.0]
- M = Diagonal(1 ./ (A * A'))
- (x, stats, resid) = test_crmr(A, b, M=M)
+ N = Diagonal(1 ./ (A * A'))
+ (x, stats, resid) = test_crmr(A, b, N=N)
@test(resid ≤ crmr_tol)
@test(stats.solved)
(xI, xmin, xmin_norm) = check_min_norm(A, b, x)
@@ -87,7 +87,7 @@ end
for transpose ∈ (false, true)
A, b, c, D = small_sp(transpose, FC=FC)
D⁻¹ = inv(D)
- (x, stats) = crmr(A, b, M=D⁻¹, λ=1.0)
+ (x, stats) = crmr(A, b, N=D⁻¹, λ=1.0)
end
# test callback function
diff --git a/test/test_diom.jl b/test/test_diom.jl
index 4f1a8ecea..62a38b198 100644
--- a/test/test_diom.jl
+++ b/test/test_diom.jl
@@ -60,7 +60,7 @@
# Poisson equation in polar coordinates.
A, b = polar_poisson(FC=FC)
- (x, stats) = diom(A, b, memory=200)
+ (x, stats) = diom(A, b, memory=150)
r = b - A * x
resid = norm(r) / norm(b)
@test(resid ≤ diom_tol)
diff --git a/test/test_fgmres.jl b/test/test_fgmres.jl
new file mode 100644
index 000000000..9bb73d3e4
--- /dev/null
+++ b/test/test_fgmres.jl
@@ -0,0 +1,154 @@
+import LinearAlgebra.mul!
+
+mutable struct FlexiblePreconditioner{T,S}
+ D::Diagonal{T, S}
+ ω::T
+end
+
+function mul!(y::Vector, P::FlexiblePreconditioner, x::Vector)
+ P.ω = -P.ω
+ mul!(y, P.D, x)
+ y .*= P.ω
+end
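+
+# This preconditioner deliberately changes at every application: the sign of ω
+# flips each time mul! is called. Standard GMRES assumes a fixed right
+# preconditioner, whereas FGMRES stores the preconditioned directions
+# explicitly and therefore supports such an iteration-dependent operator.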
+
+@testset "fgmres" begin
+ fgmres_tol = 1.0e-6
+
+ for FC in (Float64, ComplexF64)
+ @testset "Data Type: $FC" begin
+
+ # Symmetric and positive definite system.
+ A, b = symmetric_definite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Symmetric indefinite variant.
+ A, b = symmetric_indefinite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Nonsymmetric and positive definite systems.
+ A, b = nonsymmetric_definite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Nonsymmetric indefinite variant.
+ A, b = nonsymmetric_indefinite(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Symmetric indefinite variant, almost singular.
+ A, b = almost_singular(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ 100 * fgmres_tol)
+ @test(stats.solved)
+
+ # Singular system.
+ A, b = square_inconsistent(FC=FC)
+ (x, stats) = fgmres(A, b)
+ r = b - A * x
+ Aresid = norm(A' * r) / norm(A' * b)
+ @test(Aresid ≤ fgmres_tol)
+ @test(stats.inconsistent)
+
+ # Test b == 0
+ A, b = zero_rhs(FC=FC)
+ (x, stats) = fgmres(A, b)
+ @test norm(x) == 0
+ @test stats.status == "x = 0 is a zero-residual solution"
+
+ # Poisson equation in polar coordinates.
+ A, b = polar_poisson(FC=FC)
+ (x, stats) = fgmres(A, b, reorthogonalization=true)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Left preconditioning
+ A, b, M = square_preconditioned(FC=FC)
+ (x, stats) = fgmres(A, b, M=M)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Right preconditioning
+ A, b, N = square_preconditioned(FC=FC)
+ (x, stats) = fgmres(A, b, N=N)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Split preconditioning
+ A, b, M, N = two_preconditioners(FC=FC)
+ (x, stats) = fgmres(A, b, M=M, N=N)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+
+ # Restart
+ for restart ∈ (false, true)
+ memory = 10
+
+ A, b = sparse_laplacian(FC=FC)
+ (x, stats) = fgmres(A, b, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+
+ M = Diagonal(1 ./ diag(A))
+ (x, stats) = fgmres(A, b, M=M, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+
+ N = Diagonal(1 ./ diag(A))
+ (x, stats) = fgmres(A, b, N=N, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+
+      M = Diagonal(1 ./ sqrt.(diag(A)))
+      N = Diagonal(1 ./ sqrt.(diag(A)))
+ (x, stats) = fgmres(A, b, M=M, N=N, restart=restart, memory=memory)
+ r = b - A * x
+ resid = norm(M * r) / norm(M * b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.niter > memory)
+ @test(stats.solved)
+ end
+
+ A, b = polar_poisson(FC=FC)
+ J = inv(Diagonal(A)) # Jacobi preconditioner
+ N = FlexiblePreconditioner(J, 1.0)
+ (x, stats) = fgmres(A, b, N=N)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ fgmres_tol)
+ @test(stats.solved)
+ end
+ end
+end
diff --git a/test/test_lnlq.jl b/test/test_lnlq.jl
index 888119db8..b308609fa 100644
--- a/test/test_lnlq.jl
+++ b/test/test_lnlq.jl
@@ -1,5 +1,5 @@
function test_lnlq(A, b, transfer_to_craig)
- (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0)
+ (x, y, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0)
r = b - A * x
resid = norm(r) / norm(b)
return (x, y, stats, resid)
@@ -61,8 +61,8 @@ end
# Test regularization
A, b, λ = regularization(FC=FC)
- (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, etolx=0.0, etoly=0.0)
- (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, etolx=1e-10, etoly=1e-10, λ=λ)
+ (x, y, stats) = lnlq(A, b, λ=λ, transfer_to_craig=transfer_to_craig, utolx=0.0, utoly=0.0)
+ (xₛ, yₛ, stats) = lnlq(A, b, transfer_to_craig=transfer_to_craig, atol=0.0, rtol=0.0, utolx=1e-10, utoly=1e-10, λ=λ)
for (x, y) in ((x, y), (xₛ, yₛ))
s = λ * y
r = b - (A * x + λ * s)
diff --git a/test/test_minres_qlp.jl b/test/test_minres_qlp.jl
index 6e983e49a..0b4d2046d 100644
--- a/test/test_minres_qlp.jl
+++ b/test/test_minres_qlp.jl
@@ -80,7 +80,7 @@
solver = MinresQlpSolver(A, b)
tol = 1.0
cb_n2 = TestCallbackN2(A, b, tol = tol)
- minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, ctol = 0.0, callback = cb_n2)
+ minres_qlp!(solver, A, b, atol = 0.0, rtol = 0.0, Artol = 0.0, callback = cb_n2)
@test solver.stats.status == "user-requested exit"
@test cb_n2(solver)
diff --git a/test/test_mp.jl b/test/test_mp.jl
index b7aa43d38..6b6d58450 100644
--- a/test/test_mp.jl
+++ b/test/test_mp.jl
@@ -3,7 +3,7 @@
for fn in (:cg, :cgls, :usymqr, :cgne, :cgs, :crmr, :cg_lanczos, :dqgmres, :diom, :cr, :gpmr,
:lslq, :lsqr, :lsmr, :lnlq, :craig, :bicgstab, :craigmr, :crls, :symmlq, :minres,
:bilq, :minres_qlp, :qmr, :usymlq, :tricg, :trimr, :trilqr, :bilqr, :gmres, :fom,
- :cg_lanczos_shift)
+ :fgmres, :cg_lanczos_shift)
for T in (Float16, Float32, Float64, BigFloat)
for FC in (T, Complex{T})
A = spdiagm(-1 => -ones(FC,n-1), 0 => 3*ones(FC,n), 1 => -ones(FC,n-1))
diff --git a/test/test_processes.jl b/test/test_processes.jl
new file mode 100644
index 000000000..eb3ad19af
--- /dev/null
+++ b/test/test_processes.jl
@@ -0,0 +1,146 @@
+"""
+ P = permutation_paige(k)
+
+Return the sparse (2k) × (2k) matrix
+
+    [e₁ • eₖ      ]
+    [      e₁ • eₖ]
+"""
+function permutation_paige(k)
+ P = spzeros(Float64, 2k, 2k)
+ for i = 1:k
+ P[i,2i-1] = 1.0
+ P[i+k,2i] = 1.0
+ end
+ return P
+end
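+
+# For example, permutation_paige(2) is the 4 × 4 permutation matrix
+#   [1 0 0 0]
+#   [0 0 1 0]
+#   [0 1 0 0]
+#   [0 0 0 1]
+# which interleaves the columns associated with the two k-blocks.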
+
+@testset "processes" begin
+ m = 250
+ n = 500
+ k = 20
+
+ for FC in (Float64, ComplexF64)
+ R = real(FC)
+ nbits_FC = sizeof(FC)
+ nbits_R = sizeof(R)
+ nbits_I = sizeof(Int)
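+    # Note: sizeof returns sizes in bytes, so the storage estimates below are
+    # byte counts despite the nbits_* variable names.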
+
+ @testset "Data Type: $FC" begin
+
+ @testset "Hermitian Lanczos" begin
+ A, b = symmetric_indefinite(n, FC=FC)
+ V, T = hermitian_lanczos(A, b, k)
+
+ @test A * V[:,1:k] ≈ V * T
+
+ storage_hermitian_lanczos_bytes(n, k) = 4k * nbits_I + (3k-1) * nbits_R + n*(k+1) * nbits_FC
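+      # Sketch of this estimate, assuming V is a dense n × (k+1) matrix and T is
+      # a (k+1) × k tridiagonal SparseMatrixCSC with real coefficients: colptr
+      # holds k+1 integers, rowval the 3k-1 row indices, nzval the 3k-1 nonzeros.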
+
+ expected_hermitian_lanczos_bytes = storage_hermitian_lanczos_bytes(n, k)
+ actual_hermitian_lanczos_bytes = @allocated hermitian_lanczos(A, b, k)
+ @test expected_hermitian_lanczos_bytes ≤ actual_hermitian_lanczos_bytes ≤ 1.02 * expected_hermitian_lanczos_bytes
+ end
+
+ @testset "Non-Hermitian Lanczos" begin
+ A, b = nonsymmetric_definite(n, FC=FC)
+ c = -b
+ V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k)
+
+ @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]'
+ @test A * V[:,1:k] ≈ V * T
+ @test A' * U[:,1:k] ≈ U * Tᴴ
+
+ storage_nonhermitian_lanczos_bytes(n, k) = 4k * nbits_I + (6k-2) * nbits_FC + 2*n*(k+1) * nbits_FC
+
+ expected_nonhermitian_lanczos_bytes = storage_nonhermitian_lanczos_bytes(n, k)
+ actual_nonhermitian_lanczos_bytes = @allocated nonhermitian_lanczos(A, b, c, k)
+ @test expected_nonhermitian_lanczos_bytes ≤ actual_nonhermitian_lanczos_bytes ≤ 1.02 * expected_nonhermitian_lanczos_bytes
+ end
+
+ @testset "Arnoldi" begin
+ A, b = nonsymmetric_indefinite(n, FC=FC)
+ V, H = arnoldi(A, b, k)
+
+ @test A * V[:,1:k] ≈ V * H
+
+ function storage_arnoldi_bytes(n, k)
+ return k*(k+1) * nbits_FC + n*(k+1) * nbits_FC
+ end
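+      # Both factors are dense here: H is (k+1) × k and V is n × (k+1), so the
+      # estimate is simply their element counts times the scalar size.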
+
+ expected_arnoldi_bytes = storage_arnoldi_bytes(n, k)
+ actual_arnoldi_bytes = @allocated arnoldi(A, b, k)
+ @test expected_arnoldi_bytes ≤ actual_arnoldi_bytes ≤ 1.02 * expected_arnoldi_bytes
+ end
+
+ @testset "Golub-Kahan" begin
+ A, b = under_consistent(m, n, FC=FC)
+ V, U, L = golub_kahan(A, b, k)
+ B = L[1:k+1,1:k]
+
+ @test A * V[:,1:k] ≈ U * B
+ @test A' * U ≈ V * L'
+ @test A' * A * V[:,1:k] ≈ V * L' * B
+ @test A * A' * U[:,1:k] ≈ U * B * L[1:k,1:k]'
+
+ storage_golub_kahan_bytes(m, n, k) = 3*(k+1) * nbits_I + (2k+1) * nbits_R + (n+m)*(k+1) * nbits_FC
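+      # Here L is a (k+1) × (k+1) lower bidiagonal SparseMatrixCSC with real
+      # coefficients (k+2 colptr entries plus 2k+1 row indices make up the
+      # 3(k+1) integers), while V and U are dense with n and m rows.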
+
+ expected_golub_kahan_bytes = storage_golub_kahan_bytes(m, n, k)
+ actual_golub_kahan_bytes = @allocated golub_kahan(A, b, k)
+ @test expected_golub_kahan_bytes ≤ actual_golub_kahan_bytes ≤ 1.02 * expected_golub_kahan_bytes
+ end
+
+ @testset "Saunders-Simon-Yip" begin
+ A, b = under_consistent(m, n, FC=FC)
+ _, c = over_consistent(n, m, FC=FC)
+ V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k)
+
+ @test T[1:k,1:k] ≈ Tᴴ[1:k,1:k]'
+ @test A * U[:,1:k] ≈ V * T
+ @test A' * V[:,1:k] ≈ U * Tᴴ
+ @test A' * A * U[:,1:k-1] ≈ U * Tᴴ * T[1:k,1:k-1]
+ @test A * A' * V[:,1:k-1] ≈ V * T * Tᴴ[1:k,1:k-1]
+
+ K = [zeros(FC,m,m) A; A' zeros(FC,n,n)]
+ Pₖ = permutation_paige(k)
+ Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ
+ Pₖ₊₁ = permutation_paige(k+1)
+ Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁
+ G = Pₖ₊₁' * [zeros(FC,k+1,k) T; Tᴴ zeros(FC,k+1,k)] * Pₖ
+ @test K * Wₖ ≈ Wₖ₊₁ * G
+
+ storage_saunders_simon_yip_bytes(m, n, k) = 4k * nbits_I + (6k-2) * nbits_FC + (n+m)*(k+1) * nbits_FC
+
+ expected_saunders_simon_yip_bytes = storage_saunders_simon_yip_bytes(m, n, k)
+ actual_saunders_simon_yip_bytes = @allocated saunders_simon_yip(A, b, c, k)
+ @test expected_saunders_simon_yip_bytes ≤ actual_saunders_simon_yip_bytes ≤ 1.02 * expected_saunders_simon_yip_bytes
+ end
+
+ @testset "Montoison-Orban" begin
+ A, b = under_consistent(m, n, FC=FC)
+ B, c = over_consistent(n, m, FC=FC)
+ V, H, U, F = montoison_orban(A, B, b, c, k)
+
+ @test A * U[:,1:k] ≈ V * H
+ @test B * V[:,1:k] ≈ U * F
+ @test B * A * U[:,1:k-1] ≈ U * F * H[1:k,1:k-1]
+ @test A * B * V[:,1:k-1] ≈ V * H * F[1:k,1:k-1]
+
+ K = [zeros(FC,m,m) A; B zeros(FC,n,n)]
+ Pₖ = permutation_paige(k)
+ Wₖ = [V[:,1:k] zeros(FC,m,k); zeros(FC,n,k) U[:,1:k]] * Pₖ
+ Pₖ₊₁ = permutation_paige(k+1)
+ Wₖ₊₁ = [V zeros(FC,m,k+1); zeros(FC,n,k+1) U] * Pₖ₊₁
+ G = Pₖ₊₁' * [zeros(FC,k+1,k) H; F zeros(FC,k+1,k)] * Pₖ
+ @test K * Wₖ ≈ Wₖ₊₁ * G
+
+ function storage_montoison_orban_bytes(m, n, k)
+ return 2*k*(k+1) * nbits_FC + (n+m)*(k+1) * nbits_FC
+ end
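+      # H and F are dense (k+1) × k blocks and V, U are dense with m and n rows,
+      # hence a purely dense element count.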
+
+ expected_montoison_orban_bytes = storage_montoison_orban_bytes(m, n, k)
+ actual_montoison_orban_bytes = @allocated montoison_orban(A, B, b, c, k)
+ @test expected_montoison_orban_bytes ≤ actual_montoison_orban_bytes ≤ 1.02 * expected_montoison_orban_bytes
+ end
+ end
+ end
+end
diff --git a/test/test_qmr.jl b/test/test_qmr.jl
index 184b9877d..4a6b8c1c9 100644
--- a/test/test_qmr.jl
+++ b/test/test_qmr.jl
@@ -58,10 +58,10 @@
@test(resid ≤ qmr_tol)
@test(stats.solved)
- # Test bᵀc == 0
+ # Test bᴴc == 0
A, b, c = bc_breakdown(FC=FC)
(x, stats) = qmr(A, b, c=c)
- @test stats.status == "Breakdown bᵀc = 0"
+ @test stats.status == "Breakdown bᴴc = 0"
# test callback function
solver = QmrSolver(A, b)
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index 468fa5a05..2c98dc795 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -11,1139 +11,142 @@ function test_solvers(FC)
nshifts = 5
T = real(FC)
S = Vector{FC}
+ solvers = Dict{Symbol, KrylovSolver}()
@eval begin
- cg_solver = $(KRYLOV_SOLVERS[:cg])($n, $n, $S)
- symmlq_solver = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S)
- minres_solver = $(KRYLOV_SOLVERS[:minres])($n, $n, $S)
- cg_lanczos_solver = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S)
- diom_solver = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S)
- fom_solver = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S)
- dqgmres_solver = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S)
- gmres_solver = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S)
- cr_solver = $(KRYLOV_SOLVERS[:cr])($n, $n, $S)
- crmr_solver = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S)
- cgs_solver = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S)
- bicgstab_solver = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S)
- craigmr_solver = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S)
- cgne_solver = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S)
- lnlq_solver = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S)
- craig_solver = $(KRYLOV_SOLVERS[:craig])($m, $n, $S)
- lslq_solver = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S)
- cgls_solver = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S)
- lsqr_solver = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S)
- crls_solver = $(KRYLOV_SOLVERS[:crls])($n, $m, $S)
- lsmr_solver = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S)
- usymqr_solver = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S)
- trilqr_solver = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S)
- bilq_solver = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S)
- bilqr_solver = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S)
- minres_qlp_solver = $(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S)
- qmr_solver = $(KRYLOV_SOLVERS[:qmr])($n, $n, $S)
- usymlq_solver = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S)
- tricg_solver = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S)
- trimr_solver = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S)
- gpmr_solver = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S)
- cg_lanczos_shift_solver = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $m, $nshifts, $S)
+ $solvers[:cg] = $(KRYLOV_SOLVERS[:cg])($n, $n, $S)
+ $solvers[:symmlq] = $(KRYLOV_SOLVERS[:symmlq])($n, $n, $S)
+ $solvers[:minres] = $(KRYLOV_SOLVERS[:minres])($n, $n, $S)
+ $solvers[:cg_lanczos] = $(KRYLOV_SOLVERS[:cg_lanczos])($n, $n, $S)
+ $solvers[:cg_lanczos_shift] = $(KRYLOV_SOLVERS[:cg_lanczos_shift])($n, $n, $nshifts, $S)
+ $solvers[:diom] = $(KRYLOV_SOLVERS[:diom])($n, $n, $mem, $S)
+ $solvers[:fom] = $(KRYLOV_SOLVERS[:fom])($n, $n, $mem, $S)
+ $solvers[:dqgmres] = $(KRYLOV_SOLVERS[:dqgmres])($n, $n, $mem, $S)
+ $solvers[:gmres] = $(KRYLOV_SOLVERS[:gmres])($n, $n, $mem, $S)
+ $solvers[:fgmres] = $(KRYLOV_SOLVERS[:fgmres])($n, $n, $mem, $S)
+ $solvers[:cr] = $(KRYLOV_SOLVERS[:cr])($n, $n, $S)
+ $solvers[:crmr] = $(KRYLOV_SOLVERS[:crmr])($m, $n, $S)
+ $solvers[:cgs] = $(KRYLOV_SOLVERS[:cgs])($n, $n, $S)
+ $solvers[:bicgstab] = $(KRYLOV_SOLVERS[:bicgstab])($n, $n, $S)
+ $solvers[:craigmr] = $(KRYLOV_SOLVERS[:craigmr])($m, $n, $S)
+ $solvers[:cgne] = $(KRYLOV_SOLVERS[:cgne])($m, $n, $S)
+ $solvers[:lnlq] = $(KRYLOV_SOLVERS[:lnlq])($m, $n, $S)
+ $solvers[:craig] = $(KRYLOV_SOLVERS[:craig])($m, $n, $S)
+ $solvers[:lslq] = $(KRYLOV_SOLVERS[:lslq])($n, $m, $S)
+ $solvers[:cgls] = $(KRYLOV_SOLVERS[:cgls])($n, $m, $S)
+ $solvers[:lsqr] = $(KRYLOV_SOLVERS[:lsqr])($n, $m, $S)
+ $solvers[:crls] = $(KRYLOV_SOLVERS[:crls])($n, $m, $S)
+ $solvers[:lsmr] = $(KRYLOV_SOLVERS[:lsmr])($n, $m, $S)
+ $solvers[:usymqr] = $(KRYLOV_SOLVERS[:usymqr])($n, $m, $S)
+ $solvers[:trilqr] = $(KRYLOV_SOLVERS[:trilqr])($n, $n, $S)
+ $solvers[:bilq] = $(KRYLOV_SOLVERS[:bilq])($n, $n, $S)
+ $solvers[:bilqr] = $(KRYLOV_SOLVERS[:bilqr])($n, $n, $S)
+ $solvers[:minres_qlp] = $(KRYLOV_SOLVERS[:minres_qlp])($n, $n, $S)
+ $solvers[:qmr] = $(KRYLOV_SOLVERS[:qmr])($n, $n, $S)
+ $solvers[:usymlq] = $(KRYLOV_SOLVERS[:usymlq])($m, $n, $S)
+ $solvers[:tricg] = $(KRYLOV_SOLVERS[:tricg])($m, $n, $S)
+ $solvers[:trimr] = $(KRYLOV_SOLVERS[:trimr])($m, $n, $S)
+ $solvers[:gpmr] = $(KRYLOV_SOLVERS[:gpmr])($n, $m, $mem, $S)
end
- for i = 1 : 3
- A = i * A
- Au = i * Au
- Ao = i * Ao
- b = 5 * b
- c = 3 * c
-
- solver = solve!(cg_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(symmlq_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(minres_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cg_lanczos_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cg_lanczos_shift_solver, A, b, shifts)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(diom_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(fom_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(dqgmres_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(gmres_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cr_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(crmr_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cgs_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == 2 * niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(bicgstab_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == 2 * niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(craigmr_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(cgne_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(lnlq_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(craig_solver, Au, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(lslq_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(cgls_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(lsqr_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(crls_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(lsmr_solver, Ao, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(usymqr_solver, Ao, b, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(trilqr_solver, A, b, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved_primal(solver)
- @test issolved_dual(solver)
- @test issolved(solver)
-
- solver = solve!(bilq_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(bilqr_solver, A, b, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved_primal(solver)
- @test issolved_dual(solver)
- @test issolved(solver)
-
- solver = solve!(minres_qlp_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(qmr_solver, A, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(usymlq_solver, Au, c, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test nsolution(solver) == 1
- @test issolved(solver)
-
- solver = solve!(tricg_solver, Au, c, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(trimr_solver, Au, c, b)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
-
- solver = solve!(gpmr_solver, Ao, Au, b, c)
- niter = niterations(solver)
- @test niter > 0
- @test Aprod(solver) == niter
- @test Atprod(solver) == 0
- @test Bprod(solver) == niter
- @test statistics(solver) === solver.stats
- @test solution(solver, 1) === solver.x
- @test solution(solver, 2) === solver.y
- @test nsolution(solver) == 2
- @test issolved(solver)
+ for (method, solver) in solvers
+ @testset "$(method)" begin
+ for i = 1 : 3
+ A = i * A
+ Au = i * Au
+ Ao = i * Ao
+ b = 5 * b
+ c = 3 * c
+
+ if method ∈ (:cg, :cr, :symmlq, :minres, :minres_qlp, :cg_lanczos, :diom, :fom,
+ :dqgmres, :gmres, :fgmres, :cgs, :bicgstab, :bilq, :qmr, :cg_lanczos_shift)
+ method == :cg_lanczos_shift ? solve!(solver, A, b, shifts) : solve!(solver, A, b)
+ niter = niterations(solver)
+ @test Aprod(solver) == (method ∈ (:cgs, :bicgstab) ? 2 * niter : niter)
+ @test Atprod(solver) == (method ∈ (:bilq, :qmr) ? niter : 0)
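+            # The two checks above encode that CGS and BICGSTAB apply A twice per
+            # iteration, and that BiLQ and QMR are the only methods in this branch
+            # that also require products with Aᴴ.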
+ @test solution(solver) === solver.x
+ @test nsolution(solver) == 1
+ end
+
+ if method ∈ (:cgne, :crmr, :lnlq, :craig, :craigmr)
+ solve!(solver, Au, c)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ @test Atprod(solver) == niter
+ @test solution(solver, 1) === solver.x
+ @test nsolution(solver) == (method ∈ (:cgne, :crmr) ? 1 : 2)
+            (nsolution(solver) == 2) && (@test solution(solver, 2) === solver.y)
+ end
+
+ if method ∈ (:cgls, :crls, :lslq, :lsqr, :lsmr)
+ solve!(solver, Ao, b)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ @test Atprod(solver) == niter
+ @test solution(solver) === solver.x
+ @test nsolution(solver) == 1
+ end
+
+ if method ∈ (:bilqr, :trilqr)
+ solve!(solver, A, b, b)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ @test Atprod(solver) == niter
+ @test solution(solver, 1) === solver.x
+ @test solution(solver, 2) === solver.y
+ @test nsolution(solver) == 2
+ @test issolved_primal(solver)
+ @test issolved_dual(solver)
+ end
+
+ if method ∈ (:tricg, :trimr, :gpmr)
+ method == :gpmr ? solve!(solver, Ao, Au, b, c) : solve!(solver, Au, c, b)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ method != :gpmr && (@test Atprod(solver) == niter)
+ method == :gpmr && (@test Bprod(solver) == niter)
+ @test solution(solver, 1) === solver.x
+ @test solution(solver, 2) === solver.y
+ @test nsolution(solver) == 2
+ end
+
+ if method ∈ (:usymlq, :usymqr)
+ method == :usymlq ? solve!(solver, Au, c, b) : solve!(solver, Ao, b, c)
+ niter = niterations(solver)
+ @test Aprod(solver) == niter
+ @test Atprod(solver) == niter
+ @test solution(solver) === solver.x
+ @test nsolution(solver) == 1
+ end
+
+ @test niter > 0
+ @test statistics(solver) === solver.stats
+ @test issolved(solver)
+ end
+
+ io = IOBuffer()
+ show(io, solver, show_stats=false)
+ showed = String(take!(io))
+
+ # Test that the lines have the same length
+ str = split(showed, "\n", keepempty=false)
+ len_row = length(str[1])
+ @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_row, &, str)
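+      # The strings w̅, w̄ and d̅ each contain a combining mark, so `length` counts
+      # one extra character on rows where they occur; the subtraction above
+      # compensates for that.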
+
+ # Test that the columns have the same length
+      str2 = split(showed, ['│','┌','┬','┐','├','┼','┤','└','┴','┘','\n'], keepempty=false)
+ len_col1 = length(str2[1])
+ len_col2 = length(str2[2])
+ len_col3 = length(str2[3])
+ @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col1, &, str2[1:3:end-2])
+ @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col2, &, str2[2:3:end-1])
+ @test mapreduce(x -> length(x) - mapreduce(y -> occursin(y, x), |, ["w̅","w̄","d̅"]) == len_col3, &, str2[3:3:end])
+
+ # Code coverage
+ show(io, solver, show_stats=true)
+ end
end
-
- io = IOBuffer()
- show(io, cg_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │ CgSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Ap│ Vector{$FC}│ 64│
- │ z│ Vector{$FC}│ 0│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, symmlq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │SymmlqSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ Mvold│ Vector{$FC}│ 64│
- │ Mv│ Vector{$FC}│ 64│
- │ Mv_next│ Vector{$FC}│ 64│
- │ w̅│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 0│
- │ clist│ Vector{$T}│ 5│
- │ zlist│ Vector{$T}│ 5│
- │ sprod│ Vector{$T}│ 5│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, minres_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │MinresSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r1│ Vector{$FC}│ 64│
- │ r2│ Vector{$FC}│ 64│
- │ w1│ Vector{$FC}│ 64│
- │ w2│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 0│
- │ err_vec│ Vector{$T}│ 5│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cg_lanczos_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────────┬───────────────┬─────────────────┐
- │CgLanczosSolver│Precision: $FC │Architecture: CPU│
- ├───────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ Mv│ Vector{$FC}│ 64│
- │ Mv_prev│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Mv_next│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └───────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cg_lanczos_shift_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────────────┬───────────────────┬─────────────────┐
- │CgLanczosShiftSolver│ Precision: $FC │Architecture: CPU│
- ├────────────────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────────────┼───────────────────┼─────────────────┤
- │ Mv│ Vector{$FC}│ 64│
- │ Mv_prev│ Vector{$FC}│ 64│
- │ Mv_next│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 0│
- │ x│Vector{Vector{$FC}}│ 5 x 64│
- │ p│Vector{Vector{$FC}}│ 5 x 64│
- │ σ│ Vector{$T}│ 5│
- │ δhat│ Vector{$T}│ 5│
- │ ω│ Vector{$T}│ 5│
- │ γ│ Vector{$T}│ 5│
- │ rNorms│ Vector{$T}│ 5│
- │ converged│ BitVector│ 5│
- │ not_cv│ BitVector│ 5│
- └────────────────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, diom_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────────┬─────────────────┐
- │DiomSolver│ Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ t│ Vector{$FC}│ 64│
- │ z│ Vector{$FC}│ 0│
- │ w│ Vector{$FC}│ 0│
- │ P│Vector{Vector{$FC}}│ 10 x 64│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ L│ Vector{$FC}│ 10│
- │ H│ Vector{$FC}│ 12│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, fom_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────────┬─────────────────┐
- │ FomSolver│ Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ w│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 0│
- │ q│ Vector{$FC}│ 0│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ l│ Vector{$FC}│ 10│
- │ z│ Vector{$FC}│ 10│
- │ U│ Vector{$FC}│ 55│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, dqgmres_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌─────────────┬───────────────────┬─────────────────┐
- │DqgmresSolver│ Precision: $FC │Architecture: CPU│
- ├─────────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├─────────────┼───────────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ t│ Vector{$FC}│ 64│
- │ z│ Vector{$FC}│ 0│
- │ w│ Vector{$FC}│ 0│
- │ P│Vector{Vector{$FC}}│ 10 x 64│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ c│ Vector{$T}│ 10│
- │ s│ Vector{$FC}│ 10│
- │ H│ Vector{$FC}│ 12│
- │ warm_start│ Bool│ 0│
- └─────────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, gmres_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────────┬─────────────────┐
- │GmresSolver│ Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ w│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 0│
- │ q│ Vector{$FC}│ 0│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ c│ Vector{$T}│ 10│
- │ s│ Vector{$FC}│ 10│
- │ z│ Vector{$FC}│ 10│
- │ R│ Vector{$FC}│ 55│
- │ warm_start│ Bool│ 0│
- │ inner_iter│ Int64│ 0│
- └───────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │ CrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ Ar│ Vector{$FC}│ 64│
- │ Mq│ Vector{$FC}│ 0│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, crmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │CrmrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Aᵀr│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ Mq│ Vector{$FC}│ 0│
- │ s│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cgs_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │ CgsSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │Attribute │ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 64│
- │ u│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ ts│ Vector{$FC}│ 64│
- │ yz│ Vector{$FC}│ 0│
- │ vw│ Vector{$FC}│ 0│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, bicgstab_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────────┬───────────────┬─────────────────┐
- │BicgstabSolver│Precision: $FC │Architecture: CPU│
- ├──────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ v│ Vector{$FC}│ 64│
- │ s│ Vector{$FC}│ 64│
- │ qd│ Vector{$FC}│ 64│
- │ yz│ Vector{$FC}│ 0│
- │ t│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └──────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, craigmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌─────────────┬───────────────┬─────────────────┐
- │CraigmrSolver│Precision: $FC │Architecture: CPU│
- ├─────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├─────────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ Nv│ Vector{$FC}│ 64│
- │ Aᵀu│ Vector{$FC}│ 64│
- │ d│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 32│
- │ w│ Vector{$FC}│ 32│
- │ wbar│ Vector{$FC}│ 32│
- │ Av│ Vector{$FC}│ 32│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ q│ Vector{$FC}│ 0│
- └─────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cgne_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │CgneSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Aᵀz│ Vector{$FC}│ 64│
- │ r│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ s│ Vector{$FC}│ 0│
- │ z│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, lnlq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │LnlqSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ Nv│ Vector{$FC}│ 64│
- │ Aᵀu│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 32│
- │ w̄│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 32│
- │ Av│ Vector{$FC}│ 32│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ q│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, craig_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────┬─────────────────┐
- │CraigSolver│Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 64│
- │ Nv│ Vector{$FC}│ 64│
- │ Aᵀu│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 32│
- │ w│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 32│
- │ Av│ Vector{$FC}│ 32│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ w2│ Vector{$FC}│ 0│
- └───────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, lslq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │LslqSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ Nv│ Vector{$FC}│ 32│
- │ Aᵀu│ Vector{$FC}│ 32│
- │ w̄│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 64│
- │ Av│ Vector{$FC}│ 64│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ err_vec│ Vector{$T}│ 5│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, cgls_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │CglsSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ p│ Vector{$FC}│ 32│
- │ s│ Vector{$FC}│ 32│
- │ r│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ Mr│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, lsqr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │LsqrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ Nv│ Vector{$FC}│ 32│
- │ Aᵀu│ Vector{$FC}│ 32│
- │ w│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 64│
- │ Av│ Vector{$FC}│ 64│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ err_vec│ Vector{$T}│ 5│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, crls_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │CrlsSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ p│ Vector{$FC}│ 32│
- │ Ar│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ r│ Vector{$FC}│ 64│
- │ Ap│ Vector{$FC}│ 64│
- │ s│ Vector{$FC}│ 64│
- │ Ms│ Vector{$FC}│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, lsmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │LsmrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ x│ Vector{$FC}│ 32│
- │ Nv│ Vector{$FC}│ 32│
- │ Aᵀu│ Vector{$FC}│ 32│
- │ h│ Vector{$FC}│ 32│
- │ hbar│ Vector{$FC}│ 32│
- │ Mu│ Vector{$FC}│ 64│
- │ Av│ Vector{$FC}│ 64│
- │ u│ Vector{$FC}│ 0│
- │ v│ Vector{$FC}│ 0│
- │ err_vec│ Vector{$T}│ 5│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, usymqr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │UsymqrSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 32│
- │ wₖ₋₂│ Vector{$FC}│ 32│
- │ wₖ₋₁│ Vector{$FC}│ 32│
- │ uₖ₋₁│ Vector{$FC}│ 32│
- │ uₖ│ Vector{$FC}│ 32│
- │ p│ Vector{$FC}│ 32│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, trilqr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │TrilqrSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ d̅│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ Δy│ Vector{$FC}│ 0│
- │ y│ Vector{$FC}│ 64│
- │ wₖ₋₃│ Vector{$FC}│ 64│
- │ wₖ₋₂│ Vector{$FC}│ 64│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, bilq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │BilqSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ d̅│ Vector{$FC}│ 64│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, bilqr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────┬─────────────────┐
- │BilqrSolver│Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ Δy│ Vector{$FC}│ 0│
- │ y│ Vector{$FC}│ 64│
- │ d̅│ Vector{$FC}│ 64│
- │ wₖ₋₃│ Vector{$FC}│ 64│
- │ wₖ₋₂│ Vector{$FC}│ 64│
- │ warm_start│ Bool│ 0│
- └───────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, minres_qlp_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────────┬───────────────┬─────────────────┐
- │MinresQlpSolver│Precision: $FC │Architecture: CPU│
- ├───────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────────┼───────────────┼─────────────────┤
- │ Δx│ Vector{$FC}│ 0│
- │ wₖ₋₁│ Vector{$FC}│ 64│
- │ wₖ│ Vector{$FC}│ 64│
- │ M⁻¹vₖ₋₁│ Vector{$FC}│ 64│
- │ M⁻¹vₖ│ Vector{$FC}│ 64│
- │ x│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └───────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, qmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────┬─────────────────┐
- │ QmrSolver│Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ q│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 64│
- │ vₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ wₖ₋₂│ Vector{$FC}│ 64│
- │ wₖ₋₁│ Vector{$FC}│ 64│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, usymlq_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌────────────┬───────────────┬─────────────────┐
- │UsymlqSolver│Precision: $FC │Architecture: CPU│
- ├────────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├────────────┼───────────────┼─────────────────┤
- │ uₖ₋₁│ Vector{$FC}│ 64│
- │ uₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ Δx│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ d̅│ Vector{$FC}│ 64│
- │ vₖ₋₁│ Vector{$FC}│ 32│
- │ vₖ│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ warm_start│ Bool│ 0│
- └────────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, tricg_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────┬─────────────────┐
- │TricgSolver│Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────┼─────────────────┤
- │ y│ Vector{$FC}│ 64│
- │ N⁻¹uₖ₋₁│ Vector{$FC}│ 64│
- │ N⁻¹uₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ gy₂ₖ₋₁│ Vector{$FC}│ 64│
- │ gy₂ₖ│ Vector{$FC}│ 64│
- │ x│ Vector{$FC}│ 32│
- │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│
- │ M⁻¹vₖ│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ gx₂ₖ₋₁│ Vector{$FC}│ 32│
- │ gx₂ₖ│ Vector{$FC}│ 32│
- │ Δx│ Vector{$FC}│ 0│
- │ Δy│ Vector{$FC}│ 0│
- │ uₖ│ Vector{$FC}│ 0│
- │ vₖ│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └───────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, trimr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌───────────┬───────────────┬─────────────────┐
- │TrimrSolver│Precision: $FC │Architecture: CPU│
- ├───────────┼───────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├───────────┼───────────────┼─────────────────┤
- │ y│ Vector{$FC}│ 64│
- │ N⁻¹uₖ₋₁│ Vector{$FC}│ 64│
- │ N⁻¹uₖ│ Vector{$FC}│ 64│
- │ p│ Vector{$FC}│ 64│
- │ gy₂ₖ₋₃│ Vector{$FC}│ 64│
- │ gy₂ₖ₋₂│ Vector{$FC}│ 64│
- │ gy₂ₖ₋₁│ Vector{$FC}│ 64│
- │ gy₂ₖ│ Vector{$FC}│ 64│
- │ x│ Vector{$FC}│ 32│
- │ M⁻¹vₖ₋₁│ Vector{$FC}│ 32│
- │ M⁻¹vₖ│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 32│
- │ gx₂ₖ₋₃│ Vector{$FC}│ 32│
- │ gx₂ₖ₋₂│ Vector{$FC}│ 32│
- │ gx₂ₖ₋₁│ Vector{$FC}│ 32│
- │ gx₂ₖ│ Vector{$FC}│ 32│
- │ Δx│ Vector{$FC}│ 0│
- │ Δy│ Vector{$FC}│ 0│
- │ uₖ│ Vector{$FC}│ 0│
- │ vₖ│ Vector{$FC}│ 0│
- │ warm_start│ Bool│ 0│
- └───────────┴───────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
-
- io = IOBuffer()
- show(io, gpmr_solver, show_stats=false)
- showed = String(take!(io))
- expected = """
- ┌──────────┬───────────────────┬─────────────────┐
- │GpmrSolver│ Precision: $FC │Architecture: CPU│
- ├──────────┼───────────────────┼─────────────────┤
- │ Attribute│ Type│ Size│
- ├──────────┼───────────────────┼─────────────────┤
- │ wA│ Vector{$FC}│ 0│
- │ wB│ Vector{$FC}│ 0│
- │ dA│ Vector{$FC}│ 64│
- │ dB│ Vector{$FC}│ 32│
- │ Δx│ Vector{$FC}│ 0│
- │ Δy│ Vector{$FC}│ 0│
- │ x│ Vector{$FC}│ 64│
- │ y│ Vector{$FC}│ 32│
- │ q│ Vector{$FC}│ 0│
- │ p│ Vector{$FC}│ 0│
- │ V│Vector{Vector{$FC}}│ 10 x 64│
- │ U│Vector{Vector{$FC}}│ 10 x 32│
- │ gs│ Vector{$FC}│ 40│
- │ gc│ Vector{$T}│ 40│
- │ zt│ Vector{$FC}│ 20│
- │ R│ Vector{$FC}│ 210│
- │warm_start│ Bool│ 0│
- └──────────┴───────────────────┴─────────────────┘
- """
- @test reduce(replace, [" " => "", "\n" => "", "─" => ""], init=showed) == reduce(replace, [" " => "", "\n" => "", "─" => ""], init=expected)
end
@testset "solvers" begin
diff --git a/test/test_stats.jl b/test/test_stats.jl
index 4289a78a3..186c56c20 100644
--- a/test/test_stats.jl
+++ b/test/test_stats.jl
@@ -4,7 +4,7 @@
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Simple stats
+ expected = """SimpleStats
niter: 0
solved: true
inconsistent: true
@@ -15,14 +15,15 @@
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
stats = Krylov.LsmrStats(0, true, true, Float64[1.0], Float64[2.0], Float64(3.0), Float64(4.0), Float64(5.0), Float64(6.0), Float64(7.0), "t")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Lsmr stats
+ expected = """LsmrStats
niter: 0
solved: true
inconsistent: true
@@ -37,14 +38,15 @@
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
stats = Krylov.LanczosStats(0, true, Float64[3.0], true, NaN, NaN, "t")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Lanczos stats
+ expected = """LanczosStats
niter: 0
solved: true
residuals: [ 3.0e+00 ]
@@ -55,14 +57,15 @@
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
stats = Krylov.LanczosShiftStats(0, true, [Float64[0.9, 0.5], Float64[0.6, 0.4, 0.1]], BitVector([false, true]), NaN, NaN, "t")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """LanczosShift stats
+ expected = """LanczosShiftStats
niter: 0
solved: true
residuals: [[0.9, 0.5], [0.6, 0.4, 0.1]]
@@ -70,16 +73,17 @@
‖A‖F: NaN
κ₂(A): NaN
status: t"""
- @test (VERSION < v"1.5") || strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
+ @test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
stats = Krylov.SymmlqStats(0, true, Float64[4.0], Union{Float64,Missing}[5.0, missing], Float64[6.0], Union{Float64,Missing}[7.0, missing], NaN, NaN, "t")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Symmlq stats
+ expected = """SymmlqStats
niter: 0
solved: true
residuals: [ 4.0e+00 ]
@@ -92,14 +96,15 @@
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
stats = Krylov.AdjointStats(0, true, true, Float64[8.0], Float64[9.0], "t")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """Adjoint stats
+ expected = """AdjointStats
niter: 0
solved primal: true
solved dual: true
@@ -109,14 +114,15 @@
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
stats = Krylov.LNLQStats(0, true, Float64[10.0], false, Float64[11.0], Float64[12.0], "t")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """LNLQ stats
+ expected = """LNLQStats
niter: 0
solved: true
residuals: [ 1.0e+01 ]
@@ -127,14 +133,15 @@
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
stats = Krylov.LSLQStats(0, true, false, Float64[13.0], Float64[14.0], Float64[15.0], false, Float64[16.0], Float64[17.0], "t")
io = IOBuffer()
show(io, stats)
showed = String(take!(io))
storage_type = typeof(stats)
- expected = """LSLQ stats
+ expected = """LSLQStats
niter: 0
solved: true
inconsistent: false
@@ -148,5 +155,6 @@
@test strip.(split(chomp(showed), "\n")) == strip.(split(chomp(expected), "\n"))
Krylov.reset!(stats)
check_reset(stats)
- @test (VERSION < v"1.5") || (@allocated Krylov.reset!(stats)) == 0
+ nbytes_allocated = @allocated Krylov.reset!(stats)
+ @test nbytes_allocated == 0
end
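The recurring change in test_stats.jl drops the `VERSION < v"1.5"` escape hatch, consistent with the CI now targeting Julia 1.6 and later, and splits the allocation check into a capture followed by a `@test`, keeping the `@allocated` measurement out of the test expression itself. A minimal self-contained sketch of the pattern; `reset_example!` is a hypothetical stand-in for `Krylov.reset!`:

using Test

# In-place reset that allocates nothing, mimicking Krylov.reset!.
function reset_example!(v::Vector{Float64})
    fill!(v, 0.0)
    return v
end

v = ones(10)
reset_example!(v)                       # warm-up call so compilation is not measured
nbytes_allocated = @allocated reset_example!(v)
@test nbytes_allocated == 0
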
diff --git a/test/test_trilqr.jl b/test/test_trilqr.jl
index 7d7927372..baf8a597e 100644
--- a/test/test_trilqr.jl
+++ b/test/test_trilqr.jl
@@ -74,7 +74,7 @@
@test(resid_dual ≤ trilqr_tol)
@test(stats.solved_dual)
- # Test consistent Ax = b and inconsistent Aᵀt = c.
+ # Test consistent Ax = b and inconsistent Aᴴt = c.
A, b, c = rectangular_adjoint(FC=FC)
(x, t, stats) = trilqr(A, b, c)
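The comment fix above is more than notation: in Julia, `A'` is `adjoint(A)`, the conjugate transpose, so for complex element types the dual problem solved alongside `Ax = b` is `Aᴴt = c` rather than `Aᵀt = c`. A small illustrative example, not part of the test suite:

A = [1.0 + 2.0im  0.0im; 3.0im  1.0 + 0.0im]
A'            # adjoint(A): the conjugate transpose Aᴴ
transpose(A)  # the plain transpose Aᵀ; differs from A' whenever A is complex
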
diff --git a/test/test_utils.jl b/test/test_utils.jl
index ed72056b6..f1c3ca44e 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -1,50 +1,51 @@
include("get_div_grad.jl")
include("gen_lsq.jl")
include("check_min_norm.jl")
+include("callback_utils.jl")
# Symmetric and positive definite systems.
function symmetric_definite(n :: Int=10; FC=Float64)
- α = FC <: Complex ? im : 1
+ α = FC <: Complex ? FC(im) : one(FC)
A = spdiagm(-1 => α * ones(FC, n-1), 0 => 4 * ones(FC, n), 1 => conj(α) * ones(FC, n-1))
- b = A * [1:n;]
+ b = A * FC[1:n;]
return A, b
end
# Symmetric and indefinite systems.
function symmetric_indefinite(n :: Int=10; FC=Float64)
- α = FC <: Complex ? im : 1
+ α = FC <: Complex ? FC(im) : one(FC)
A = spdiagm(-1 => α * ones(FC, n-1), 0 => ones(FC, n), 1 => conj(α) * ones(FC, n-1))
- b = A * [1:n;]
+ b = A * FC[1:n;]
return A, b
end
# Nonsymmetric and positive definite systems.
function nonsymmetric_definite(n :: Int=10; FC=Float64)
if FC <: Complex
- A = [i == j ? n * one(FC) : im * one(FC) for i=1:n, j=1:n]
+ A = [i == j ? n * one(FC) : FC(im) * one(FC) for i=1:n, j=1:n]
else
A = [i == j ? n * one(FC) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n]
end
- b = A * [1:n;]
+ b = A * FC[1:n;]
return A, b
end
# Nonsymmetric and indefinite systems.
function nonsymmetric_indefinite(n :: Int=10; FC=Float64)
if FC <: Complex
- A = [i == j ? n * (-one(FC))^(i*j) : im * one(FC) for i=1:n, j=1:n]
+ A = [i == j ? n * (-one(FC))^(i*j) : FC(im) * one(FC) for i=1:n, j=1:n]
else
A = [i == j ? n * (-one(FC))^(i*j) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n]
end
- b = A * [1:n;]
+ b = A * FC[1:n;]
return A, b
end
# Underdetermined and consistent systems.
function under_consistent(n :: Int=10, m :: Int=25; FC=Float64)
n < m || error("Square or overdetermined system!")
- α = FC <: Complex ? im : 1
- A = [i/j - α * j/i for i=1:n, j=1:m]
+ α = FC <: Complex ? FC(im) : one(FC)
+ A = FC[i/j - α * j/i for i=1:n, j=1:m]
b = A * ones(FC, m)
return A, b
end
@@ -52,7 +53,7 @@ end
# Underdetermined and inconsistent systems.
function under_inconsistent(n :: Int=10, m :: Int=25; FC=Float64)
n < m || error("Square or overdetermined system!")
- α = FC <: Complex ? 1 + im : 1
+ α = FC <: Complex ? FC(1 + im) : one(FC)
A = α * ones(FC, n, m)
b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n]
return A, b
@@ -84,8 +85,8 @@ end
# Overdetermined and consistent systems.
function over_consistent(n :: Int=25, m :: Int=10; FC=Float64)
n > m || error("Underdetermined or square system!")
- α = FC <: Complex ? im : 1
- A = [i/j - α * j/i for i=1:n, j=1:m]
+ α = FC <: Complex ? FC(im) : one(FC)
+ A = FC[i/j - α * j/i for i=1:n, j=1:m]
b = A * ones(FC, m)
return A, b
end
@@ -93,7 +94,7 @@ end
# Overdetermined and inconsistent systems.
function over_inconsistent(n :: Int=25, m :: Int=10; FC=Float64)
n > m || error("Underdetermined or square system!")
- α = FC <: Complex ? 1 + im : 1
+ α = FC <: Complex ? FC(1 + im) : one(FC)
A = α * ones(FC, n, m)
b = [i == 1 ? -one(FC) : i * one(FC) for i=1:n]
return A, b
@@ -162,23 +163,23 @@ end
function underdetermined_adjoint(n :: Int=100, m :: Int=200; FC=Float64)
n < m || error("Square or overdetermined system!")
A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m]
- b = A * [1:m;]
- c = A' * [-n:-1;]
+ b = A * FC[1:m;]
+ c = A' * FC[-n:-1;]
return A, b, c
end
# Square consistent adjoint systems.
function square_adjoint(n :: Int=100; FC=Float64)
A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:n]
- b = A * [1:n;]
- c = A' * [-n:-1;]
+ b = A * FC[1:n;]
+ c = A' * FC[-n:-1;]
return A, b, c
end
-# Adjoint systems with Ax = b underdetermined consistent and Aᵀt = c overdetermined insconsistent.
+# Adjoint systems with Ax = b underdetermined consistent and Aᴴt = c overdetermined inconsistent.
function rectangular_adjoint(n :: Int=10, m :: Int=25; FC=Float64)
- Aᵀ, c = over_inconsistent(m, n; FC=FC)
- A = adjoint(Aᵀ)
+ Aᴴ, c = over_inconsistent(m, n; FC=FC)
+ A = adjoint(Aᴴ)
b = A * ones(FC, m)
return A, b, c
end
@@ -187,8 +188,8 @@ end
function overdetermined_adjoint(n :: Int=200, m :: Int=100; FC=Float64)
n > m || error("Underdetermined or square system!")
A = [i == j ? FC(10.0) : i < j ? one(FC) : -one(FC) for i=1:n, j=1:m]
- b = A * [1:m;]
- c = A' * [-n:-1;]
+ b = A * FC[1:m;]
+ c = A' * FC[-n:-1;]
return A, b, c
end
@@ -251,7 +252,7 @@ end
# Square and preconditioned problems.
function square_preconditioned(n :: Int=10; FC=Float64)
A = ones(FC, n, n) + (n-1) * eye(n)
- b = FC(10.0) * [1:n;]
+ b = 10 * FC[1:n;]
M⁻¹ = FC(1/n) * eye(n)
return A, b, M⁻¹
end
@@ -363,110 +364,3 @@ function check_reset(stats :: KS) where KS <: Krylov.KrylovStats
end
end
end
-
-# Test callback
-mutable struct TestCallbackN2{T, S, M}
- A::M
- b::S
- storage_vec::S
- tol::T
-end
-TestCallbackN2(A, b; tol = 0.1) = TestCallbackN2(A, b, similar(b), tol)
-
-function (cb_n2::TestCallbackN2)(solver)
- mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
- cb_n2.storage_vec .-= cb_n2.b
- return norm(cb_n2.storage_vec) ≤ cb_n2.tol
-end
-
-mutable struct TestCallbackN2Adjoint{T, S, M}
- A::M
- b::S
- c::S
- storage_vec1::S
- storage_vec2::S
- tol::T
-end
-TestCallbackN2Adjoint(A, b, c; tol = 0.1) = TestCallbackN2Adjoint(A, b, c, similar(b), similar(c), tol)
-
-function (cb_n2::TestCallbackN2Adjoint)(solver)
- mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
- cb_n2.storage_vec1 .-= cb_n2.b
- mul!(cb_n2.storage_vec2, cb_n2.A', solver.y)
- cb_n2.storage_vec2 .-= cb_n2.c
- return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
-end
-
-mutable struct TestCallbackN2Shifts{T, S, M}
- A::M
- b::S
- shifts::Vector{T}
- tol::T
-end
-TestCallbackN2Shifts(A, b, shifts; tol = 0.1) = TestCallbackN2Shifts(A, b, shifts, tol)
-
-function (cb_n2::TestCallbackN2Shifts)(solver)
- r = residuals(cb_n2.A, cb_n2.b, cb_n2.shifts, solver.x)
- return all(map(norm, r) .≤ cb_n2.tol)
-end
-
-mutable struct TestCallbackN2LS{T, S, M}
- A::M
- b::S
- λ::T
- storage_vec1::S
- storage_vec2::S
- tol::T
-end
-TestCallbackN2LS(A, b, λ; tol = 0.1) = TestCallbackN2LS(A, b, λ, similar(b), similar(b, size(A, 2)), tol)
-
-function (cb_n2::TestCallbackN2LS)(solver)
- mul!(cb_n2.storage_vec1, cb_n2.A, solver.x)
- cb_n2.storage_vec1 .-= cb_n2.b
- mul!(cb_n2.storage_vec2, cb_n2.A', cb_n2.storage_vec1)
- cb_n2.storage_vec2 .+= cb_n2.λ .* solver.x
- return norm(cb_n2.storage_vec2) ≤ cb_n2.tol
-end
-
-mutable struct TestCallbackN2LN{T, S, M}
- A::M
- b::S
- λ::T
- storage_vec::S
- tol::T
-end
-TestCallbackN2LN(A, b, λ; tol = 0.1) = TestCallbackN2LN(A, b, λ, similar(b), tol)
-
-function (cb_n2::TestCallbackN2LN)(solver)
- mul!(cb_n2.storage_vec, cb_n2.A, solver.x)
- cb_n2.storage_vec .-= cb_n2.b
- cb_n2.λ != 0 && (cb_n2.storage_vec .+= sqrt(cb_n2.λ) .* solver.s)
- return norm(cb_n2.storage_vec) ≤ cb_n2.tol
-end
-
-mutable struct TestCallbackN2SaddlePts{T, S, M}
- A::M
- b::S
- c::S
- storage_vec1::S
- storage_vec2::S
- tol::T
-end
-TestCallbackN2SaddlePts(A, b, c; tol = 0.1) =
- TestCallbackN2SaddlePts(A, b, c, similar(b), similar(c), tol)
-
-function (cb_n2::TestCallbackN2SaddlePts)(solver)
- mul!(cb_n2.storage_vec1, cb_n2.A, solver.y)
- cb_n2.storage_vec1 .+= solver.x .- cb_n2.b
- mul!(cb_n2.storage_vec2, cb_n2.A', solver.x)
- cb_n2.storage_vec2 .-= solver.y .+ cb_n2.c
- return (norm(cb_n2.storage_vec1) ≤ cb_n2.tol && norm(cb_n2.storage_vec2) ≤ cb_n2.tol)
-end
-
-function restarted_gmres_callback_n2(solver::GmresSolver, A, b, stor, N, storage_vec, tol)
- get_x_restarted_gmres!(solver, A, stor, N)
- x = stor.x
- mul!(storage_vec, A, x)
- storage_vec .-= b
- return (norm(storage_vec) ≤ tol)
-end
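Two themes run through the test_utils.jl changes: the callback helper types deleted at the end were evidently moved into the newly included callback_utils.jl, and the problem generators now pin element types explicitly. Prefixing an array literal or comprehension with a type, as in `FC[1:n;]`, builds a `Vector{FC}` instead of a `Vector{Int}`, and `FC(im)` / `one(FC)` keep the scalar `α` in the same type, so the right-hand sides are built directly in the intended precision. A minimal sketch of the idiom:

FC = ComplexF32
b_int = [1:3;]    # Vector{Int}
b_fc  = FC[1:3;]  # Vector{ComplexF32}: the eltype is forced by the FC prefix
α = FC <: Complex ? FC(im) : one(FC)   # a scalar of type FC, not an untyped im
A = FC[i/j - α * j/i for i=1:3, j=1:3] # typed comprehension, Matrix{ComplexF32}
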
diff --git a/test/test_warm_start.jl b/test/test_warm_start.jl
index 66a1cbea7..232a5a9cf 100644
--- a/test/test_warm_start.jl
+++ b/test/test_warm_start.jl
@@ -70,6 +70,11 @@ function test_warm_start(FC)
resid = norm(r) / norm(b)
@test(resid ≤ tol)
+ x, stats = fgmres(A, b, x0)
+ r = b - A * x
+ resid = norm(r) / norm(b)
+ @test(resid ≤ tol)
+
x, stats = bicgstab(A, b, x0)
r = b - A * x
resid = norm(r) / norm(b)