Skip to content

Commit

Permalink
Merge pull request #371 from ArnoStrouwen/doc1.0
Browse files Browse the repository at this point in the history
try documenter 1.0 upgrade
  • Loading branch information
ChrisRackauckas authored Sep 23, 2023
2 parents 5a25b7d + daf7212 commit 6593133
Show file tree
Hide file tree
Showing 19 changed files with 197 additions and 120 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@ on:
pull_request:
branches:
- main
paths-ignore:
- 'docs/**'
push:
branches:
- main
paths-ignore:
- 'docs/**'
jobs:
test:
runs-on: ubuntu-latest
Expand Down
28 changes: 22 additions & 6 deletions benchmarks/applelu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ function luflop(m, n = m; innerflop = 2)
end
end

algs = [LUFactorization(), GenericLUFactorization(), RFLUFactorization(), AppleAccelerateLUFactorization(), MetalLUFactorization()]
algs = [
LUFactorization(),
GenericLUFactorization(),
RFLUFactorization(),
AppleAccelerateLUFactorization(),
MetalLUFactorization(),
]
res = [Float32[] for i in 1:length(algs)]

ns = 4:8:500
Expand All @@ -28,10 +34,14 @@ for i in 1:length(ns)
rng = MersenneTwister(123)
global A = rand(rng, Float32, n, n)
global b = rand(rng, Float32, n)
global u0= rand(rng, Float32, n)
global u0 = rand(rng, Float32, n)

for j in 1:length(algs)
bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A), copy(b); u0 = copy(u0), alias_A=true, alias_b=true))
bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A),
copy(b);
u0 = copy(u0),
alias_A = true,
alias_b = true))
push!(res[j], luflop(n) / bt / 1e9)
end
end
Expand All @@ -41,11 +51,17 @@ __parameterless_type(T) = Base.typename(T).wrapper
parameterless_type(x) = __parameterless_type(typeof(x))
parameterless_type(::Type{T}) where {T} = __parameterless_type(T)

p = plot(ns, res[1]; ylabel = "GFLOPs", xlabel = "N", title = "GFLOPs for NxN LU Factorization", label = string(Symbol(parameterless_type(algs[1]))), legend=:outertopright)
p = plot(ns,
res[1];
ylabel = "GFLOPs",
xlabel = "N",
title = "GFLOPs for NxN LU Factorization",
label = string(Symbol(parameterless_type(algs[1]))),
legend = :outertopright)
for i in 2:length(res)
plot!(p, ns, res[i]; label = string(Symbol(parameterless_type(algs[i]))))
end
p

savefig("metallubench.png")
savefig("metallubench.pdf")
savefig("metallubench.pdf")
20 changes: 15 additions & 5 deletions benchmarks/cudalu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@ for i in 1:length(ns)
rng = MersenneTwister(123)
global A = rand(rng, Float32, n, n)
global b = rand(rng, Float32, n)
global u0= rand(rng, Float32, n)
global u0 = rand(rng, Float32, n)

for j in 1:length(algs)
bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A), copy(b); u0 = copy(u0), alias_A=true, alias_b=true))
bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A),
copy(b);
u0 = copy(u0),
alias_A = true,
alias_b = true))
push!(res[j], luflop(n) / bt / 1e9)
end
end
Expand All @@ -41,11 +45,17 @@ __parameterless_type(T) = Base.typename(T).wrapper
parameterless_type(x) = __parameterless_type(typeof(x))
parameterless_type(::Type{T}) where {T} = __parameterless_type(T)

p = plot(ns, res[1]; ylabel = "GFLOPs", xlabel = "N", title = "GFLOPs for NxN LU Factorization", label = string(Symbol(parameterless_type(algs[1]))), legend=:outertopright)
p = plot(ns,
res[1];
ylabel = "GFLOPs",
xlabel = "N",
title = "GFLOPs for NxN LU Factorization",
label = string(Symbol(parameterless_type(algs[1]))),
legend = :outertopright)
for i in 2:length(res)
plot!(p, ns, res[i]; label = string(Symbol(parameterless_type(algs[i]))))
end
p

savefig("cudaoffloadlubench.png")
savefig("cudaoffloadlubench.pdf")
savefig("cudaoffloadlubench.pdf")
29 changes: 23 additions & 6 deletions benchmarks/lu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@ function luflop(m, n = m; innerflop = 2)
end
end

algs = [LUFactorization(), GenericLUFactorization(), RFLUFactorization(), MKLLUFactorization(), FastLUFactorization(), SimpleLUFactorization()]
algs = [
LUFactorization(),
GenericLUFactorization(),
RFLUFactorization(),
MKLLUFactorization(),
FastLUFactorization(),
SimpleLUFactorization(),
]
res = [Float64[] for i in 1:length(algs)]

ns = 4:8:500
Expand All @@ -28,10 +35,14 @@ for i in 1:length(ns)
rng = MersenneTwister(123)
global A = rand(rng, n, n)
global b = rand(rng, n)
global u0= rand(rng, n)
global u0 = rand(rng, n)

for j in 1:length(algs)
bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A), copy(b); u0 = copy(u0), alias_A=true, alias_b=true))
bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A),
copy(b);
u0 = copy(u0),
alias_A = true,
alias_b = true))
push!(res[j], luflop(n) / bt / 1e9)
end
end
Expand All @@ -41,11 +52,17 @@ __parameterless_type(T) = Base.typename(T).wrapper
parameterless_type(x) = __parameterless_type(typeof(x))
parameterless_type(::Type{T}) where {T} = __parameterless_type(T)

p = plot(ns, res[1]; ylabel = "GFLOPs", xlabel = "N", title = "GFLOPs for NxN LU Factorization", label = string(Symbol(parameterless_type(algs[1]))), legend=:outertopright)
p = plot(ns,
res[1];
ylabel = "GFLOPs",
xlabel = "N",
title = "GFLOPs for NxN LU Factorization",
label = string(Symbol(parameterless_type(algs[1]))),
legend = :outertopright)
for i in 2:length(res)
plot!(p, ns, res[i]; label = string(Symbol(parameterless_type(algs[i]))))
end
p

savefig("lubench.png")
savefig("lubench.pdf")
savefig("lubench.pdf")
20 changes: 15 additions & 5 deletions benchmarks/metallu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@ for i in 1:length(ns)
rng = MersenneTwister(123)
global A = rand(rng, Float32, n, n)
global b = rand(rng, Float32, n)
global u0= rand(rng, Float32, n)
global u0 = rand(rng, Float32, n)

for j in 1:length(algs)
bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A), copy(b); u0 = copy(u0), alias_A=true, alias_b=true))
bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A),
copy(b);
u0 = copy(u0),
alias_A = true,
alias_b = true))
GC.gc()
push!(res[j], luflop(n) / bt / 1e9)
end
Expand All @@ -42,11 +46,17 @@ __parameterless_type(T) = Base.typename(T).wrapper
parameterless_type(x) = __parameterless_type(typeof(x))
parameterless_type(::Type{T}) where {T} = __parameterless_type(T)

p = plot(ns, res[1]; ylabel = "GFLOPs", xlabel = "N", title = "GFLOPs for NxN LU Factorization", label = string(Symbol(parameterless_type(algs[1]))), legend=:outertopright)
p = plot(ns,
res[1];
ylabel = "GFLOPs",
xlabel = "N",
title = "GFLOPs for NxN LU Factorization",
label = string(Symbol(parameterless_type(algs[1]))),
legend = :outertopright)
for i in 2:length(res)
plot!(p, ns, res[i]; label = string(Symbol(parameterless_type(algs[i]))))
end
p

savefig("metal_large_lubench.png")
savefig("metal_large_lubench.pdf")
savefig("metal_large_lubench.pdf")
2 changes: 1 addition & 1 deletion docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"

[compat]
Documenter = "0.27"
Documenter = "1"
LinearSolve = "1, 2"
9 changes: 1 addition & 8 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,7 @@ makedocs(sitename = "LinearSolve.jl",
authors = "Chris Rackauckas",
modules = [LinearSolve, LinearSolve.SciMLBase],
clean = true, doctest = false, linkcheck = true,
strict = [
:doctest,
:linkcheck,
:parse_error,
:example_block,
# Other available options are
# :autodocs_block, :cross_references, :docs_block, :eval_block, :example_block, :footnote, :meta_block, :missing_docs, :setup_block
],
warnonly = [:docs_block, :missing_docs],
format = Documenter.HTML(assets = ["assets/favicon.ico"],
canonical = "https://docs.sciml.ai/LinearSolve/stable/"),
pages = pages)
Expand Down
35 changes: 11 additions & 24 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,32 +84,19 @@ Pkg.status(; mode = PKGMODE_MANIFEST) # hide
</details>
```

```@raw html
You can also download the
<a href="
```

```@eval
using TOML
using Markdown
version = TOML.parse(read("../../Project.toml", String))["version"]
name = TOML.parse(read("../../Project.toml", String))["name"]
link = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
"/assets/Manifest.toml"
```

```@raw html
">manifest</a> file and the
<a href="
```

```@eval
using TOML
version = TOML.parse(read("../../Project.toml", String))["version"]
name = TOML.parse(read("../../Project.toml", String))["name"]
link = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
"/assets/Project.toml"
```

```@raw html
">project</a> file.
link_manifest = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
"/assets/Manifest.toml"
link_project = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
"/assets/Project.toml"
Markdown.parse("""You can also download the
[manifest]($link_manifest)
file and the
[project]($link_project)
file.
""")
```
18 changes: 9 additions & 9 deletions docs/src/solvers/solvers.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ but one may need to change this to receive more performance or precision. If
more precision is necessary, `QRFactorization()` and `SVDFactorization()` are
the best choices, with SVD being the slowest but most precise.

For efficiency, `RFLUFactorization` is the fastest for dense LU-factorizations until around
For efficiency, `RFLUFactorization` is the fastest for dense LU-factorizations until around
150x150 matrices, though this can be dependent on the exact details of the hardware. After this
point, `MKLLUFactorization` is usually faster on most hardware. Note that on Mac computers,
`AppleAccelerateLUFactorization` is generally the fastest. `LUFactorization` will
use your base system BLAS which can be fast or slow depending on the hardware configuration.
use your base system BLAS which can be fast or slow depending on the hardware configuration.
`SimpleLUFactorization` will be fast only on very small matrices but can cut down on compile times.

For very large dense factorizations, offloading to the GPU can be preferred. Metal.jl can be used
on Mac hardware to offload, and has a cutoff point of being faster at around size 20,000 x 20,000
on Mac hardware to offload, and has a cutoff point of being faster at around size 20,000 x 20,000
matrices (and only supports Float32). `CudaOffloadFactorization` can be more efficient at a
much smaller cutoff, possibly around size 1,000 x 1,000 matrices, though this is highly dependent
on the chosen GPU hardware. `CudaOffloadFactorization` requires a CUDA-compatible NVIDIA GPU.
Expand All @@ -31,9 +31,9 @@ CUDA offload supports Float64 but most consumer GPU hardware will be much faster
this is only recommended for Float32 matrices.

!!! note

Performance details for dense LU-factorizations can be highly dependent on the hardware configuration.
For details see [this issue](https://github.com/SciML/LinearSolve.jl/issues/357).
Performance details for dense LU-factorizations can be highly dependent on the hardware configuration.
For details see [this issue](https://github.com/SciML/LinearSolve.jl/issues/357).
If one is looking to best optimize their system, we suggest running the performance
tuning benchmark.

Expand Down Expand Up @@ -65,19 +65,19 @@ The interface is detailed [here](@ref custom).
### Lazy SciMLOperators

If the linear operator is given as a lazy non-concrete operator, such as a `FunctionOperator`,
then using a Krylov method is preferred in order to not concretize the matrix.
then using a Krylov method is preferred in order to not concretize the matrix.
Krylov.jl generally outperforms IterativeSolvers.jl and KrylovKit.jl, and is compatible
with CPUs and GPUs, and thus is the generally preferred form for Krylov methods. The
choice of Krylov method should be the one most constrained to the type of operator one
has: for example, if the operator is positive definite, use `Krylov_CG()`; if it has no
such exploitable properties, use `Krylov_GMRES()`.

!!! tip

If your materialized operator is a uniform block diagonal matrix, then you can use
`SimpleGMRES(; blocksize = <known block size>)` to further improve performance.
This often shows up in neural networks, where the Jacobian with respect to the inputs (almost always)
is a Uniform Block Diagonal matrix of Block Size = size of the input divided by the
is a Uniform Block Diagonal matrix of Block Size = size of the input divided by the
batch size.

## Full List of Methods
Expand Down
2 changes: 1 addition & 1 deletion ext/LinearSolveBlockDiagonalsExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using LinearSolve, BlockDiagonals

function LinearSolve.init_cacheval(alg::SimpleGMRES{false}, A::BlockDiagonal, b, args...;
kwargs...)
@assert ndims(A) == 2 "ndims(A) == $(ndims(A)). `A` must have ndims == 2."
@assert ndims(A)==2 "ndims(A) == $(ndims(A)). `A` must have ndims == 2."
# We need to perform this check even when `zeroinit == true`, since the type of the
# cache is dependent on whether we are able to use the specialized dispatch.
bsizes = blocksizes(A)
Expand Down
2 changes: 1 addition & 1 deletion ext/LinearSolveKernelAbstractionsExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ using GPUArraysCore
function LinearSolve._fast_sym_givens!(c, s, R, nr::Int, inner_iter::Int, bsize::Int, Hbis)
backend = get_backend(Hbis)
kernel! = __fast_sym_givens_kernel!(backend)
kernel!(c[inner_iter], s[inner_iter], R[nr + inner_iter], Hbis; ndrange=bsize)
kernel!(c[inner_iter], s[inner_iter], R[nr + inner_iter], Hbis; ndrange = bsize)
return c, s, R
end

Expand Down
Loading

0 comments on commit 6593133

Please sign in to comment.