From 972c1bc2ce2fae908eb91daf4e96c543c81f79d2 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Tue, 3 Oct 2023 21:52:02 +0200 Subject: [PATCH 1/3] Make AppleAccelerate the default algorithm when available The benchmarks show that it's just so much better, and there's no cost to doing this, so we should just always use AppleAccelerate on any machine where it exists. --- src/LinearSolve.jl | 3 ++- src/appleaccelerate.jl | 10 +++++++--- src/default.jl | 9 +++++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl index 7e96f0203..6cdfe8794 100644 --- a/src/LinearSolve.jl +++ b/src/LinearSolve.jl @@ -90,6 +90,7 @@ EnumX.@enumx DefaultAlgorithmChoice begin SVDFactorization CholeskyFactorization NormalCholeskyFactorization + AppleAccelerateLUFactorization end struct DefaultLinearSolver <: SciMLLinearSolveAlgorithm @@ -98,6 +99,7 @@ end include("common.jl") include("factorization.jl") +include("appleaccelerate.jl") include("simplelu.jl") include("simplegmres.jl") include("iterative_wrappers.jl") @@ -106,7 +108,6 @@ include("solve_function.jl") include("default.jl") include("init.jl") include("extension_algs.jl") -include("appleaccelerate.jl") include("deprecated.jl") @generated function SciMLBase.solve!(cache::LinearCache, alg::AbstractFactorization; diff --git a/src/appleaccelerate.jl b/src/appleaccelerate.jl index f1310ec16..8758bc563 100644 --- a/src/appleaccelerate.jl +++ b/src/appleaccelerate.jl @@ -38,7 +38,6 @@ function aa_getrf!(A::AbstractMatrix{<:Float64}; if isempty(ipiv) ipiv = similar(A, Cint, min(size(A, 1), size(A, 2))) end - ccall(("dgetrf_", libacc), Cvoid, (Ref{Cint}, Ref{Cint}, Ptr{Float64}, Ref{Cint}, Ptr{Cint}, Ptr{Cint}), @@ -121,11 +120,16 @@ end default_alias_A(::AppleAccelerateLUFactorization, ::Any, ::Any) = false default_alias_b(::AppleAccelerateLUFactorization, ::Any, ::Any) = false +const PREALLOCATED_APPLE_LU = begin + A = rand(0, 0) + luinst = ArrayInterface.lu_instance(A) + LU(luinst.factors, similar(A, Cint, 0), luinst.info), Ref{Cint}() +end + function LinearSolve.init_cacheval(alg::AppleAccelerateLUFactorization, A, b, u, Pl, Pr, maxiters::Int, abstol, reltol, verbose::Bool, assumptions::OperatorAssumptions) - luinst = ArrayInterface.lu_instance(convert(AbstractMatrix, A)) - LU(luinst.factors, similar(A, Cint, 0), luinst.info), Ref{Cint}() + PREALLOCATED_APPLE_LU end function SciMLBase.solve!(cache::LinearCache, alg::AppleAccelerateLUFactorization; diff --git a/src/default.jl b/src/default.jl index afa16f183..c7e5661b6 100644 --- a/src/default.jl +++ b/src/default.jl @@ -1,6 +1,6 @@ needs_concrete_A(alg::DefaultLinearSolver) = true mutable struct DefaultLinearSolverInit{T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, - T13, T14, T15, T16} + T13, T14, T15, T16, T17} LUFactorization::T1 QRFactorization::T2 DiagonalFactorization::T3 @@ -17,6 +17,7 @@ mutable struct DefaultLinearSolverInit{T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, SVDFactorization::T14 CholeskyFactorization::T15 NormalCholeskyFactorization::T16 + AppleAccelerateLUFactorization::T17 end # Legacy fallback @@ -157,7 +158,9 @@ function defaultalg(A, b, assump::OperatorAssumptions) ArrayInterface.can_setindex(b) && (__conditioning(assump) === OperatorCondition.IllConditioned || __conditioning(assump) === OperatorCondition.WellConditioned) - if length(b) <= 10 + if appleaccelerate_isavailable() + DefaultAlgorithmChoice.AppleAccelerateLUFactorization + elseif length(b) <= 10 DefaultAlgorithmChoice.GenericLUFactorization elseif (length(b) <= 100 || (isopenblas() && length(b) <= 500)) && (A === nothing ? eltype(b) <: Union{Float32, Float64} : @@ -232,6 +235,8 @@ function algchoice_to_alg(alg::Symbol) CholeskyFactorization() elseif alg === :NormalCholeskyFactorization NormalCholeskyFactorization() + elseif alg === :AppleAccelerateLUFactorization + AppleAccelerateLUFactorization() else error("Algorithm choice symbol $alg not allowed in the default") end From 730f59cd30494fe2634fafda49226902f9800a11 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Tue, 3 Oct 2023 22:06:42 +0200 Subject: [PATCH 2/3] use the generic LU factorization for very small LU --- src/default.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/default.jl b/src/default.jl index c7e5661b6..c8f6d7b74 100644 --- a/src/default.jl +++ b/src/default.jl @@ -158,10 +158,10 @@ function defaultalg(A, b, assump::OperatorAssumptions) ArrayInterface.can_setindex(b) && (__conditioning(assump) === OperatorCondition.IllConditioned || __conditioning(assump) === OperatorCondition.WellConditioned) - if appleaccelerate_isavailable() - DefaultAlgorithmChoice.AppleAccelerateLUFactorization - elseif length(b) <= 10 + if length(b) <= 10 DefaultAlgorithmChoice.GenericLUFactorization + elseif appleaccelerate_isavailable() + DefaultAlgorithmChoice.AppleAccelerateLUFactorization elseif (length(b) <= 100 || (isopenblas() && length(b) <= 500)) && (A === nothing ? eltype(b) <: Union{Float32, Float64} : eltype(A) <: Union{Float32, Float64}) From 6413b060b7762204f48537e9b59becdba915c509 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Wed, 4 Oct 2023 13:46:59 +0200 Subject: [PATCH 3/3] version gate AppleAccelerate --- src/appleaccelerate.jl | 4 +++- src/default.jl | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/appleaccelerate.jl b/src/appleaccelerate.jl index 8758bc563..db684e278 100644 --- a/src/appleaccelerate.jl +++ b/src/appleaccelerate.jl @@ -120,10 +120,12 @@ end default_alias_A(::AppleAccelerateLUFactorization, ::Any, ::Any) = false default_alias_b(::AppleAccelerateLUFactorization, ::Any, ::Any) = false -const PREALLOCATED_APPLE_LU = begin +const PREALLOCATED_APPLE_LU = @static if VERSION >= v"1.8" A = rand(0, 0) luinst = ArrayInterface.lu_instance(A) LU(luinst.factors, similar(A, Cint, 0), luinst.info), Ref{Cint}() +else + nothing end function LinearSolve.init_cacheval(alg::AppleAccelerateLUFactorization, A, b, u, Pl, Pr, diff --git a/src/default.jl b/src/default.jl index c8f6d7b74..17335958a 100644 --- a/src/default.jl +++ b/src/default.jl @@ -160,7 +160,7 @@ function defaultalg(A, b, assump::OperatorAssumptions) __conditioning(assump) === OperatorCondition.WellConditioned) if length(b) <= 10 DefaultAlgorithmChoice.GenericLUFactorization - elseif appleaccelerate_isavailable() + elseif VERSION >= v"1.8" && appleaccelerate_isavailable() DefaultAlgorithmChoice.AppleAccelerateLUFactorization elseif (length(b) <= 100 || (isopenblas() && length(b) <= 500)) && (A === nothing ? eltype(b) <: Union{Float32, Float64} :