Use symmetric linear solve if possible

SciML · Oct 15, 2023 · 564950a · 564950a
1 parent 8f68ef1
commit 564950a
Show file tree

Hide file tree

Showing 3 changed files with 29 additions and 25 deletions.
diff --git a/src/gaussnewton.jl b/src/gaussnewton.jl
@@ -1,6 +1,6 @@
 """
-    GaussNewton(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS,
-        adkwargs...)
+    GaussNewton(; concrete_jac = nothing, linsolve = CholeskyFactorization(),
+        precs = DEFAULT_PRECS, adkwargs...)
 
 An advanced GaussNewton implementation with support for efficient handling of sparse
 matrices via colored automatic differentiation and preconditioned linear solvers. Designed
@@ -22,9 +22,9 @@ for large-scale and numerically-difficult nonlinear least squares problems.
     for example for a preconditioner, `concrete_jac = true` can be passed in order to force
     the construction of the Jacobian.
   - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
-    linear solves within the Newton method. Defaults to `nothing`, which means it uses the
-    LinearSolve.jl default algorithm choice. For more information on available algorithm
-    choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+    linear solves within the Newton method. Defaults to `CholeskyFactorization`. For more
+    information on available algorithm choices, see the
+    [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
   - `precs`: the choice of preconditioners for the linear solver. Defaults to using no
     preconditioners. For more information on specifying preconditioners for LinearSolve
     algorithms, consult the
@@ -41,8 +41,8 @@ for large-scale and numerically-difficult nonlinear least squares problems.
     precs
 end
 
-function GaussNewton(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS,
-    adkwargs...)
+function GaussNewton(; concrete_jac = nothing, linsolve = CholeskyFactorization(),
+    precs = DEFAULT_PRECS, adkwargs...)
     ad = default_adargs_to_adtype(; adkwargs...)
     return GaussNewton{_unwrap_val(concrete_jac)}(ad, linsolve, precs)
 end
@@ -97,8 +97,8 @@ function perform_step!(cache::GaussNewtonCache{true})
     __matmul!(Jᵀf, J', fu1)
 
     # u = u - J \ fu
-    linres = dolinsolve(alg.precs, linsolve; A = JᵀJ, b = _vec(Jᵀf), linu = _vec(du),
-        p, reltol = cache.abstol)
+    linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(JᵀJ), b = _vec(Jᵀf),
+        linu = _vec(du), p, reltol = cache.abstol)
     cache.linsolve = linres.cache
     @. u = u - du
     f(cache.fu_new, u, p)
@@ -125,8 +125,8 @@ function perform_step!(cache::GaussNewtonCache{false})
     if linsolve === nothing
         cache.du = fu1 / cache.J
     else
-        linres = dolinsolve(alg.precs, linsolve; A = cache.JᵀJ, b = _vec(cache.Jᵀf),
-            linu = _vec(cache.du), p, reltol = cache.abstol)
+        linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.JᵀJ),
+            b = _vec(cache.Jᵀf), linu = _vec(cache.du), p, reltol = cache.abstol)
         cache.linsolve = linres.cache
     end
     cache.u = @. u - cache.du  # `u` might not support mutation

diff --git a/src/jacobian.jl b/src/jacobian.jl
@@ -95,14 +95,14 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f, u, p, ::Val{ii
         Jᵀfu = J' * fu
     end
 
-    linprob = LinearProblem(needsJᵀJ ? JᵀJ : J, needsJᵀJ ? _vec(Jᵀfu) : _vec(fu);
-        u0 = _vec(du))
+    linprob = LinearProblem(needsJᵀJ ? __maybe_symmetric(JᵀJ) : J,
+        needsJᵀJ ? _vec(Jᵀfu) : _vec(fu); u0 = _vec(du))
 
     weight = similar(u)
     recursivefill!(weight, true)
 
-    Pl, Pr = wrapprecs(alg.precs(J, nothing, u, p, nothing, nothing, nothing, nothing,
-            nothing)..., weight)
+    Pl, Pr = wrapprecs(alg.precs(needsJᵀJ ? __maybe_symmetric(JᵀJ) : J, nothing, u, p,
+            nothing, nothing, nothing, nothing, nothing)..., weight)
     linsolve = init(linprob, alg.linsolve; alias_A = true, alias_b = true, Pl, Pr,
         linsolve_kwargs...)
 
@@ -119,6 +119,10 @@ __init_JᵀJ(J::Number) = zero(J)
 __init_JᵀJ(J::AbstractArray) = J' * J
 __init_JᵀJ(J::StaticArray) = MArray{Tuple{size(J, 2), size(J, 2)}, eltype(J)}(undef)
 
+__maybe_symmetric(x) = Symmetric(x)
+__maybe_symmetric(x::Number) = x
+__maybe_symmetric(x::SparseArrays.AbstractSparseMatrix) = x
+
 ## Special Handling for Scalars
 function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f, u::Number, p,
     ::Val{false}; linsolve_with_JᵀJ::Val{needsJᵀJ} = Val(false),

diff --git a/src/levenberg.jl b/src/levenberg.jl
@@ -1,5 +1,5 @@
 """
-    LevenbergMarquardt(; concrete_jac = nothing, linsolve = nothing,
+    LevenbergMarquardt(; concrete_jac = nothing, linsolve = CholeskyFactorization(),
         precs = DEFAULT_PRECS, damping_initial::Real = 1.0,
         damping_increase_factor::Real = 2.0, damping_decrease_factor::Real = 3.0,
         finite_diff_step_geodesic::Real = 0.1, α_geodesic::Real = 0.75,
@@ -22,9 +22,9 @@ numerically-difficult nonlinear systems.
     for example for a preconditioner, `concrete_jac = true` can be passed in order to force
     the construction of the Jacobian.
   - `linsolve`: the [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) used for the
-    linear solves within the Newton method. Defaults to `nothing`, which means it uses the
-    LinearSolve.jl default algorithm choice. For more information on available algorithm
-    choices, see the [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
+    linear solves within the Newton method. Defaults to `CholeskyFactorization`. For more
+    information on available algorithm choices, see the
+    [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/).
   - `precs`: the choice of preconditioners for the linear solver. Defaults to using no
     preconditioners. For more information on specifying preconditioners for LinearSolve
     algorithms, consult the
@@ -86,7 +86,7 @@ numerically-difficult nonlinear systems.
     min_damping_D::T
 end
 
-function LevenbergMarquardt(; concrete_jac = nothing, linsolve = nothing,
+function LevenbergMarquardt(; concrete_jac = nothing, linsolve = CholeskyFactorization(),
     precs = DEFAULT_PRECS, damping_initial::Real = 1.0, damping_increase_factor::Real = 2.0,
     damping_decrease_factor::Real = 3.0, finite_diff_step_geodesic::Real = 0.1,
     α_geodesic::Real = 0.75, b_uphill::Real = 1.0, min_damping_D::AbstractFloat = 1e-8,
@@ -203,8 +203,8 @@ function perform_step!(cache::LevenbergMarquardtCache{true})
     # The following lines do: cache.v = -cache.mat_tmp \ cache.u_tmp
     mul!(cache.u_tmp, J', fu1)
     @. cache.mat_tmp = JᵀJ + λ * DᵀD
-    linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp, b = _vec(cache.u_tmp),
-        linu = _vec(cache.du), p = p, reltol = cache.abstol)
+    linres = dolinsolve(alg.precs, linsolve; A = __maybe_symmetric(cache.mat_tmp),
+        b = _vec(cache.u_tmp), linu = _vec(cache.du), p = p, reltol = cache.abstol)
     cache.linsolve = linres.cache
     @. cache.v = -cache.du
 
@@ -280,8 +280,8 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
     if linsolve === nothing
         cache.v = -cache.mat_tmp \ (J' * fu1)
     else
-        linres = dolinsolve(alg.precs, linsolve; A = -cache.mat_tmp, b = _vec(J' * fu1),
-            linu = _vec(cache.v), p, reltol = cache.abstol)
+        linres = dolinsolve(alg.precs, linsolve; A = -__maybe_symmetric(cache.mat_tmp),
+            b = _vec(J' * fu1), linu = _vec(cache.v), p, reltol = cache.abstol)
         cache.linsolve = linres.cache
     end
 
@@ -291,7 +291,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false})
         cache.a = -cache.mat_tmp \
                   _vec(J' * ((2 / h) .* ((f(u .+ h .* v, p) .- fu1) ./ h .- J * v)))
     else
-        linres = dolinsolve(alg.precs, linsolve; A = -cache.mat_tmp,
+        linres = dolinsolve(alg.precs, linsolve;
             b = _mutable(_vec(J' *
                               ((2 / h) .* ((f(u .+ h .* v, p) .- fu1) ./ h .- J * v)))),
             linu = _vec(cache.a), p, reltol = cache.abstol)