From 57e4eca42e0c0d609dd42fce13d221c7e310ee40 Mon Sep 17 00:00:00 2001
From: yonatanwesen <yonatanwesenyeleh1@gmail.com>
Date: Thu, 21 Sep 2023 23:58:34 -0400
Subject: [PATCH 01/35] reworking pt using avik's cleanup

---
 src/NonlinearSolve.jl  |   5 +-
 src/jacobian.jl        |   8 ++-
 src/pseudotransient.jl | 150 +++++++++++++++++++++++++++++++++++++++++
 test/basictests.jl     |  57 ++++++++++++++++
 4 files changed, 217 insertions(+), 3 deletions(-)
 create mode 100644 src/pseudotransient.jl

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 615f96c03..962551893 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -39,6 +39,7 @@ include("linesearch.jl")
 include("raphson.jl")
 include("trustRegion.jl")
 include("levenberg.jl")
+include("pseudotransient.jl")
 include("jacobian.jl")
 include("ad.jl")
 
@@ -49,7 +50,7 @@ PrecompileTools.@compile_workload begin
         prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
         precompile_algs = if VERSION ≥ v"1.7"
-            (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
+            (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient())
         else
             (NewtonRaphson(),)
         end
@@ -68,7 +69,7 @@ end
 
 export RadiusUpdateSchemes
 
-export NewtonRaphson, TrustRegion, LevenbergMarquardt
+export NewtonRaphson, TrustRegion, LevenbergMarquardt, PseudoTransient
 
 export LineSearch
 
diff --git a/src/jacobian.jl b/src/jacobian.jl
index 01c030462..1ff29d803 100644
--- a/src/jacobian.jl
+++ b/src/jacobian.jl
@@ -85,7 +85,13 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f, u, p, ::Val{ii
     end
 
     du = _mutable_zero(u)
-    linprob = LinearProblem(J, _vec(fu); u0 = _vec(du))
+    if alg isa PseudoTransient
+        alpha = convert(eltype(u), alg.alpha_initial)
+        J_new = J - (1 / alpha) * I
+        linprob = LinearProblem(J_new, _vec(fu); u0 = _vec(du))
+    else
+        linprob = LinearProblem(J, _vec(fu); u0 = _vec(du))
+    end
 
     weight = similar(u)
     recursivefill!(weight, true)
diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
new file mode 100644
index 000000000..8213850d3
--- /dev/null
+++ b/src/pseudotransient.jl
@@ -0,0 +1,150 @@
+@concrete struct PseudoTransient{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD}
+    ad::AD
+    linsolve
+    precs
+    alpha_initial
+end
+
+concrete_jac(::PseudoTransient{CJ}) where {CJ} = CJ
+
+function PseudoTransient(; concrete_jac = nothing, linsolve = nothing,
+    precs = DEFAULT_PRECS, alpha_initial = 1e-3, adkwargs...)
+    ad = default_adargs_to_adtype(; adkwargs...)
+    return PseudoTransient{_unwrap_val(concrete_jac)}(ad, linsolve, precs, alpha_initial)
+end
+
+@concrete mutable struct PseudoTransientCache{iip}
+    f
+    alg
+    u
+    fu1
+    fu2
+    du
+    p
+    alpha
+    res_norm
+    uf
+    linsolve
+    J
+    jac_cache
+    force_stop
+    maxiters::Int
+    internalnorm
+    retcode::ReturnCode.T
+    abstol
+    prob
+    stats::NLStats
+end
+
+isinplace(::PseudoTransientCache{iip}) where {iip} = iip
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::PseudoTransient, args...;
+    alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    kwargs...) where {uType, iip}
+    @unpack f, u0, p = prob
+    u = alias_u0 ? u0 : deepcopy(u0)
+    if iip
+        fu1 = f.resid_prototype === nothing ? zero(u) : f.resid_prototype
+        f(fu1, u, p)
+    else
+        fu1 = _mutable(f(u, p))
+    end
+    uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip))
+    alpha = convert(eltype(u), alg.alpha_initial)
+    res_norm = internalnorm(fu1)
+
+    return PseudoTransientCache{iip}(f, alg, u, fu1, fu2, du, p, alpha, res_norm, uf,
+        linsolve, J,
+        jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
+        NLStats(1, 0, 0, 0, 0))
+end
+
+function perform_step!(cache::PseudoTransientCache{true})
+    @unpack u, fu1, f, p, alg, J, linsolve, du, alpha = cache
+    jacobian!!(J, cache)
+    J_new = J - (1 / alpha) * I
+
+    # u = u - J \ fu
+    linres = dolinsolve(alg.precs, linsolve; A = J_new, b = _vec(fu1), linu = _vec(du),
+        p, reltol = cache.abstol)
+    cache.linsolve = linres.cache
+    @. u = u - du
+    f(fu1, u, p)
+
+    new_norm = cache.internalnorm(fu1)
+    cache.alpha *= cache.res_norm / new_norm
+    cache.res_norm = new_norm
+
+    new_norm < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+    cache.stats.njacs += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+    return nothing
+end
+
+function perform_step!(cache::PseudoTransientCache{false})
+    @unpack u, fu1, f, p, alg, linsolve, alpha = cache
+
+    cache.J = jacobian!!(cache.J, cache)
+    # u = u - J \ fu
+    if linsolve === nothing
+        cache.du = fu1 / cache.J
+    else
+        linres = dolinsolve(alg.precs, linsolve; A = cache.J - (1 / alpha) * I,
+            b = _vec(fu1),
+            linu = _vec(cache.du), p, reltol = cache.abstol)
+        cache.linsolve = linres.cache
+    end
+    cache.u = @. u - cache.du  # `u` might not support mutation
+    cache.fu1 = f(cache.u, p)
+
+    new_norm = cache.internalnorm(fu1)
+    cache.alpha *= cache.res_norm / new_norm
+    cache.res_norm = new_norm
+
+    new_norm < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+    cache.stats.njacs += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+    return nothing
+end
+
+function SciMLBase.solve!(cache::PseudoTransientCache)
+    while !cache.force_stop && cache.stats.nsteps < cache.maxiters
+        perform_step!(cache)
+        cache.stats.nsteps += 1
+    end
+
+    if cache.stats.nsteps == cache.maxiters
+        cache.retcode = ReturnCode.MaxIters
+    else
+        cache.retcode = ReturnCode.Success
+    end
+
+    return SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu1;
+        cache.retcode, cache.stats)
+end
+
+function SciMLBase.reinit!(cache::PseudoTransientCache{iip}, u0 = cache.u; p = cache.p,
+    abstol = cache.abstol, maxiters = cache.maxiters) where {iip}
+    cache.p = p
+    if iip
+        recursivecopy!(cache.u, u0)
+        cache.f(cache.fu1, cache.u, p)
+    else
+        # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter
+        cache.u = u0
+        cache.fu1 = cache.f(cache.u, p)
+    end
+    cache.alpha = convert(eltype(u), 1e-3)
+    cache.res_norm = internalnorm(cache.fu1)
+    cache.abstol = abstol
+    cache.maxiters = maxiters
+    cache.stats.nf = 1
+    cache.stats.nsteps = 1
+    cache.force_stop = false
+    cache.retcode = ReturnCode.Default
+    return cache
+end
diff --git a/test/basictests.jl b/test/basictests.jl
index 54e63e93d..396d504fc 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -402,3 +402,60 @@ end
         end
     end
 end
+
+# --- PseudoTransient tests ---
+
+@testset "PseudoTransient" begin
+
+    #iip
+    function test_f!(du, u, p)
+        du[1] = 2 - 2u[1]
+        du[2] = u[1] - 4u[2]
+        return du
+    end
+
+    #oop
+    simple_test(u, p) = 0.9f0 .* u .- u
+
+    #test jacobian free PseudoTransient method
+    function f!(res, u, p)
+        @. res = u * u - p
+    end
+
+    @testset "[IIP] u0: $(typeof(u0))" for u0 in (zeros(2),)
+        probN = NonlinearProblem{true}(test_f!, u0)
+        sol = solve(probN, PseudoTransient())
+        @test sol.retcode == ReturnCode.Success
+        du = zeros(2)
+        test_f!(du, sol.u, 4.0)
+        @test du≈[0, 0] atol=1e-7
+    end
+
+    @testset "[OOP] u0: $(typeof(u0))" for u0 in ([1.0f0],)
+        probN = NonlinearProblem{false}(simple_test, u0)
+        sol = solve(probN, PseudoTransient(alpha_initial = 1.0))
+        @test sol.retcode == ReturnCode.Success
+        @test abs(sol.u[1]) <= 1.0f-4
+    end
+
+    @testset "Jacobian Free PT" begin
+        u0 = [1.0, 1.0]
+        p = 2.0
+        probN = NonlinearProblem(f!, u0, p)
+        linsolve = LinearSolve.KrylovJL_GMRES()
+        sol = solve(probN,
+            PseudoTransient(alpha_initial = 10.0, linsolve = linsolve),
+            reltol = 1e-9)
+        @test sol.retcode == ReturnCode.Success
+    end
+
+    @testset "NewtonRaphson Fails" begin
+        p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+        u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
+        probN = NonlinearProblem{false}(newton_fails, u0, p)
+        sol = solve(probN, PseudoTransient(alpha_initial = 1.0), abstol = 1e-10)
+        @test all(abs.(newton_fails(sol.u, p)) .< 1e-10)
+    end
+end
+
+# Test that `PseudoTransient` passes a test that `NewtonRaphson` fails on.

From 55ba28144e5ec0ecaccc912a22a2053107f3aa7f Mon Sep 17 00:00:00 2001
From: Arno Strouwen <arno.strouwen@telenet.be>
Date: Sat, 23 Sep 2023 03:34:49 +0200
Subject: [PATCH 02/35] Documenter 1.0 upgrade

---
 .github/workflows/CI.yml                |  4 +++
 docs/Project.toml                       |  2 +-
 docs/make.jl                            | 12 +++------
 docs/src/basics/TerminationCondition.md |  2 +-
 docs/src/index.md                       | 35 ++++++++-----------------
 5 files changed, 20 insertions(+), 35 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 75f12a2e9..e0d7a57e5 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -3,9 +3,13 @@ on:
   pull_request:
     branches:
       - master
+    paths-ignore:
+      - 'docs/**'
   push:
     branches:
       - master
+    paths-ignore:
+      - 'docs/**'
 jobs:
   test:
     runs-on: ubuntu-latest
diff --git a/docs/Project.toml b/docs/Project.toml
index df765bb1d..42e261955 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -12,7 +12,7 @@ Sundials = "c3572dad-4567-51f8-b174-8c6c989267f4"
 
 [compat]
 BenchmarkTools = "1"
-Documenter = "0.27"
+Documenter = "1"
 LinearSolve = "2"
 NonlinearSolve = "1, 2"
 NonlinearSolveMINPACK = "0.1"
diff --git a/docs/make.jl b/docs/make.jl
index d2815c502..8f895b0b4 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -11,15 +11,9 @@ makedocs(sitename = "NonlinearSolve.jl",
     modules = [NonlinearSolve, NonlinearSolve.SciMLBase, NonlinearSolve.DiffEqBase,
         SimpleNonlinearSolve, Sundials, SciMLNLSolve, NonlinearSolveMINPACK,
         SteadyStateDiffEq],
-    clean = true, doctest = false,
-    strict = [
-        :doctest,
-        :linkcheck,
-        :parse_error,
-        :example_block,
-        # Other available options are
-        # :autodocs_block, :cross_references, :docs_block, :eval_block, :example_block, :footnote, :meta_block, :missing_docs, :setup_block
-    ],
+    clean = true, doctest = false, linkcheck = true,
+    linkcheck_ignore = ["https://twitter.com/ChrisRackauckas/status/1544743542094020615"],
+    warnonly = [:missing_docs, :cross_references],
     format = Documenter.HTML(assets = ["assets/favicon.ico"],
         canonical = "https://docs.sciml.ai/NonlinearSolve/stable/"),
     pages = pages)
diff --git a/docs/src/basics/TerminationCondition.md b/docs/src/basics/TerminationCondition.md
index cd780b997..e966bb22c 100644
--- a/docs/src/basics/TerminationCondition.md
+++ b/docs/src/basics/TerminationCondition.md
@@ -1,6 +1,6 @@
 # Termination Conditions
 
-Provides a API to specify termination conditions for [`NonLinearProblem`](@ref) and
+Provides a API to specify termination conditions for [`NonlinearProblem`](@ref) and
 [`SteadyStateProblem`](@ref). For details on the various termination modes, i.e.,
 NLSolveTerminationMode, see the documentation for [`NLSolveTerminationCondition`](@ref).
 
diff --git a/docs/src/index.md b/docs/src/index.md
index c47fb57de..3d7e97890 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -82,32 +82,19 @@ Pkg.status(; mode = PKGMODE_MANIFEST) # hide
 </details>
 ```
 
-```@raw html
-You can also download the
-<a href="
-```
-
 ```@eval
 using TOML
+using Markdown
 version = TOML.parse(read("../../Project.toml", String))["version"]
 name = TOML.parse(read("../../Project.toml", String))["name"]
-link = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
-       "/assets/Manifest.toml"
-```
-
-```@raw html
-">manifest</a> file and the
-<a href="
-```
-
-```@eval
-using TOML
-version = TOML.parse(read("../../Project.toml", String))["version"]
-name = TOML.parse(read("../../Project.toml", String))["name"]
-link = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
-       "/assets/Project.toml"
-```
-
-```@raw html
-">project</a> file.
+link_manifest = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
+                "/assets/Manifest.toml"
+link_project = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
+               "/assets/Project.toml"
+Markdown.parse("""You can also download the
+[manifest]($link_manifest)
+file and the
+[project]($link_project)
+file.
+""")
 ```

From 6b2a1ec8c081d0f0af35092b6d8351715c9ef014 Mon Sep 17 00:00:00 2001
From: yonatanwesen <yonatanwesenyeleh1@gmail.com>
Date: Sun, 24 Sep 2023 21:02:45 -0400
Subject: [PATCH 03/35] forget the 1/alpha term when linsolve is nothing

---
 src/pseudotransient.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
index 8213850d3..19fe94062 100644
--- a/src/pseudotransient.jl
+++ b/src/pseudotransient.jl
@@ -89,7 +89,7 @@ function perform_step!(cache::PseudoTransientCache{false})
     cache.J = jacobian!!(cache.J, cache)
     # u = u - J \ fu
     if linsolve === nothing
-        cache.du = fu1 / cache.J
+        cache.du = fu1 / (cache.J - (1 / alpha) * I)
     else
         linres = dolinsolve(alg.precs, linsolve; A = cache.J - (1 / alpha) * I,
             b = _vec(fu1),

From 081f3b1c6881fb8774f456421951d593619370d4 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Wed, 13 Sep 2023 23:57:11 -0400
Subject: [PATCH 04/35] fix incorrect gradient and Gauss-Newton Hessian proxy

---
 src/trustRegion.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 20577ef67..6dc4d45a6 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -304,9 +304,9 @@ isinplace(::TrustRegionCache{iip}) where {iip} = iip
 function perform_step!(cache::TrustRegionCache{true})
     @unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache
     if cache.make_new_J
-        jacobian!!(J, cache)
-        mul!(cache.H, J, J)
-        mul!(cache.g, J, fu)
+        jacobian!(J, cache)
+        mul!(cache.H, J', J)
+        mul!(cache.g, J', fu)
         cache.stats.njacs += 1
     end
 
@@ -330,9 +330,9 @@ function perform_step!(cache::TrustRegionCache{false})
     @unpack make_new_J, fu, f, u, p = cache
 
     if make_new_J
-        J = jacobian!!(cache.J, cache)
-        cache.H = J * J
-        cache.g = J * fu
+        J = jacobian(cache, f)
+        cache.H = J' * J
+        cache.g = J' * fu
         cache.stats.njacs += 1
     end
 

From f20e89719121fc6b7e19d94fb8f0082917f2334c Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Thu, 14 Sep 2023 00:03:32 -0400
Subject: [PATCH 05/35] fix Cauchy point calculation in dogleg step

---
 src/trustRegion.jl | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 6dc4d45a6..bb7d05cbb 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -505,21 +505,23 @@ function dogleg!(cache::TrustRegionCache)
     end
 
     # Calcualte Cauchy point, optimum along the steepest descent direction.
-    δsd = -cache.g
-    norm_δsd = norm(δsd)
-    if norm_δsd ≥ trust_r
-        cache.step_size = δsd .* trust_r / norm_δsd
+    l_grad = norm(cache.g)
+    d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
+    if d_cauchy > trust_r # cauchy point lies outside of trust region
+        cache.step_size = - (trust_r/l_grad) * cache.g # step to the end of the trust region
         return
     end
+    
+    # cauchy point lies inside the trust region
+    u_c = - (d_cauchy/l_grad) * cache.g
 
     # Find the intersection point on the boundary.
-    N_sd = u_tmp - δsd
-    dot_N_sd = dot(N_sd, N_sd)
-    dot_sd_N_sd = dot(δsd, N_sd)
-    dot_sd = dot(δsd, δsd)
-    fact = dot_sd_N_sd^2 - dot_N_sd * (dot_sd - trust_r^2)
-    τ = (-dot_sd_N_sd + sqrt(fact)) / dot_N_sd
-    cache.step_size = δsd + τ * N_sd
+    Δ = u_tmp - u_c # calf of the dogleg
+    θ = dot(Δ, u_c) # ~ cos(∠(calf,thigh)) 
+    l_calf = dot(Δ,Δ) # length of the calf of the dogleg
+    aux = max(θ^2 + l_calf*(trust_r^2 - d_cauchy^2), 0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = ( - θ + sqrt( aux ) ) / l_calf # stepsize along dogleg
+    cache.step_size = u_c + τ * Δ
 end
 
 function take_step!(cache::TrustRegionCache{true})

From 6f3556ec5f48cf7505b751ea0873081cf11204f2 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Thu, 14 Sep 2023 13:27:39 -0400
Subject: [PATCH 06/35] expose quadratic form structure explicitly

---
 src/trustRegion.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index bb7d05cbb..e4d40a747 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -365,7 +365,7 @@ function retrospective_step!(cache::TrustRegionCache)
     @unpack H, g, step_size = cache
 
     return -(get_loss(fu_prev) - get_loss(fu)) /
-           (step_size' * g + step_size' * H * step_size / 2)
+           (dot(step_size, g) + dot(step_size, H, step_size) / 2)
 end
 
 function trust_region_step!(cache::TrustRegionCache)
@@ -373,7 +373,7 @@ function trust_region_step!(cache::TrustRegionCache)
     cache.loss_new = get_loss(fu_new)
 
     # Compute the ratio of the actual reduction to the predicted reduction.
-    cache.r = -(loss - cache.loss_new) / (step_size' * g + step_size' * H * step_size / 2)
+    cache.r = -(loss - cache.loss_new) / (dot(step_size, g) + dot(step_size, H, step_size) / 2)
     @unpack r = cache
 
     if radius_update_scheme === RadiusUpdateSchemes.Simple

From 146dec98d2652dc3860788a995f824dff2e621e5 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Thu, 14 Sep 2023 16:19:22 -0400
Subject: [PATCH 07/35] add NLsolve trust region updating scheme and change GN
 step to -J\fu to avoid growing ill-conditioning

---
 src/trustRegion.jl | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index e4d40a747..5efc01fde 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -25,6 +25,13 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows:
     """
     Simple
 
+    """
+    `RadiusUpdateSchemes.NLsolve`
+
+    The same updating rule as in NLsolve's trust region implementation
+    """
+    NLsolve
+
     """
     `RadiusUpdateSchemes.Hei`
 
@@ -244,7 +251,9 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     p3 = convert(eltype(u), 0.0)
     p4 = convert(eltype(u), 0.0)
     ϵ = convert(eltype(u), 1.0e-8)
-    if radius_update_scheme === RadiusUpdateSchemes.Hei
+    if radius_update_scheme === RadiusUpdateSchemes.NLsolve
+        p1 = convert(eltype(u), 0.5)
+    elseif radius_update_scheme === RadiusUpdateSchemes.Hei
         step_threshold = convert(eltype(u), 0.0)
         shrink_threshold = convert(eltype(u), 0.25)
         expand_threshold = convert(eltype(u), 0.25)
@@ -310,8 +319,9 @@ function perform_step!(cache::TrustRegionCache{true})
         cache.stats.njacs += 1
     end
 
-    linres = dolinsolve(alg.precs, linsolve; A = cache.H, b = _vec(cache.g),
-        linu = _vec(u_tmp), p, reltol = cache.abstol)
+    linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), # cache.H, b = _vec(cache.g),
+        linu = _vec(u_tmp),
+        p = p, reltol = cache.abstol)
     cache.linsolve = linres.cache
     cache.u_tmp .= -1 .* u_tmp
     dogleg!(cache)
@@ -374,7 +384,7 @@ function trust_region_step!(cache::TrustRegionCache)
 
     # Compute the ratio of the actual reduction to the predicted reduction.
     cache.r = -(loss - cache.loss_new) / (dot(step_size, g) + dot(step_size, H, step_size) / 2)
-    @unpack r = cache
+    @unpack r = cache 
 
     if radius_update_scheme === RadiusUpdateSchemes.Simple
         # Update the trust region radius.
@@ -403,6 +413,30 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.force_stop = true
         end
 
+    elseif radius_update_scheme === RadiusUpdateSchemes.NLsolve
+        # accept/reject decision
+        if r > cache.step_threshold # accept
+            take_step!(cache)
+            cache.loss = cache.loss_new
+            cache.make_new_J = true
+        else # reject
+            cache.make_new_J = false
+        end
+
+        # trust region update 
+        if r < cache.shrink_threshold # default 1 // 10
+            cache.trust_r *= cache.shrink_factor # default 1 // 2
+        elseif r >= cache.expand_threshold # default 9 // 10
+            cache.trust_r = cache.expand_factor * norm(cache.step_size) # default 2
+        elseif r >= cache.p1 # default 1 // 2
+            cache.trust_r = max(cache.trust_r, cache.expand_factor * norm(cache.step_size))
+        end
+
+        # convergence test
+        if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
+            cache.force_stop = true
+        end
+
     elseif radius_update_scheme === RadiusUpdateSchemes.Hei
         if r > cache.step_threshold
             take_step!(cache)

From 884aafc5b23513e6acfd33535aadba040cffdc63 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Thu, 14 Sep 2023 16:30:53 -0400
Subject: [PATCH 08/35] add Nocedal and Wright trust region updating scheme

---
 src/trustRegion.jl | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 5efc01fde..1655a6706 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -32,6 +32,13 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows:
     """
     NLsolve
 
+    """
+    `RadiusUpdateSchemes.NLsolve`
+
+    Nocedal and Wright updating scheme 
+    """
+    NW
+
     """
     `RadiusUpdateSchemes.Hei`
 
@@ -436,6 +443,22 @@ function trust_region_step!(cache::TrustRegionCache)
         if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
             cache.force_stop = true
         end
+    
+    elseif radius_update_scheme === RadiusUpdateSchemes.NW
+        # accept/reject decision
+        if r > cache.step_threshold # accept
+            take_step!(cache)
+            cache.loss = cache.loss_new
+            cache.make_new_J = true
+        else # reject
+            cache.make_new_J = false
+        end
+
+        if r < 1 // 4
+            cache.trust_r = (1 // 4) * norm(cache.step_size)
+        elseif (r > (3 // 4)) && abs(norm(cache.step_size) - cache.trust_r)/cache.trust_r < 1e-6
+            cache.trust_r = min(2*cache.trust_r, cache.max_trust_r)
+        end  
 
     elseif radius_update_scheme === RadiusUpdateSchemes.Hei
         if r > cache.step_threshold

From 094eb34c2e2a913632e39ca8011ddcd025d627fa Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Mon, 25 Sep 2023 19:14:48 -0400
Subject: [PATCH 09/35] add meaningful description for NLsolve and NW trust
 region updating schemes

---
 src/trustRegion.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 1655a6706..b5fff47a9 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -28,14 +28,14 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows:
     """
     `RadiusUpdateSchemes.NLsolve`
 
-    The same updating rule as in NLsolve's trust region implementation
+    The same updating scheme as in NLsolve's (https://github.com/JuliaNLSolvers/NLsolve.jl) trust region dogleg implementation. 
     """
     NLsolve
 
     """
-    `RadiusUpdateSchemes.NLsolve`
+    `RadiusUpdateSchemes.NW`
 
-    Nocedal and Wright updating scheme 
+    Trust region updating scheme as in Nocedal and Wrigt [see Alg 11.5, page 291]. 
     """
     NW
 

From 0e9965542581c5d592e8e78d2f5fdbfbee6a71ef Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Mon, 25 Sep 2023 19:17:15 -0400
Subject: [PATCH 10/35] cache memory for cauchy step to enable non-allocating
 code

---
 src/trustRegion.jl | 42 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index b5fff47a9..68f566808 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -235,6 +235,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     expand_threshold = convert(eltype(u), alg.expand_threshold)
     shrink_factor = convert(eltype(u), alg.shrink_factor)
     expand_factor = convert(eltype(u), alg.expand_factor)
+
     # Set default trust region radius if not specified
     if iszero(max_trust_radius)
         max_trust_radius = convert(eltype(u), max(norm(fu1), maximum(u) - minimum(u)))
@@ -552,8 +553,37 @@ function trust_region_step!(cache::TrustRegionCache)
     end
 end
 
-function dogleg!(cache::TrustRegionCache)
-    @unpack u_tmp, trust_r = cache
+function dogleg!(cache::TrustRegionCache{true})
+    @unpack u_tmp, u_c, trust_r = cache
+
+    # Test if the full step is within the trust region.
+    if norm(u_tmp) ≤ trust_r
+        cache.step_size .= u_tmp 
+        return
+    end
+
+    # Calcualte Cauchy point, optimum along the steepest descent direction.
+    l_grad = norm(cache.g)
+    d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
+    if d_cauchy > trust_r # cauchy point lies outside of trust region
+        @. cache.step_size = - (trust_r/l_grad) * cache.g # step to the end of the trust region
+        return
+    end
+    
+    # cauchy point lies inside the trust region
+    @. u_c = - (d_cauchy/l_grad) * cache.g
+
+    # Find the intersection point on the boundary.
+    @. u_tmp -= u_c # calf of the dogleg, use u_tmp to avoid allocation
+    θ = dot(u_tmp, u_c) # ~ cos(∠(calf,thigh)) 
+    l_calf = dot(u_tmp,u_tmp) # length of the calf of the dogleg
+    aux = max(θ^2 + l_calf*(trust_r^2 - d_cauchy^2), 0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = ( - θ + sqrt( aux ) ) / l_calf # stepsize along dogleg
+    @. cache.step_size = u_c + τ * u_tmp
+end
+
+function dogleg!(cache::TrustRegionCache{false})
+    @unpack u_tmp, u_c, trust_r = cache
 
     # Test if the full step is within the trust region.
     if norm(u_tmp) ≤ trust_r
@@ -573,12 +603,12 @@ function dogleg!(cache::TrustRegionCache)
     u_c = - (d_cauchy/l_grad) * cache.g
 
     # Find the intersection point on the boundary.
-    Δ = u_tmp - u_c # calf of the dogleg
-    θ = dot(Δ, u_c) # ~ cos(∠(calf,thigh)) 
-    l_calf = dot(Δ,Δ) # length of the calf of the dogleg
+    u_tmp -= u_c # calf of the dogleg, use u_tmp to avoid allocation
+    θ = dot(u_tmp, u_c) # ~ cos(∠(calf,thigh)) 
+    l_calf = dot(u_tmp,u_tmp) # length of the calf of the dogleg
     aux = max(θ^2 + l_calf*(trust_r^2 - d_cauchy^2), 0) # technically guaranteed to be non-negative but hedging against floating point issues
     τ = ( - θ + sqrt( aux ) ) / l_calf # stepsize along dogleg
-    cache.step_size = u_c + τ * Δ
+    cache.step_size = u_c + τ * u_tmp
 end
 
 function take_step!(cache::TrustRegionCache{true})

From 439415bf1cbcaa31e0d78c95f4ccd9d0014f58a5 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 00:29:43 -0400
Subject: [PATCH 11/35] parameter types should not be converted to eltype(u).
 For now, default to Float64.

---
 src/trustRegion.jl | 118 ++++++++++++++++++++++-----------------------
 1 file changed, 59 insertions(+), 59 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 68f566808..0f43556c3 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -206,10 +206,10 @@ end
     fu_new
     make_new_J::Bool
     r::floatType
-    p1::floatType
-    p2::floatType
-    p3::floatType
-    p4::floatType
+    p1::parType
+    p2::parType
+    p3::parType
+    p4::parType
     ϵ::floatType
     stats::NLStats
 end
@@ -227,23 +227,6 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
         linsolve_kwargs)
 
-    radius_update_scheme = alg.radius_update_scheme
-    max_trust_radius = convert(eltype(u), alg.max_trust_radius)
-    initial_trust_radius = convert(eltype(u), alg.initial_trust_radius)
-    step_threshold = convert(eltype(u), alg.step_threshold)
-    shrink_threshold = convert(eltype(u), alg.shrink_threshold)
-    expand_threshold = convert(eltype(u), alg.expand_threshold)
-    shrink_factor = convert(eltype(u), alg.shrink_factor)
-    expand_factor = convert(eltype(u), alg.expand_factor)
-
-    # Set default trust region radius if not specified
-    if iszero(max_trust_radius)
-        max_trust_radius = convert(eltype(u), max(norm(fu1), maximum(u) - minimum(u)))
-    end
-    if iszero(initial_trust_radius)
-        initial_trust_radius = convert(eltype(u), max_trust_radius / 11)
-    end
-
     loss_new = loss
     H = zero(J)
     g = _mutable_zero(fu1)
@@ -253,31 +236,50 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     make_new_J = true
     r = loss
 
+    # set trust region update scheme
+    radius_update_scheme = alg.radius_update_scheme
+
+    # set default type for all trust region parameters
+    trustType = Float64 #typeof(alg.initial_trust_radius)
+    max_trust_radius = convert(trustType, alg.max_trust_radius)
+    if iszero(max_trust_radius)
+        max_trust_radius = convert(trustType, max(norm(fu), maximum(u) - minimum(u)))
+    end
+    initial_trust_radius = convert(trustType, alg.initial_trust_radius)
+    if iszero(initial_trust_radius)
+        initial_trust_radius = convert(trustType, max_trust_radius / 11)
+    end    
+    step_threshold = convert(trustType, alg.step_threshold)
+    shrink_threshold = convert(trustType, alg.shrink_threshold)
+    expand_threshold = convert(trustType, alg.expand_threshold)
+    shrink_factor = convert(trustType, alg.shrink_factor)
+    expand_factor = convert(trustType, alg.expand_factor)
+    
     # Parameters for the Schemes
-    p1 = convert(eltype(u), 0.0)
-    p2 = convert(eltype(u), 0.0)
-    p3 = convert(eltype(u), 0.0)
-    p4 = convert(eltype(u), 0.0)
-    ϵ = convert(eltype(u), 1.0e-8)
+    parType = Float64
+    p1 = convert(parType, 0.0)
+    p2 = convert(parType, 0.0)
+    p3 = convert(parType, 0.0)
+    p4 = convert(parType, 0.0)
+    ϵ = convert(typeof(r), 1.0e-8)
     if radius_update_scheme === RadiusUpdateSchemes.NLsolve
-        p1 = convert(eltype(u), 0.5)
+        p1 = convert(parType, 0.5)
     elseif radius_update_scheme === RadiusUpdateSchemes.Hei
-        step_threshold = convert(eltype(u), 0.0)
-        shrink_threshold = convert(eltype(u), 0.25)
-        expand_threshold = convert(eltype(u), 0.25)
-        p1 = convert(eltype(u), 5.0) # M
-        p2 = convert(eltype(u), 0.1) # β
-        p3 = convert(eltype(u), 0.15) # γ1
-        p4 = convert(eltype(u), 0.15) # γ2
-        initial_trust_radius = convert(eltype(u), 1.0)
+        step_threshold = convert(trustType, 0.0)
+        shrink_threshold = convert(trustType, 0.25)
+        expand_threshold = convert(trustType, 0.25)
+        p1 = convert(parType, 5.0)  # M
+        p2 = convert(parType, 0.1)  # β
+        p3 = convert(parType, 0.15) # γ1
+        p4 = convert(parType, 0.15) # γ2
+        initial_trust_radius = convert(trustType, 1.0)
     elseif radius_update_scheme === RadiusUpdateSchemes.Yuan
-        step_threshold = convert(eltype(u), 0.0001)
-        shrink_threshold = convert(eltype(u), 0.25)
-        expand_threshold = convert(eltype(u), 0.25)
-        p1 = convert(eltype(u), 2.0) # μ
-        p2 = convert(eltype(u), 1 / 6) # c5
-        p3 = convert(eltype(u), 6.0) # c6
-        p4 = convert(eltype(u), 0.0)
+        step_threshold = convert(trustType, 0.0001)
+        shrink_threshold = convert(trustType, 0.25)
+        expand_threshold = convert(trustType, 0.25)
+        p1 = convert(parType, 2.0)   # μ
+        p2 = convert(parType, 1 / 6) # c5
+        p3 = convert(parType, 6.0)   # c6
         if iip
             auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu1)
         else
@@ -287,25 +289,23 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
                 g = auto_jacvec(x -> f(x, p), u, fu1)
             end
         end
-        initial_trust_radius = convert(eltype(u), p1 * norm(g))
+        initial_trust_radius = convert(trustType, p1 * norm(g))
     elseif radius_update_scheme === RadiusUpdateSchemes.Fan
-        step_threshold = convert(eltype(u), 0.0001)
-        shrink_threshold = convert(eltype(u), 0.25)
-        expand_threshold = convert(eltype(u), 0.75)
-        p1 = convert(eltype(u), 0.1) # μ
-        p2 = convert(eltype(u), 1 / 4) # c5
-        p3 = convert(eltype(u), 12) # c6
-        p4 = convert(eltype(u), 1.0e18) # M
-        initial_trust_radius = convert(eltype(u), p1 * (norm(fu1)^0.99))
+        step_threshold = convert(trustType, 0.0001)
+        shrink_threshold = convert(trustType, 0.25)
+        expand_threshold = convert(trustType, 0.75)
+        p1 = convert(parType, 0.1) # μ
+        p2 = convert(parType, 0.25) # c5
+        p3 = convert(parType, 12.0) # c6
+        p4 = convert(parType, 1.0e18) # M
+        initial_trust_radius = convert(trustType, p1 * (norm(fu)^0.99))
     elseif radius_update_scheme === RadiusUpdateSchemes.Bastin
-        step_threshold = convert(eltype(u), 0.05)
-        shrink_threshold = convert(eltype(u), 0.05)
-        expand_threshold = convert(eltype(u), 0.9)
-        p1 = convert(eltype(u), 2.5)  #alpha_1
-        p2 = convert(eltype(u), 0.25) # alpha_2
-        p3 = convert(eltype(u), 0) # not required
-        p4 = convert(eltype(u), 0) # not required
-        initial_trust_radius = convert(eltype(u), 1.0)
+        step_threshold = convert(trustType, 0.05)
+        shrink_threshold = convert(trustType, 0.05)
+        expand_threshold = convert(trustType, 0.9)
+        p1 = convert(parType, 2.5)  #alpha_1
+        p2 = convert(parType, 0.25) # alpha_2
+        initial_trust_radius = convert(trustType, 1.0)
     end
 
     return TrustRegionCache{iip}(f, alg, u_prev, u, fu_prev, fu1, fu2, p, uf, linsolve, J,

From 0ba652b0e39537a650c97b0952e8842e5742e4c1 Mon Sep 17 00:00:00 2001
From: Flemming Holtorf <holtorf@mit.edu>
Date: Tue, 26 Sep 2023 13:55:00 -0400
Subject: [PATCH 12/35] finish rebase to master

---
 src/trustRegion.jl | 66 ++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 32 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 0f43556c3..e34c1d143 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -203,13 +203,14 @@ end
     shrink_counter::Int
     step_size
     u_tmp
+    u_c
     fu_new
     make_new_J::Bool
     r::floatType
-    p1::parType
-    p2::parType
-    p3::parType
-    p4::parType
+    p1::floatType
+    p2::floatType
+    p3::floatType
+    p4::floatType
     ϵ::floatType
     stats::NLStats
 end
@@ -226,6 +227,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     loss = get_loss(fu1)
     uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
         linsolve_kwargs)
+    u_c = zero(u)
 
     loss_new = loss
     H = zero(J)
@@ -243,7 +245,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     trustType = Float64 #typeof(alg.initial_trust_radius)
     max_trust_radius = convert(trustType, alg.max_trust_radius)
     if iszero(max_trust_radius)
-        max_trust_radius = convert(trustType, max(norm(fu), maximum(u) - minimum(u)))
+        max_trust_radius = convert(trustType, max(norm(fu1), maximum(u) - minimum(u)))
     end
     initial_trust_radius = convert(trustType, alg.initial_trust_radius)
     if iszero(initial_trust_radius)
@@ -256,30 +258,30 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     expand_factor = convert(trustType, alg.expand_factor)
     
     # Parameters for the Schemes
-    parType = Float64
-    p1 = convert(parType, 0.0)
-    p2 = convert(parType, 0.0)
-    p3 = convert(parType, 0.0)
-    p4 = convert(parType, 0.0)
-    ϵ = convert(typeof(r), 1.0e-8)
+    floatType = typeof(r)
+    p1 = convert(floatType, 0.0)
+    p2 = convert(floatType, 0.0)
+    p3 = convert(floatType, 0.0)
+    p4 = convert(floatType, 0.0)
+    ϵ = convert(floatType, 1.0e-8)
     if radius_update_scheme === RadiusUpdateSchemes.NLsolve
-        p1 = convert(parType, 0.5)
+        p1 = convert(floatType, 0.5)
     elseif radius_update_scheme === RadiusUpdateSchemes.Hei
         step_threshold = convert(trustType, 0.0)
         shrink_threshold = convert(trustType, 0.25)
         expand_threshold = convert(trustType, 0.25)
-        p1 = convert(parType, 5.0)  # M
-        p2 = convert(parType, 0.1)  # β
-        p3 = convert(parType, 0.15) # γ1
-        p4 = convert(parType, 0.15) # γ2
+        p1 = convert(floatType, 5.0)  # M
+        p2 = convert(floatType, 0.1)  # β
+        p3 = convert(floatType, 0.15) # γ1
+        p4 = convert(floatType, 0.15) # γ2
         initial_trust_radius = convert(trustType, 1.0)
     elseif radius_update_scheme === RadiusUpdateSchemes.Yuan
         step_threshold = convert(trustType, 0.0001)
         shrink_threshold = convert(trustType, 0.25)
         expand_threshold = convert(trustType, 0.25)
-        p1 = convert(parType, 2.0)   # μ
-        p2 = convert(parType, 1 / 6) # c5
-        p3 = convert(parType, 6.0)   # c6
+        p1 = convert(floatType, 2.0)   # μ
+        p2 = convert(floatType, 1 / 6) # c5
+        p3 = convert(floatType, 6.0)   # c6
         if iip
             auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu1)
         else
@@ -294,17 +296,17 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
         step_threshold = convert(trustType, 0.0001)
         shrink_threshold = convert(trustType, 0.25)
         expand_threshold = convert(trustType, 0.75)
-        p1 = convert(parType, 0.1) # μ
-        p2 = convert(parType, 0.25) # c5
-        p3 = convert(parType, 12.0) # c6
-        p4 = convert(parType, 1.0e18) # M
+        p1 = convert(floatType, 0.1) # μ
+        p2 = convert(floatType, 0.25) # c5
+        p3 = convert(floatType, 12.0) # c6
+        p4 = convert(floatType, 1.0e18) # M
         initial_trust_radius = convert(trustType, p1 * (norm(fu)^0.99))
     elseif radius_update_scheme === RadiusUpdateSchemes.Bastin
         step_threshold = convert(trustType, 0.05)
         shrink_threshold = convert(trustType, 0.05)
         expand_threshold = convert(trustType, 0.9)
-        p1 = convert(parType, 2.5)  #alpha_1
-        p2 = convert(parType, 0.25) # alpha_2
+        p1 = convert(floatType, 2.5)  # alpha_1
+        p2 = convert(floatType, 0.25) # alpha_2
         initial_trust_radius = convert(trustType, 1.0)
     end
 
@@ -312,7 +314,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
         jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
         radius_update_scheme, initial_trust_radius, max_trust_radius, step_threshold,
         shrink_threshold, expand_threshold, shrink_factor, expand_factor, loss, loss_new,
-        H, g, shrink_counter, step_size, du, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
+        H, g, shrink_counter, step_size, du, u_c, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
         NLStats(1, 0, 0, 0, 0))
 end
 
@@ -321,7 +323,7 @@ isinplace(::TrustRegionCache{iip}) where {iip} = iip
 function perform_step!(cache::TrustRegionCache{true})
     @unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache
     if cache.make_new_J
-        jacobian!(J, cache)
+        jacobian!!(J, cache)
         mul!(cache.H, J', J)
         mul!(cache.g, J', fu)
         cache.stats.njacs += 1
@@ -348,7 +350,7 @@ function perform_step!(cache::TrustRegionCache{false})
     @unpack make_new_J, fu, f, u, p = cache
 
     if make_new_J
-        J = jacobian(cache, f)
+        J = jacobian!!(cache.J, cache)
         cache.H = J' * J
         cache.g = J' * fu
         cache.stats.njacs += 1
@@ -373,11 +375,11 @@ function retrospective_step!(cache::TrustRegionCache)
     @unpack J, fu_prev, fu, u_prev, u = cache
     J = jacobian!!(deepcopy(J), cache)
     if J isa Number
-        cache.H = J * J
-        cache.g = J * fu
+        cache.H = J' * J
+        cache.g = J' * fu
     else
-        mul!(cache.H, J, J)
-        mul!(cache.g, J, fu)
+        mul!(cache.H, J', J)
+        mul!(cache.g, J', fu)
     end
     cache.stats.njacs += 1
     @unpack H, g, step_size = cache

From 6f7ef29badd51ecb7c47d89cfdb27fbf497e5f56 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 16:00:31 -0400
Subject: [PATCH 13/35] introduce better variable names (and also ones that are
 more consistent with the rest of the package)

---
 src/trustRegion.jl | 95 ++++++++++++++++++++++++----------------------
 1 file changed, 50 insertions(+), 45 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index e34c1d143..2d5e17944 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -201,9 +201,9 @@ end
     H
     g
     shrink_counter::Int
-    step_size
+    du
     u_tmp
-    u_c
+    u_cauchy
     fu_new
     make_new_J::Bool
     r::floatType
@@ -227,13 +227,13 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     loss = get_loss(fu1)
     uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
         linsolve_kwargs)
-    u_c = zero(u)
+    u_tmp = zero(u)
+    u_cauchy = zero(u)
 
     loss_new = loss
     H = zero(J)
     g = _mutable_zero(fu1)
     shrink_counter = 0
-    step_size = zero(u)
     fu_new = zero(fu1)
     make_new_J = true
     r = loss
@@ -242,7 +242,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     radius_update_scheme = alg.radius_update_scheme
 
     # set default type for all trust region parameters
-    trustType = Float64 #typeof(alg.initial_trust_radius)
+    trustType = eltype(u) #typeof(alg.initial_trust_radius)
     max_trust_radius = convert(trustType, alg.max_trust_radius)
     if iszero(max_trust_radius)
         max_trust_radius = convert(trustType, max(norm(fu1), maximum(u) - minimum(u)))
@@ -314,7 +314,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
         jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
         radius_update_scheme, initial_trust_radius, max_trust_radius, step_threshold,
         shrink_threshold, expand_threshold, shrink_factor, expand_factor, loss, loss_new,
-        H, g, shrink_counter, step_size, du, u_c, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
+        H, g, shrink_counter, du, u_tmp, u_cauchy, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
         NLStats(1, 0, 0, 0, 0))
 end
 
@@ -337,7 +337,7 @@ function perform_step!(cache::TrustRegionCache{true})
     dogleg!(cache)
 
     # Compute the potentially new u
-    cache.u_tmp .= u .+ cache.step_size
+    @. cache.u_tmp = u + cache.du
     f(cache.fu_new, cache.u_tmp, p)
     trust_region_step!(cache)
     cache.stats.nf += 1
@@ -358,11 +358,15 @@ function perform_step!(cache::TrustRegionCache{false})
 
     @unpack g, H = cache
     # Compute the Newton step.
-    cache.u_tmp = -H \ g
+    cache.u_tmp = -1 .* (H \ g)
     dogleg!(cache)
 
     # Compute the potentially new u
-    cache.u_tmp = u .+ cache.step_size
+    if u isa Number
+        cache.u_tmp = u + cache.du
+    else
+        @. cache.u_tmp = u + cache.du
+    end
     cache.fu_new = f(cache.u_tmp, p)
     trust_region_step!(cache)
     cache.stats.nf += 1
@@ -382,18 +386,18 @@ function retrospective_step!(cache::TrustRegionCache)
         mul!(cache.g, J', fu)
     end
     cache.stats.njacs += 1
-    @unpack H, g, step_size = cache
+    @unpack H, g, du = cache
 
     return -(get_loss(fu_prev) - get_loss(fu)) /
-           (dot(step_size, g) + dot(step_size, H, step_size) / 2)
+           (dot(du, g) + dot(du, H, du) / 2)
 end
 
 function trust_region_step!(cache::TrustRegionCache)
-    @unpack fu_new, step_size, g, H, loss, max_trust_r, radius_update_scheme = cache
+    @unpack fu_new, du, g, H, loss, max_trust_r, radius_update_scheme = cache
     cache.loss_new = get_loss(fu_new)
 
     # Compute the ratio of the actual reduction to the predicted reduction.
-    cache.r = -(loss - cache.loss_new) / (dot(step_size, g) + dot(step_size, H, step_size) / 2)
+    cache.r = -(loss - cache.loss_new) / (dot(du, g) + dot(du, H, du) / 2)
     @unpack r = cache 
 
     if radius_update_scheme === RadiusUpdateSchemes.Simple
@@ -437,9 +441,9 @@ function trust_region_step!(cache::TrustRegionCache)
         if r < cache.shrink_threshold # default 1 // 10
             cache.trust_r *= cache.shrink_factor # default 1 // 2
         elseif r >= cache.expand_threshold # default 9 // 10
-            cache.trust_r = cache.expand_factor * norm(cache.step_size) # default 2
+            cache.trust_r = cache.expand_factor * norm(cache.du) # default 2
         elseif r >= cache.p1 # default 1 // 2
-            cache.trust_r = max(cache.trust_r, cache.expand_factor * norm(cache.step_size))
+            cache.trust_r = max(cache.trust_r, cache.expand_factor * norm(cache.du))
         end
 
         # convergence test
@@ -458,8 +462,8 @@ function trust_region_step!(cache::TrustRegionCache)
         end
 
         if r < 1 // 4
-            cache.trust_r = (1 // 4) * norm(cache.step_size)
-        elseif (r > (3 // 4)) && abs(norm(cache.step_size) - cache.trust_r)/cache.trust_r < 1e-6
+            cache.trust_r = (1 // 4) * norm(cache.du)
+        elseif (r > (3 // 4)) && abs(norm(cache.du) - cache.trust_r)/cache.trust_r < 1e-6
             cache.trust_r = min(2*cache.trust_r, cache.max_trust_r)
         end  
 
@@ -473,14 +477,14 @@ function trust_region_step!(cache::TrustRegionCache)
         end
         # Hei's radius update scheme
         @unpack shrink_threshold, p1, p2, p3, p4 = cache
-        if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(step_size) <
+        if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(du) <
            cache.trust_r
             cache.shrink_counter += 1
         else
             cache.shrink_counter = 0
         end
         cache.trust_r = rfunc(r, shrink_threshold, p1, p3, p4, p2) *
-                        cache.internalnorm(step_size)
+                        cache.internalnorm(du)
 
         if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol ||
            cache.internalnorm(g) < cache.ϵ
@@ -492,7 +496,7 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.p1 = cache.p2 * cache.p1
             cache.shrink_counter += 1
         elseif r >= cache.expand_threshold &&
-               cache.internalnorm(step_size) > cache.trust_r / 2
+               cache.internalnorm(du) > cache.trust_r / 2
             cache.p1 = cache.p3 * cache.p1
             cache.shrink_counter = 0
         end
@@ -541,7 +545,7 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.loss = cache.loss_new
             cache.make_new_J = true
             if retrospective_step!(cache) >= cache.expand_threshold
-                cache.trust_r = max(cache.p1 * cache.internalnorm(step_size), cache.trust_r)
+                cache.trust_r = max(cache.p1 * cache.internalnorm(du), cache.trust_r)
             end
 
         else
@@ -556,40 +560,41 @@ function trust_region_step!(cache::TrustRegionCache)
 end
 
 function dogleg!(cache::TrustRegionCache{true})
-    @unpack u_tmp, u_c, trust_r = cache
+    @unpack u_tmp, u_cauchy, trust_r = cache
 
-    # Test if the full step is within the trust region.
+    # Take the full Gauss-Newton step if lies within the trust region.
     if norm(u_tmp) ≤ trust_r
-        cache.step_size .= u_tmp 
+        cache.du .= u_tmp 
         return
     end
 
-    # Calcualte Cauchy point, optimum along the steepest descent direction.
-    l_grad = norm(cache.g)
+    # Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
+    l_grad = norm(cache.g) # length of the gradient
     d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
-    if d_cauchy > trust_r # cauchy point lies outside of trust region
-        @. cache.step_size = - (trust_r/l_grad) * cache.g # step to the end of the trust region
+    if d_cauchy > trust_r 
+        @. cache.du = - (trust_r/l_grad) * cache.g # step to the end of the trust region
         return
     end
     
-    # cauchy point lies inside the trust region
-    @. u_c = - (d_cauchy/l_grad) * cache.g
-
-    # Find the intersection point on the boundary.
-    @. u_tmp -= u_c # calf of the dogleg, use u_tmp to avoid allocation
-    θ = dot(u_tmp, u_c) # ~ cos(∠(calf,thigh)) 
-    l_calf = dot(u_tmp,u_tmp) # length of the calf of the dogleg
-    aux = max(θ^2 + l_calf*(trust_r^2 - d_cauchy^2), 0) # technically guaranteed to be non-negative but hedging against floating point issues
-    τ = ( - θ + sqrt( aux ) ) / l_calf # stepsize along dogleg
-    @. cache.step_size = u_c + τ * u_tmp
+    # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
+    @. u_cauchy = - (d_cauchy/l_grad) * cache.g # compute Cauchy point
+    @. u_tmp -= u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+    a = dot(u_tmp, u_tmp)
+    b = 2*dot(u_cauchy, u_tmp)
+    c = d_cauchy^2 - trust_r^2
+    aux = max(b^2 - 4*a*c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = (-b + sqrt(aux)) / (2*a) # stepsize along dogleg to trust region boundary
+
+    @. cache.du = u_cauchy + τ * u_tmp
 end
 
+
 function dogleg!(cache::TrustRegionCache{false})
-    @unpack u_tmp, u_c, trust_r = cache
+    @unpack u_tmp, u_cauchy, trust_r = cache
 
     # Test if the full step is within the trust region.
     if norm(u_tmp) ≤ trust_r
-        cache.step_size = deepcopy(u_tmp)
+        cache.du = deepcopy(u_tmp)
         return
     end
 
@@ -597,20 +602,20 @@ function dogleg!(cache::TrustRegionCache{false})
     l_grad = norm(cache.g)
     d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
     if d_cauchy > trust_r # cauchy point lies outside of trust region
-        cache.step_size = - (trust_r/l_grad) * cache.g # step to the end of the trust region
+        cache.du = - (trust_r/l_grad) * cache.g # step to the end of the trust region
         return
     end
     
     # cauchy point lies inside the trust region
-    u_c = - (d_cauchy/l_grad) * cache.g
+    u_cauchy = - (d_cauchy/l_grad) * cache.g
 
     # Find the intersection point on the boundary.
-    u_tmp -= u_c # calf of the dogleg, use u_tmp to avoid allocation
-    θ = dot(u_tmp, u_c) # ~ cos(∠(calf,thigh)) 
+    u_tmp -= u_cauchy # calf of the dogleg, use u_tmp to avoid allocation
+    θ = dot(u_tmp, u_cauchy) # ~ cos(∠(calf,thigh)) 
     l_calf = dot(u_tmp,u_tmp) # length of the calf of the dogleg
     aux = max(θ^2 + l_calf*(trust_r^2 - d_cauchy^2), 0) # technically guaranteed to be non-negative but hedging against floating point issues
     τ = ( - θ + sqrt( aux ) ) / l_calf # stepsize along dogleg
-    cache.step_size = u_c + τ * u_tmp
+    cache.du = u_cauchy + τ * u_tmp
 end
 
 function take_step!(cache::TrustRegionCache{true})

From 79ab9928abb06565c15714b05842a2aca5338b87 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 16:50:34 -0400
Subject: [PATCH 14/35] fix consistency test

---
 test/basictests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/basictests.jl b/test/basictests.jl
index 54e63e93d..e7e816fd0 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -284,7 +284,7 @@ end
                 maxiters)
             sol_oop = benchmark_nlsolve_oop(quadratic_f, u0; radius_update_scheme,
                 maxiters)
-            @test sol_iip.u ≈ sol_iip.u
+            @test sol_iip.u ≈ sol_oop.u
         end
     end
 end

From b7495019d6060df03914213608e01c1957c6baed Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 16:51:10 -0400
Subject: [PATCH 15/35] fix oop perform step and Fan scheme initialization

---
 src/trustRegion.jl | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 2d5e17944..565fd3db9 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -300,7 +300,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
         p2 = convert(floatType, 0.25) # c5
         p3 = convert(floatType, 12.0) # c6
         p4 = convert(floatType, 1.0e18) # M
-        initial_trust_radius = convert(trustType, p1 * (norm(fu)^0.99))
+        initial_trust_radius = convert(trustType, p1 * (norm(fu1)^0.99))
     elseif radius_update_scheme === RadiusUpdateSchemes.Bastin
         step_threshold = convert(trustType, 0.05)
         shrink_threshold = convert(trustType, 0.05)
@@ -362,11 +362,8 @@ function perform_step!(cache::TrustRegionCache{false})
     dogleg!(cache)
 
     # Compute the potentially new u
-    if u isa Number
-        cache.u_tmp = u + cache.du
-    else
-        @. cache.u_tmp = u + cache.du
-    end
+    cache.u_tmp = u + cache.du
+    
     cache.fu_new = f(cache.u_tmp, p)
     trust_region_step!(cache)
     cache.stats.nf += 1
@@ -592,13 +589,13 @@ end
 function dogleg!(cache::TrustRegionCache{false})
     @unpack u_tmp, u_cauchy, trust_r = cache
 
-    # Test if the full step is within the trust region.
+    # Take the full Gauss-Newton step if lies within the trust region.
     if norm(u_tmp) ≤ trust_r
         cache.du = deepcopy(u_tmp)
         return
     end
 
-    # Calcualte Cauchy point, optimum along the steepest descent direction.
+    ## Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
     l_grad = norm(cache.g)
     d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
     if d_cauchy > trust_r # cauchy point lies outside of trust region
@@ -606,15 +603,15 @@ function dogleg!(cache::TrustRegionCache{false})
         return
     end
     
-    # cauchy point lies inside the trust region
-    u_cauchy = - (d_cauchy/l_grad) * cache.g
-
-    # Find the intersection point on the boundary.
-    u_tmp -= u_cauchy # calf of the dogleg, use u_tmp to avoid allocation
-    θ = dot(u_tmp, u_cauchy) # ~ cos(∠(calf,thigh)) 
-    l_calf = dot(u_tmp,u_tmp) # length of the calf of the dogleg
-    aux = max(θ^2 + l_calf*(trust_r^2 - d_cauchy^2), 0) # technically guaranteed to be non-negative but hedging against floating point issues
-    τ = ( - θ + sqrt( aux ) ) / l_calf # stepsize along dogleg
+    # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
+    u_cauchy = - (d_cauchy/l_grad) * cache.g # compute Cauchy point
+    u_tmp -= u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+    a = dot(u_tmp, u_tmp)
+    b = 2*dot(u_cauchy, u_tmp)
+    c = d_cauchy^2 - trust_r^2
+    aux = max(b^2 - 4*a*c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = (-b + sqrt(aux)) / (2*a) # stepsize along dogleg to trust region boundary
+
     cache.du = u_cauchy + τ * u_tmp
 end
 

From 21d246db248d4c6e3e604813c818896c1461fb62 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 17:44:36 -0400
Subject: [PATCH 16/35] improve comment

---
 src/trustRegion.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 565fd3db9..902517769 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -329,7 +329,9 @@ function perform_step!(cache::TrustRegionCache{true})
         cache.stats.njacs += 1
     end
 
-    linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), # cache.H, b = _vec(cache.g),
+    # do not use A = cache.H, b = _vec(cache.g) since it is equivalent 
+    # to  A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular
+    linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), 
         linu = _vec(u_tmp),
         p = p, reltol = cache.abstol)
     cache.linsolve = linres.cache

From 177def26e7422d4ac7040e0972204ee8ea80ac55 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 17:45:20 -0400
Subject: [PATCH 17/35] use less conservative step acceptance policy

---
 src/trustRegion.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 902517769..11a5830fd 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -160,7 +160,7 @@ end
 function TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS,
     radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, #defaults to conventional radius update
     max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1,
-    step_threshold::Real = 1 // 10, shrink_threshold::Real = 1 // 4,
+    step_threshold::Real = 1 // 10000, shrink_threshold::Real = 1 // 4,
     expand_threshold::Real = 3 // 4, shrink_factor::Real = 1 // 4,
     expand_factor::Real = 2 // 1, max_shrink_times::Int = 32, adkwargs...)
     ad = default_adargs_to_adtype(; adkwargs...)

From 019076478491c59f55f2d86214ac9a48fe019993 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 17:58:49 -0400
Subject: [PATCH 18/35] choose Float64 as default type for trust region
 adaptation parameters for now

---
 src/trustRegion.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 11a5830fd..014c1fcc5 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -242,7 +242,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     radius_update_scheme = alg.radius_update_scheme
 
     # set default type for all trust region parameters
-    trustType = eltype(u) #typeof(alg.initial_trust_radius)
+    trustType = Float64 #typeof(alg.initial_trust_radius)
     max_trust_radius = convert(trustType, alg.max_trust_radius)
     if iszero(max_trust_radius)
         max_trust_radius = convert(trustType, max(norm(fu1), maximum(u) - minimum(u)))

From 5f298e3bf2536acce72d7cb7d9cb5e51470730f7 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 18:05:10 -0400
Subject: [PATCH 19/35] hardcode NLsolve parameters

---
 src/trustRegion.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 014c1fcc5..90ae6efdd 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -437,12 +437,12 @@ function trust_region_step!(cache::TrustRegionCache)
         end
 
         # trust region update 
-        if r < cache.shrink_threshold # default 1 // 10
-            cache.trust_r *= cache.shrink_factor # default 1 // 2
-        elseif r >= cache.expand_threshold # default 9 // 10
-            cache.trust_r = cache.expand_factor * norm(cache.du) # default 2
-        elseif r >= cache.p1 # default 1 // 2
-            cache.trust_r = max(cache.trust_r, cache.expand_factor * norm(cache.du))
+        if r < 1//10 # cache.shrink_threshold 
+            cache.trust_r *= 1//2 # cache.shrink_factor 
+        elseif r >= 9//10 # cache.expand_threshold 
+            cache.trust_r = 2 * norm(cache.du) # cache.expand_factor * norm(cache.du) 
+        elseif r >= 1//2 # cache.p1 
+            cache.trust_r = max(cache.trust_r, 2*norm(cache.du)) # cache.expand_factor * norm(cache.du))
         end
 
         # convergence test

From b2b5d89c19fcda64e82b6f04b749bbc601434445 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 22:58:14 -0400
Subject: [PATCH 20/35] add NLsolve-like trust region initialization

---
 src/trustRegion.jl | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 90ae6efdd..403eed394 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -238,19 +238,26 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     make_new_J = true
     r = loss
 
+    floatType = typeof(r)
+
     # set trust region update scheme
     radius_update_scheme = alg.radius_update_scheme
 
     # set default type for all trust region parameters
-    trustType = Float64 #typeof(alg.initial_trust_radius)
-    max_trust_radius = convert(trustType, alg.max_trust_radius)
-    if iszero(max_trust_radius)
-        max_trust_radius = convert(trustType, max(norm(fu1), maximum(u) - minimum(u)))
+    trustType = floatType
+    if radius_update_scheme == RadiusUpdateSchemes.NLsolve
+        max_trust_radius = convert(trustType, Inf)
+        initial_trust_radius = norm(u0) > 0 ? convert(trustType, norm(u0)) : one(trustType)
+    else
+        max_trust_radius = convert(trustType, alg.max_trust_radius)
+        if iszero(max_trust_radius)
+            max_trust_radius = convert(trustType, max(norm(fu1), maximum(u) - minimum(u)))
+        end
+        initial_trust_radius = convert(trustType, alg.initial_trust_radius)
+        if iszero(initial_trust_radius)
+            initial_trust_radius = convert(trustType, max_trust_radius / 11)
+        end    
     end
-    initial_trust_radius = convert(trustType, alg.initial_trust_radius)
-    if iszero(initial_trust_radius)
-        initial_trust_radius = convert(trustType, max_trust_radius / 11)
-    end    
     step_threshold = convert(trustType, alg.step_threshold)
     shrink_threshold = convert(trustType, alg.shrink_threshold)
     expand_threshold = convert(trustType, alg.expand_threshold)

From f67ced5ca6238f2cfdda3422a4fd2fa23c50d9a3 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 23:02:22 -0400
Subject: [PATCH 21/35] avoid recomputation of GN step if TR step was rejected.
 Faster and avoids a bug due to mutation of Jacobian in dolinsolve.

---
 src/trustRegion.jl | 47 ++++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 403eed394..86cd4ea4b 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -203,6 +203,7 @@ end
     shrink_counter::Int
     du
     u_tmp
+    u_gauss_newton
     u_cauchy
     fu_new
     make_new_J::Bool
@@ -229,6 +230,7 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
         linsolve_kwargs)
     u_tmp = zero(u)
     u_cauchy = zero(u)
+    u_gauss_newton = zero(u)
 
     loss_new = loss
     H = zero(J)
@@ -265,7 +267,6 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     expand_factor = convert(trustType, alg.expand_factor)
     
     # Parameters for the Schemes
-    floatType = typeof(r)
     p1 = convert(floatType, 0.0)
     p2 = convert(floatType, 0.0)
     p3 = convert(floatType, 0.0)
@@ -321,28 +322,30 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
         jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
         radius_update_scheme, initial_trust_radius, max_trust_radius, step_threshold,
         shrink_threshold, expand_threshold, shrink_factor, expand_factor, loss, loss_new,
-        H, g, shrink_counter, du, u_tmp, u_cauchy, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
+        H, g, shrink_counter, du, u_tmp, u_gauss_newton, u_cauchy, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
         NLStats(1, 0, 0, 0, 0))
 end
 
 isinplace(::TrustRegionCache{iip}) where {iip} = iip
 
 function perform_step!(cache::TrustRegionCache{true})
-    @unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache
+    @unpack make_new_J, J, fu, f, u, p, u_gauss_newton, alg, linsolve = cache
     if cache.make_new_J
         jacobian!!(J, cache)
         mul!(cache.H, J', J)
         mul!(cache.g, J', fu)
         cache.stats.njacs += 1
-    end
 
-    # do not use A = cache.H, b = _vec(cache.g) since it is equivalent 
-    # to  A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular
-    linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), 
-        linu = _vec(u_tmp),
+        # do not use A = cache.H, b = _vec(cache.g) since it is equivalent 
+        # to  A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular
+        linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), 
+                            linu = _vec(u_gauss_newton),
         p = p, reltol = cache.abstol)
-    cache.linsolve = linres.cache
-    cache.u_tmp .= -1 .* u_tmp
+        cache.linsolve = linres.cache
+        @. cache.u_gauss_newton = -1 * u_gauss_newton
+    end
+
+    # Compute dogleg step
     dogleg!(cache)
 
     # Compute the potentially new u
@@ -363,11 +366,10 @@ function perform_step!(cache::TrustRegionCache{false})
         cache.H = J' * J
         cache.g = J' * fu
         cache.stats.njacs += 1
+        cache.u_gauss_newton = -1 .* (cache.H \ cache.g)
     end
 
-    @unpack g, H = cache
     # Compute the Newton step.
-    cache.u_tmp = -1 .* (H \ g)
     dogleg!(cache)
 
     # Compute the potentially new u
@@ -566,25 +568,26 @@ function trust_region_step!(cache::TrustRegionCache)
 end
 
 function dogleg!(cache::TrustRegionCache{true})
-    @unpack u_tmp, u_cauchy, trust_r = cache
+    @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache
 
     # Take the full Gauss-Newton step if lies within the trust region.
-    if norm(u_tmp) ≤ trust_r
-        cache.du .= u_tmp 
+    if norm(u_gauss_newton) ≤ trust_r
+        cache.du .= u_gauss_newton
         return
     end
 
     # Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
     l_grad = norm(cache.g) # length of the gradient
     d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
-    if d_cauchy > trust_r 
+    if d_cauchy >= trust_r 
         @. cache.du = - (trust_r/l_grad) * cache.g # step to the end of the trust region
         return
     end
-    
+
     # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
     @. u_cauchy = - (d_cauchy/l_grad) * cache.g # compute Cauchy point
-    @. u_tmp -= u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+    @. u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+    
     a = dot(u_tmp, u_tmp)
     b = 2*dot(u_cauchy, u_tmp)
     c = d_cauchy^2 - trust_r^2
@@ -596,11 +599,11 @@ end
 
 
 function dogleg!(cache::TrustRegionCache{false})
-    @unpack u_tmp, u_cauchy, trust_r = cache
+    @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache
 
     # Take the full Gauss-Newton step if lies within the trust region.
-    if norm(u_tmp) ≤ trust_r
-        cache.du = deepcopy(u_tmp)
+    if norm(u_gauss_newton) ≤ trust_r
+        cache.du = deepcopy(u_gauss_newton)
         return
     end
 
@@ -614,7 +617,7 @@ function dogleg!(cache::TrustRegionCache{false})
     
     # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
     u_cauchy = - (d_cauchy/l_grad) * cache.g # compute Cauchy point
-    u_tmp -= u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+    u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg
     a = dot(u_tmp, u_tmp)
     b = 2*dot(u_cauchy, u_tmp)
     c = d_cauchy^2 - trust_r^2

From 34821e5affc3f54b195dfca655ab6b5a6c0e4386 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Tue, 26 Sep 2023 23:40:09 -0400
Subject: [PATCH 22/35] run SciML formatter

---
 src/trustRegion.jl | 62 +++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 86cd4ea4b..b46ac8cce 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -258,14 +258,14 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
         initial_trust_radius = convert(trustType, alg.initial_trust_radius)
         if iszero(initial_trust_radius)
             initial_trust_radius = convert(trustType, max_trust_radius / 11)
-        end    
+        end
     end
     step_threshold = convert(trustType, alg.step_threshold)
     shrink_threshold = convert(trustType, alg.shrink_threshold)
     expand_threshold = convert(trustType, alg.expand_threshold)
     shrink_factor = convert(trustType, alg.shrink_factor)
     expand_factor = convert(trustType, alg.expand_factor)
-    
+
     # Parameters for the Schemes
     p1 = convert(floatType, 0.0)
     p2 = convert(floatType, 0.0)
@@ -322,7 +322,8 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
         jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
         radius_update_scheme, initial_trust_radius, max_trust_radius, step_threshold,
         shrink_threshold, expand_threshold, shrink_factor, expand_factor, loss, loss_new,
-        H, g, shrink_counter, du, u_tmp, u_gauss_newton, u_cauchy, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
+        H, g, shrink_counter, du, u_tmp, u_gauss_newton, u_cauchy, fu_new, make_new_J, r,
+        p1, p2, p3, p4, ϵ,
         NLStats(1, 0, 0, 0, 0))
 end
 
@@ -338,9 +339,9 @@ function perform_step!(cache::TrustRegionCache{true})
 
         # do not use A = cache.H, b = _vec(cache.g) since it is equivalent 
         # to  A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular
-        linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu), 
-                            linu = _vec(u_gauss_newton),
-        p = p, reltol = cache.abstol)
+        linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu),
+            linu = _vec(u_gauss_newton),
+            p = p, reltol = cache.abstol)
         cache.linsolve = linres.cache
         @. cache.u_gauss_newton = -1 * u_gauss_newton
     end
@@ -374,7 +375,7 @@ function perform_step!(cache::TrustRegionCache{false})
 
     # Compute the potentially new u
     cache.u_tmp = u + cache.du
-    
+
     cache.fu_new = f(cache.u_tmp, p)
     trust_region_step!(cache)
     cache.stats.nf += 1
@@ -406,7 +407,7 @@ function trust_region_step!(cache::TrustRegionCache)
 
     # Compute the ratio of the actual reduction to the predicted reduction.
     cache.r = -(loss - cache.loss_new) / (dot(du, g) + dot(du, H, du) / 2)
-    @unpack r = cache 
+    @unpack r = cache
 
     if radius_update_scheme === RadiusUpdateSchemes.Simple
         # Update the trust region radius.
@@ -446,19 +447,19 @@ function trust_region_step!(cache::TrustRegionCache)
         end
 
         # trust region update 
-        if r < 1//10 # cache.shrink_threshold 
-            cache.trust_r *= 1//2 # cache.shrink_factor 
-        elseif r >= 9//10 # cache.expand_threshold 
+        if r < 1 // 10 # cache.shrink_threshold 
+            cache.trust_r *= 1 // 2 # cache.shrink_factor 
+        elseif r >= 9 // 10 # cache.expand_threshold 
             cache.trust_r = 2 * norm(cache.du) # cache.expand_factor * norm(cache.du) 
-        elseif r >= 1//2 # cache.p1 
-            cache.trust_r = max(cache.trust_r, 2*norm(cache.du)) # cache.expand_factor * norm(cache.du))
+        elseif r >= 1 // 2 # cache.p1 
+            cache.trust_r = max(cache.trust_r, 2 * norm(cache.du)) # cache.expand_factor * norm(cache.du))
         end
 
         # convergence test
         if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
             cache.force_stop = true
         end
-    
+
     elseif radius_update_scheme === RadiusUpdateSchemes.NW
         # accept/reject decision
         if r > cache.step_threshold # accept
@@ -471,9 +472,9 @@ function trust_region_step!(cache::TrustRegionCache)
 
         if r < 1 // 4
             cache.trust_r = (1 // 4) * norm(cache.du)
-        elseif (r > (3 // 4)) && abs(norm(cache.du) - cache.trust_r)/cache.trust_r < 1e-6
-            cache.trust_r = min(2*cache.trust_r, cache.max_trust_r)
-        end  
+        elseif (r > (3 // 4)) && abs(norm(cache.du) - cache.trust_r) / cache.trust_r < 1e-6
+            cache.trust_r = min(2 * cache.trust_r, cache.max_trust_r)
+        end
 
     elseif radius_update_scheme === RadiusUpdateSchemes.Hei
         if r > cache.step_threshold
@@ -579,25 +580,24 @@ function dogleg!(cache::TrustRegionCache{true})
     # Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
     l_grad = norm(cache.g) # length of the gradient
     d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
-    if d_cauchy >= trust_r 
-        @. cache.du = - (trust_r/l_grad) * cache.g # step to the end of the trust region
+    if d_cauchy >= trust_r
+        @. cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region
         return
     end
 
     # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
-    @. u_cauchy = - (d_cauchy/l_grad) * cache.g # compute Cauchy point
+    @. u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point
     @. u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
-    
+
     a = dot(u_tmp, u_tmp)
-    b = 2*dot(u_cauchy, u_tmp)
+    b = 2 * dot(u_cauchy, u_tmp)
     c = d_cauchy^2 - trust_r^2
-    aux = max(b^2 - 4*a*c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
-    τ = (-b + sqrt(aux)) / (2*a) # stepsize along dogleg to trust region boundary
+    aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary
 
     @. cache.du = u_cauchy + τ * u_tmp
 end
 
-
 function dogleg!(cache::TrustRegionCache{false})
     @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache
 
@@ -611,18 +611,18 @@ function dogleg!(cache::TrustRegionCache{false})
     l_grad = norm(cache.g)
     d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
     if d_cauchy > trust_r # cauchy point lies outside of trust region
-        cache.du = - (trust_r/l_grad) * cache.g # step to the end of the trust region
+        cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region
         return
     end
-    
+
     # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
-    u_cauchy = - (d_cauchy/l_grad) * cache.g # compute Cauchy point
+    u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point
     u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg
     a = dot(u_tmp, u_tmp)
-    b = 2*dot(u_cauchy, u_tmp)
+    b = 2 * dot(u_cauchy, u_tmp)
     c = d_cauchy^2 - trust_r^2
-    aux = max(b^2 - 4*a*c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
-    τ = (-b + sqrt(aux)) / (2*a) # stepsize along dogleg to trust region boundary
+    aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary
 
     cache.du = u_cauchy + τ * u_tmp
 end

From 609f67c9193f017823dc1b06c3f71313fbcea698 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Wed, 27 Sep 2023 11:23:24 -0400
Subject: [PATCH 23/35] rename NW -> NocedalWright

---
 src/trustRegion.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index b46ac8cce..4f85760e6 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -33,11 +33,11 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows:
     NLsolve
 
     """
-    `RadiusUpdateSchemes.NW`
+    `RadiusUpdateSchemes.NocedalWright`
 
-    Trust region updating scheme as in Nocedal and Wrigt [see Alg 11.5, page 291]. 
+    Trust region updating scheme as in Nocedal and Wright [see Alg 11.5, page 291]. 
     """
-    NW
+    NocedalWright
 
     """
     `RadiusUpdateSchemes.Hei`
@@ -460,7 +460,7 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.force_stop = true
         end
 
-    elseif radius_update_scheme === RadiusUpdateSchemes.NW
+    elseif radius_update_scheme === RadiusUpdateSchemes.NocedalWright
         # accept/reject decision
         if r > cache.step_threshold # accept
             take_step!(cache)

From 32cf735be587b7cd78e28af0c765bdbea27c7003 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Wed, 27 Sep 2023 14:08:20 -0400
Subject: [PATCH 24/35] convergence check for NocedalWright

---
 src/trustRegion.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 4f85760e6..124c887b2 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -476,6 +476,11 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.trust_r = min(2 * cache.trust_r, cache.max_trust_r)
         end
 
+        # convergence test
+        if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
+            cache.force_stop = true
+        end
+        
     elseif radius_update_scheme === RadiusUpdateSchemes.Hei
         if r > cache.step_threshold
             take_step!(cache)

From e77fa761d3a0f94e32a502f3fe2dfbf26014da77 Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Wed, 27 Sep 2023 14:11:42 -0400
Subject: [PATCH 25/35] test new trust region schemes

---
 test/basictests.jl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/basictests.jl b/test/basictests.jl
index e7e816fd0..7022c6e0e 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -141,8 +141,9 @@ end
         return solve(prob, TrustRegion(; radius_update_scheme); abstol = 1e-9, kwargs...)
     end
 
-    radius_update_schemes = [RadiusUpdateSchemes.Simple, RadiusUpdateSchemes.Hei,
-        RadiusUpdateSchemes.Yuan, RadiusUpdateSchemes.Fan, RadiusUpdateSchemes.Bastin]
+    radius_update_schemes = [RadiusUpdateSchemes.Simple, RadiusUpdateSchemes.NocedalWright,
+        RadiusUpdateSchemes.NLsolve, RadiusUpdateSchemes.Hei, RadiusUpdateSchemes.Yuan,
+        RadiusUpdateSchemes.Fan, RadiusUpdateSchemes.Bastin]
     u0s = VERSION ≥ v"1.9" ? ([1.0, 1.0], @SVector[1.0, 1.0], 1.0) : ([1.0, 1.0], 1.0)
 
     @testset "[OOP] u0: $(typeof(u0)) radius_update_scheme: $(radius_update_scheme)" for u0 in u0s,

From f5c66c494dd526f465ba726faf67154d37597f9b Mon Sep 17 00:00:00 2001
From: FHoltorf <32248677+FHoltorf@users.noreply.github.com>
Date: Wed, 27 Sep 2023 15:57:47 -0400
Subject: [PATCH 26/35] run formatter

---
 src/trustRegion.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 124c887b2..f2ccb7f63 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -480,7 +480,7 @@ function trust_region_step!(cache::TrustRegionCache)
         if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
             cache.force_stop = true
         end
-        
+
     elseif radius_update_scheme === RadiusUpdateSchemes.Hei
         if r > cache.step_threshold
             take_step!(cache)

From 6607fc66d4333504db407f43878c0af754a35d71 Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas <accounts@chrisrackauckas.com>
Date: Fri, 29 Sep 2023 12:14:52 -0400
Subject: [PATCH 27/35] Update Project.toml

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 044751c0c..ec5ef032c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "NonlinearSolve"
 uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
 authors = ["SciML"]
-version = "2.0.0"
+version = "2.1.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"

From 72a95e5ebecfdb05c8d31da06b19dd968b1f0491 Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas <accounts@chrisrackauckas.com>
Date: Fri, 29 Sep 2023 12:17:22 -0400
Subject: [PATCH 28/35] Update Project.toml

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index ec5ef032c..044751c0c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "NonlinearSolve"
 uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
 authors = ["SciML"]
-version = "2.1.0"
+version = "2.0.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"

From 670a4bfaefe884a3781969be8fe9ff971a178144 Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas <accounts@chrisrackauckas.com>
Date: Mon, 2 Oct 2023 20:27:30 +0200
Subject: [PATCH 29/35] remove precompile on v1.9

---
 src/NonlinearSolve.jl | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 615f96c03..9cb996760 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -44,24 +44,26 @@ include("ad.jl")
 
 import PrecompileTools
 
-PrecompileTools.@compile_workload begin
-    for T in (Float32, Float64)
-        prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
-
-        precompile_algs = if VERSION ≥ v"1.7"
-            (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
-        else
-            (NewtonRaphson(),)
-        end
-
-        for alg in precompile_algs
-            solve(prob, alg, abstol = T(1e-2))
-        end
-
-        prob = NonlinearProblem{true}((du, u, p) -> du[1] = u[1] * u[1] - p[1], T[0.1],
-            T[2])
-        for alg in precompile_algs
-            solve(prob, alg, abstol = T(1e-2))
+@static if VERSION >= v"1.10"
+    PrecompileTools.@compile_workload begin
+        for T in (Float32, Float64)
+            prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
+
+            precompile_algs = if VERSION ≥ v"1.7"
+                (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
+            else
+                (NewtonRaphson(),)
+            end
+
+            for alg in precompile_algs
+                solve(prob, alg, abstol = T(1e-2))
+            end
+
+            prob = NonlinearProblem{true}((du, u, p) -> du[1] = u[1] * u[1] - p[1], T[0.1],
+                T[2])
+            for alg in precompile_algs
+                solve(prob, alg, abstol = T(1e-2))
+            end
         end
     end
 end

From f93cd7ad0fa3c35f3d5b30611fc267c6269ebd97 Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas <accounts@chrisrackauckas.com>
Date: Mon, 2 Oct 2023 21:06:16 +0200
Subject: [PATCH 30/35] Update src/NonlinearSolve.jl

Co-authored-by: Avik Pal <avikpal@mit.edu>
---
 src/NonlinearSolve.jl | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 9cb996760..90839dea5 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -49,11 +49,7 @@ import PrecompileTools
         for T in (Float32, Float64)
             prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
-            precompile_algs = if VERSION ≥ v"1.7"
-                (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
-            else
-                (NewtonRaphson(),)
-            end
+            precompile_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
 
             for alg in precompile_algs
                 solve(prob, alg, abstol = T(1e-2))

From fd4ae4befdf06f9579bd3e688e96568a3b5e5286 Mon Sep 17 00:00:00 2001
From: Christopher Rackauckas <accounts@chrisrackauckas.com>
Date: Mon, 2 Oct 2023 21:06:46 +0200
Subject: [PATCH 31/35] Update Project.toml

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 044751c0c..c7decafdb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "NonlinearSolve"
 uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
 authors = ["SciML"]
-version = "2.0.0"
+version = "2.0.1"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"

From 9b6ddf1c31e9b4a6230af766314f38c59267efbd Mon Sep 17 00:00:00 2001
From: yonatanwesen <yonatanwesenyeleh1@gmail.com>
Date: Wed, 4 Oct 2023 13:42:05 -0400
Subject: [PATCH 32/35] added linsolve kwargs so that simple gmres stops

---
 src/pseudotransient.jl | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
index 19fe94062..c3308dfb6 100644
--- a/src/pseudotransient.jl
+++ b/src/pseudotransient.jl
@@ -40,6 +40,7 @@ isinplace(::PseudoTransientCache{iip}) where {iip} = iip
 
 function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::PseudoTransient, args...;
     alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    linsolve_kwargs = (;),
     kwargs...) where {uType, iip}
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
@@ -49,7 +50,12 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::PseudoTransie
     else
         fu1 = _mutable(f(u, p))
     end
-    uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip))
+    uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg,
+        f,
+        u,
+        p,
+        Val(iip);
+        linsolve_kwargs)
     alpha = convert(eltype(u), alg.alpha_initial)
     res_norm = internalnorm(fu1)
 
@@ -102,7 +108,6 @@ function perform_step!(cache::PseudoTransientCache{false})
     new_norm = cache.internalnorm(fu1)
     cache.alpha *= cache.res_norm / new_norm
     cache.res_norm = new_norm
-
     new_norm < cache.abstol && (cache.force_stop = true)
     cache.stats.nf += 1
     cache.stats.njacs += 1

From 47f47bd742bb31b0e63b796509dadfedfa5c8288 Mon Sep 17 00:00:00 2001
From: yonatanwesen <yonatanwesenyeleh1@gmail.com>
Date: Thu, 21 Sep 2023 23:58:34 -0400
Subject: [PATCH 33/35] reworking pt using avik's cleanup

---
 src/NonlinearSolve.jl  |   9 ++-
 src/jacobian.jl        |   8 ++-
 src/pseudotransient.jl | 150 +++++++++++++++++++++++++++++++++++++++++
 test/basictests.jl     |  57 ++++++++++++++++
 4 files changed, 221 insertions(+), 3 deletions(-)
 create mode 100644 src/pseudotransient.jl

diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 90839dea5..20a0eaf2f 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -39,6 +39,7 @@ include("linesearch.jl")
 include("raphson.jl")
 include("trustRegion.jl")
 include("levenberg.jl")
+include("pseudotransient.jl")
 include("jacobian.jl")
 include("ad.jl")
 
@@ -49,7 +50,11 @@ import PrecompileTools
         for T in (Float32, Float64)
             prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
-            precompile_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
+        precompile_algs = if VERSION ≥ v"1.7"
+            (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient())
+        else
+            (NewtonRaphson(),)
+        end
 
             for alg in precompile_algs
                 solve(prob, alg, abstol = T(1e-2))
@@ -66,7 +71,7 @@ end
 
 export RadiusUpdateSchemes
 
-export NewtonRaphson, TrustRegion, LevenbergMarquardt
+export NewtonRaphson, TrustRegion, LevenbergMarquardt, PseudoTransient
 
 export LineSearch
 
diff --git a/src/jacobian.jl b/src/jacobian.jl
index 01c030462..1ff29d803 100644
--- a/src/jacobian.jl
+++ b/src/jacobian.jl
@@ -85,7 +85,13 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f, u, p, ::Val{ii
     end
 
     du = _mutable_zero(u)
-    linprob = LinearProblem(J, _vec(fu); u0 = _vec(du))
+    if alg isa PseudoTransient
+        alpha = convert(eltype(u), alg.alpha_initial)
+        J_new = J - (1 / alpha) * I
+        linprob = LinearProblem(J_new, _vec(fu); u0 = _vec(du))
+    else
+        linprob = LinearProblem(J, _vec(fu); u0 = _vec(du))
+    end
 
     weight = similar(u)
     recursivefill!(weight, true)
diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
new file mode 100644
index 000000000..8213850d3
--- /dev/null
+++ b/src/pseudotransient.jl
@@ -0,0 +1,150 @@
+@concrete struct PseudoTransient{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD}
+    ad::AD
+    linsolve
+    precs
+    alpha_initial
+end
+
+concrete_jac(::PseudoTransient{CJ}) where {CJ} = CJ
+
+function PseudoTransient(; concrete_jac = nothing, linsolve = nothing,
+    precs = DEFAULT_PRECS, alpha_initial = 1e-3, adkwargs...)
+    ad = default_adargs_to_adtype(; adkwargs...)
+    return PseudoTransient{_unwrap_val(concrete_jac)}(ad, linsolve, precs, alpha_initial)
+end
+
+@concrete mutable struct PseudoTransientCache{iip}
+    f
+    alg
+    u
+    fu1
+    fu2
+    du
+    p
+    alpha
+    res_norm
+    uf
+    linsolve
+    J
+    jac_cache
+    force_stop
+    maxiters::Int
+    internalnorm
+    retcode::ReturnCode.T
+    abstol
+    prob
+    stats::NLStats
+end
+
+isinplace(::PseudoTransientCache{iip}) where {iip} = iip
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::PseudoTransient, args...;
+    alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    kwargs...) where {uType, iip}
+    @unpack f, u0, p = prob
+    u = alias_u0 ? u0 : deepcopy(u0)
+    if iip
+        fu1 = f.resid_prototype === nothing ? zero(u) : f.resid_prototype
+        f(fu1, u, p)
+    else
+        fu1 = _mutable(f(u, p))
+    end
+    uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip))
+    alpha = convert(eltype(u), alg.alpha_initial)
+    res_norm = internalnorm(fu1)
+
+    return PseudoTransientCache{iip}(f, alg, u, fu1, fu2, du, p, alpha, res_norm, uf,
+        linsolve, J,
+        jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
+        NLStats(1, 0, 0, 0, 0))
+end
+
+function perform_step!(cache::PseudoTransientCache{true})
+    @unpack u, fu1, f, p, alg, J, linsolve, du, alpha = cache
+    jacobian!!(J, cache)
+    J_new = J - (1 / alpha) * I
+
+    # u = u - J \ fu
+    linres = dolinsolve(alg.precs, linsolve; A = J_new, b = _vec(fu1), linu = _vec(du),
+        p, reltol = cache.abstol)
+    cache.linsolve = linres.cache
+    @. u = u - du
+    f(fu1, u, p)
+
+    new_norm = cache.internalnorm(fu1)
+    cache.alpha *= cache.res_norm / new_norm
+    cache.res_norm = new_norm
+
+    new_norm < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+    cache.stats.njacs += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+    return nothing
+end
+
+function perform_step!(cache::PseudoTransientCache{false})
+    @unpack u, fu1, f, p, alg, linsolve, alpha = cache
+
+    cache.J = jacobian!!(cache.J, cache)
+    # u = u - J \ fu
+    if linsolve === nothing
+        cache.du = fu1 / cache.J
+    else
+        linres = dolinsolve(alg.precs, linsolve; A = cache.J - (1 / alpha) * I,
+            b = _vec(fu1),
+            linu = _vec(cache.du), p, reltol = cache.abstol)
+        cache.linsolve = linres.cache
+    end
+    cache.u = @. u - cache.du  # `u` might not support mutation
+    cache.fu1 = f(cache.u, p)
+
+    new_norm = cache.internalnorm(fu1)
+    cache.alpha *= cache.res_norm / new_norm
+    cache.res_norm = new_norm
+
+    new_norm < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+    cache.stats.njacs += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+    return nothing
+end
+
+function SciMLBase.solve!(cache::PseudoTransientCache)
+    while !cache.force_stop && cache.stats.nsteps < cache.maxiters
+        perform_step!(cache)
+        cache.stats.nsteps += 1
+    end
+
+    if cache.stats.nsteps == cache.maxiters
+        cache.retcode = ReturnCode.MaxIters
+    else
+        cache.retcode = ReturnCode.Success
+    end
+
+    return SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu1;
+        cache.retcode, cache.stats)
+end
+
+function SciMLBase.reinit!(cache::PseudoTransientCache{iip}, u0 = cache.u; p = cache.p,
+    abstol = cache.abstol, maxiters = cache.maxiters) where {iip}
+    cache.p = p
+    if iip
+        recursivecopy!(cache.u, u0)
+        cache.f(cache.fu1, cache.u, p)
+    else
+        # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter
+        cache.u = u0
+        cache.fu1 = cache.f(cache.u, p)
+    end
+    cache.alpha = convert(eltype(u), 1e-3)
+    cache.res_norm = internalnorm(cache.fu1)
+    cache.abstol = abstol
+    cache.maxiters = maxiters
+    cache.stats.nf = 1
+    cache.stats.nsteps = 1
+    cache.force_stop = false
+    cache.retcode = ReturnCode.Default
+    return cache
+end
diff --git a/test/basictests.jl b/test/basictests.jl
index 7022c6e0e..e872ec3d1 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -403,3 +403,60 @@ end
         end
     end
 end
+
+# --- PseudoTransient tests ---
+
+@testset "PseudoTransient" begin
+
+    #iip
+    function test_f!(du, u, p)
+        du[1] = 2 - 2u[1]
+        du[2] = u[1] - 4u[2]
+        return du
+    end
+
+    #oop
+    simple_test(u, p) = 0.9f0 .* u .- u
+
+    #test jacobian free PseudoTransient method
+    function f!(res, u, p)
+        @. res = u * u - p
+    end
+
+    @testset "[IIP] u0: $(typeof(u0))" for u0 in (zeros(2),)
+        probN = NonlinearProblem{true}(test_f!, u0)
+        sol = solve(probN, PseudoTransient())
+        @test sol.retcode == ReturnCode.Success
+        du = zeros(2)
+        test_f!(du, sol.u, 4.0)
+        @test du≈[0, 0] atol=1e-7
+    end
+
+    @testset "[OOP] u0: $(typeof(u0))" for u0 in ([1.0f0],)
+        probN = NonlinearProblem{false}(simple_test, u0)
+        sol = solve(probN, PseudoTransient(alpha_initial = 1.0))
+        @test sol.retcode == ReturnCode.Success
+        @test abs(sol.u[1]) <= 1.0f-4
+    end
+
+    @testset "Jacobian Free PT" begin
+        u0 = [1.0, 1.0]
+        p = 2.0
+        probN = NonlinearProblem(f!, u0, p)
+        linsolve = LinearSolve.KrylovJL_GMRES()
+        sol = solve(probN,
+            PseudoTransient(alpha_initial = 10.0, linsolve = linsolve),
+            reltol = 1e-9)
+        @test sol.retcode == ReturnCode.Success
+    end
+
+    @testset "NewtonRaphson Fails" begin
+        p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+        u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
+        probN = NonlinearProblem{false}(newton_fails, u0, p)
+        sol = solve(probN, PseudoTransient(alpha_initial = 1.0), abstol = 1e-10)
+        @test all(abs.(newton_fails(sol.u, p)) .< 1e-10)
+    end
+end
+
+# Test that `PseudoTransient` passes a test that `NewtonRaphson` fails on.

From 5e2f5a149f6105f42d524535a8c3a0d8ce6ce4ba Mon Sep 17 00:00:00 2001
From: yonatanwesen <yonatanwesenyeleh1@gmail.com>
Date: Sun, 24 Sep 2023 21:02:45 -0400
Subject: [PATCH 34/35] forget the 1/alpha term when linsolve is nothing

---
 src/pseudotransient.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
index 8213850d3..19fe94062 100644
--- a/src/pseudotransient.jl
+++ b/src/pseudotransient.jl
@@ -89,7 +89,7 @@ function perform_step!(cache::PseudoTransientCache{false})
     cache.J = jacobian!!(cache.J, cache)
     # u = u - J \ fu
     if linsolve === nothing
-        cache.du = fu1 / cache.J
+        cache.du = fu1 / (cache.J - (1 / alpha) * I)
     else
         linres = dolinsolve(alg.precs, linsolve; A = cache.J - (1 / alpha) * I,
             b = _vec(fu1),

From 6955074de9b0a64a1805a1f7a354be1f92a5a1bc Mon Sep 17 00:00:00 2001
From: yonatanwesen <yonatanwesenyeleh1@gmail.com>
Date: Wed, 4 Oct 2023 13:42:05 -0400
Subject: [PATCH 35/35] added linsolve kwargs so that simple gmres stops

---
 src/pseudotransient.jl | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
index 19fe94062..c3308dfb6 100644
--- a/src/pseudotransient.jl
+++ b/src/pseudotransient.jl
@@ -40,6 +40,7 @@ isinplace(::PseudoTransientCache{iip}) where {iip} = iip
 
 function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::PseudoTransient, args...;
     alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    linsolve_kwargs = (;),
     kwargs...) where {uType, iip}
     @unpack f, u0, p = prob
     u = alias_u0 ? u0 : deepcopy(u0)
@@ -49,7 +50,12 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::PseudoTransie
     else
         fu1 = _mutable(f(u, p))
     end
-    uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip))
+    uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg,
+        f,
+        u,
+        p,
+        Val(iip);
+        linsolve_kwargs)
     alpha = convert(eltype(u), alg.alpha_initial)
     res_norm = internalnorm(fu1)
 
@@ -102,7 +108,6 @@ function perform_step!(cache::PseudoTransientCache{false})
     new_norm = cache.internalnorm(fu1)
     cache.alpha *= cache.res_norm / new_norm
     cache.res_norm = new_norm
-
     new_norm < cache.abstol && (cache.force_stop = true)
     cache.stats.nf += 1
     cache.stats.njacs += 1