diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 75f12a2e9..e0d7a57e5 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -3,9 +3,13 @@ on:
   pull_request:
     branches:
       - master
+    paths-ignore:
+      - 'docs/**'
   push:
     branches:
       - master
+    paths-ignore:
+      - 'docs/**'
 jobs:
   test:
     runs-on: ubuntu-latest
diff --git a/Project.toml b/Project.toml
index 044751c0c..c7decafdb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "NonlinearSolve"
 uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
 authors = ["SciML"]
-version = "2.0.0"
+version = "2.0.1"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
diff --git a/docs/Project.toml b/docs/Project.toml
index df765bb1d..42e261955 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -12,7 +12,7 @@ Sundials = "c3572dad-4567-51f8-b174-8c6c989267f4"
 
 [compat]
 BenchmarkTools = "1"
-Documenter = "0.27"
+Documenter = "1"
 LinearSolve = "2"
 NonlinearSolve = "1, 2"
 NonlinearSolveMINPACK = "0.1"
diff --git a/docs/make.jl b/docs/make.jl
index d2815c502..8f895b0b4 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -11,15 +11,9 @@ makedocs(sitename = "NonlinearSolve.jl",
     modules = [NonlinearSolve, NonlinearSolve.SciMLBase, NonlinearSolve.DiffEqBase,
         SimpleNonlinearSolve, Sundials, SciMLNLSolve, NonlinearSolveMINPACK,
         SteadyStateDiffEq],
-    clean = true, doctest = false,
-    strict = [
-        :doctest,
-        :linkcheck,
-        :parse_error,
-        :example_block,
-        # Other available options are
-        # :autodocs_block, :cross_references, :docs_block, :eval_block, :example_block, :footnote, :meta_block, :missing_docs, :setup_block
-    ],
+    clean = true, doctest = false, linkcheck = true,
+    linkcheck_ignore = ["https://twitter.com/ChrisRackauckas/status/1544743542094020615"],
+    warnonly = [:missing_docs, :cross_references],
     format = Documenter.HTML(assets = ["assets/favicon.ico"],
         canonical = "https://docs.sciml.ai/NonlinearSolve/stable/"),
     pages = pages)
diff --git a/docs/src/basics/TerminationCondition.md b/docs/src/basics/TerminationCondition.md
index cd780b997..e966bb22c 100644
--- a/docs/src/basics/TerminationCondition.md
+++ b/docs/src/basics/TerminationCondition.md
@@ -1,6 +1,6 @@
 # Termination Conditions
 
-Provides a API to specify termination conditions for [`NonLinearProblem`](@ref) and
+Provides a API to specify termination conditions for [`NonlinearProblem`](@ref) and
 [`SteadyStateProblem`](@ref). For details on the various termination modes, i.e.,
 NLSolveTerminationMode, see the documentation for [`NLSolveTerminationCondition`](@ref).
 
diff --git a/docs/src/index.md b/docs/src/index.md
index c47fb57de..3d7e97890 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -82,32 +82,19 @@ Pkg.status(; mode = PKGMODE_MANIFEST) # hide
 </details>
 ```
 
-```@raw html
-You can also download the
-<a href="
-```
-
 ```@eval
 using TOML
+using Markdown
 version = TOML.parse(read("../../Project.toml", String))["version"]
 name = TOML.parse(read("../../Project.toml", String))["name"]
-link = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
-       "/assets/Manifest.toml"
-```
-
-```@raw html
-">manifest</a> file and the
-<a href="
-```
-
-```@eval
-using TOML
-version = TOML.parse(read("../../Project.toml", String))["version"]
-name = TOML.parse(read("../../Project.toml", String))["name"]
-link = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
-       "/assets/Project.toml"
-```
-
-```@raw html
-">project</a> file.
+link_manifest = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
+                "/assets/Manifest.toml"
+link_project = "https://github.com/SciML/" * name * ".jl/tree/gh-pages/v" * version *
+               "/assets/Project.toml"
+Markdown.parse("""You can also download the
+[manifest]($link_manifest)
+file and the
+[project]($link_project)
+file.
+""")
 ```
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 615f96c03..20a0eaf2f 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -39,36 +39,39 @@ include("linesearch.jl")
 include("raphson.jl")
 include("trustRegion.jl")
 include("levenberg.jl")
+include("pseudotransient.jl")
 include("jacobian.jl")
 include("ad.jl")
 
 import PrecompileTools
 
-PrecompileTools.@compile_workload begin
-    for T in (Float32, Float64)
-        prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
+@static if VERSION >= v"1.10"
+    PrecompileTools.@compile_workload begin
+        for T in (Float32, Float64)
+            prob = NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2))
 
         precompile_algs = if VERSION ≥ v"1.7"
-            (NewtonRaphson(), TrustRegion(), LevenbergMarquardt())
+            (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient())
         else
             (NewtonRaphson(),)
         end
 
-        for alg in precompile_algs
-            solve(prob, alg, abstol = T(1e-2))
-        end
+            for alg in precompile_algs
+                solve(prob, alg, abstol = T(1e-2))
+            end
 
-        prob = NonlinearProblem{true}((du, u, p) -> du[1] = u[1] * u[1] - p[1], T[0.1],
-            T[2])
-        for alg in precompile_algs
-            solve(prob, alg, abstol = T(1e-2))
+            prob = NonlinearProblem{true}((du, u, p) -> du[1] = u[1] * u[1] - p[1], T[0.1],
+                T[2])
+            for alg in precompile_algs
+                solve(prob, alg, abstol = T(1e-2))
+            end
         end
     end
 end
 
 export RadiusUpdateSchemes
 
-export NewtonRaphson, TrustRegion, LevenbergMarquardt
+export NewtonRaphson, TrustRegion, LevenbergMarquardt, PseudoTransient
 
 export LineSearch
 
diff --git a/src/jacobian.jl b/src/jacobian.jl
index 01c030462..1ff29d803 100644
--- a/src/jacobian.jl
+++ b/src/jacobian.jl
@@ -85,7 +85,13 @@ function jacobian_caches(alg::AbstractNonlinearSolveAlgorithm, f, u, p, ::Val{ii
     end
 
     du = _mutable_zero(u)
-    linprob = LinearProblem(J, _vec(fu); u0 = _vec(du))
+    if alg isa PseudoTransient
+        alpha = convert(eltype(u), alg.alpha_initial)
+        J_new = J - (1 / alpha) * I
+        linprob = LinearProblem(J_new, _vec(fu); u0 = _vec(du))
+    else
+        linprob = LinearProblem(J, _vec(fu); u0 = _vec(du))
+    end
 
     weight = similar(u)
     recursivefill!(weight, true)
diff --git a/src/pseudotransient.jl b/src/pseudotransient.jl
new file mode 100644
index 000000000..c3308dfb6
--- /dev/null
+++ b/src/pseudotransient.jl
@@ -0,0 +1,155 @@
+@concrete struct PseudoTransient{CJ, AD} <: AbstractNewtonAlgorithm{CJ, AD}
+    ad::AD
+    linsolve
+    precs
+    alpha_initial
+end
+
+concrete_jac(::PseudoTransient{CJ}) where {CJ} = CJ
+
+function PseudoTransient(; concrete_jac = nothing, linsolve = nothing,
+    precs = DEFAULT_PRECS, alpha_initial = 1e-3, adkwargs...)
+    ad = default_adargs_to_adtype(; adkwargs...)
+    return PseudoTransient{_unwrap_val(concrete_jac)}(ad, linsolve, precs, alpha_initial)
+end
+
+@concrete mutable struct PseudoTransientCache{iip}
+    f
+    alg
+    u
+    fu1
+    fu2
+    du
+    p
+    alpha
+    res_norm
+    uf
+    linsolve
+    J
+    jac_cache
+    force_stop
+    maxiters::Int
+    internalnorm
+    retcode::ReturnCode.T
+    abstol
+    prob
+    stats::NLStats
+end
+
+isinplace(::PseudoTransientCache{iip}) where {iip} = iip
+
+function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::PseudoTransient, args...;
+    alias_u0 = false, maxiters = 1000, abstol = 1e-6, internalnorm = DEFAULT_NORM,
+    linsolve_kwargs = (;),
+    kwargs...) where {uType, iip}
+    @unpack f, u0, p = prob
+    u = alias_u0 ? u0 : deepcopy(u0)
+    if iip
+        fu1 = f.resid_prototype === nothing ? zero(u) : f.resid_prototype
+        f(fu1, u, p)
+    else
+        fu1 = _mutable(f(u, p))
+    end
+    uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg,
+        f,
+        u,
+        p,
+        Val(iip);
+        linsolve_kwargs)
+    alpha = convert(eltype(u), alg.alpha_initial)
+    res_norm = internalnorm(fu1)
+
+    return PseudoTransientCache{iip}(f, alg, u, fu1, fu2, du, p, alpha, res_norm, uf,
+        linsolve, J,
+        jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
+        NLStats(1, 0, 0, 0, 0))
+end
+
+function perform_step!(cache::PseudoTransientCache{true})
+    @unpack u, fu1, f, p, alg, J, linsolve, du, alpha = cache
+    jacobian!!(J, cache)
+    J_new = J - (1 / alpha) * I
+
+    # u = u - J \ fu
+    linres = dolinsolve(alg.precs, linsolve; A = J_new, b = _vec(fu1), linu = _vec(du),
+        p, reltol = cache.abstol)
+    cache.linsolve = linres.cache
+    @. u = u - du
+    f(fu1, u, p)
+
+    new_norm = cache.internalnorm(fu1)
+    cache.alpha *= cache.res_norm / new_norm
+    cache.res_norm = new_norm
+
+    new_norm < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+    cache.stats.njacs += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+    return nothing
+end
+
+function perform_step!(cache::PseudoTransientCache{false})
+    @unpack u, fu1, f, p, alg, linsolve, alpha = cache
+
+    cache.J = jacobian!!(cache.J, cache)
+    # u = u - J \ fu
+    if linsolve === nothing
+        cache.du = fu1 / (cache.J - (1 / alpha) * I)
+    else
+        linres = dolinsolve(alg.precs, linsolve; A = cache.J - (1 / alpha) * I,
+            b = _vec(fu1),
+            linu = _vec(cache.du), p, reltol = cache.abstol)
+        cache.linsolve = linres.cache
+    end
+    cache.u = @. u - cache.du  # `u` might not support mutation
+    cache.fu1 = f(cache.u, p)
+
+    new_norm = cache.internalnorm(fu1)
+    cache.alpha *= cache.res_norm / new_norm
+    cache.res_norm = new_norm
+    new_norm < cache.abstol && (cache.force_stop = true)
+    cache.stats.nf += 1
+    cache.stats.njacs += 1
+    cache.stats.nsolve += 1
+    cache.stats.nfactors += 1
+    return nothing
+end
+
+function SciMLBase.solve!(cache::PseudoTransientCache)
+    while !cache.force_stop && cache.stats.nsteps < cache.maxiters
+        perform_step!(cache)
+        cache.stats.nsteps += 1
+    end
+
+    if cache.stats.nsteps == cache.maxiters
+        cache.retcode = ReturnCode.MaxIters
+    else
+        cache.retcode = ReturnCode.Success
+    end
+
+    return SciMLBase.build_solution(cache.prob, cache.alg, cache.u, cache.fu1;
+        cache.retcode, cache.stats)
+end
+
+function SciMLBase.reinit!(cache::PseudoTransientCache{iip}, u0 = cache.u; p = cache.p,
+    abstol = cache.abstol, maxiters = cache.maxiters) where {iip}
+    cache.p = p
+    if iip
+        recursivecopy!(cache.u, u0)
+        cache.f(cache.fu1, cache.u, p)
+    else
+        # don't have alias_u0 but cache.u is never mutated for OOP problems so it doesn't matter
+        cache.u = u0
+        cache.fu1 = cache.f(cache.u, p)
+    end
+    cache.alpha = convert(eltype(u), 1e-3)
+    cache.res_norm = internalnorm(cache.fu1)
+    cache.abstol = abstol
+    cache.maxiters = maxiters
+    cache.stats.nf = 1
+    cache.stats.nsteps = 1
+    cache.force_stop = false
+    cache.retcode = ReturnCode.Default
+    return cache
+end
diff --git a/src/trustRegion.jl b/src/trustRegion.jl
index 20577ef67..f2ccb7f63 100644
--- a/src/trustRegion.jl
+++ b/src/trustRegion.jl
@@ -25,6 +25,20 @@ states as `RadiusUpdateSchemes.T`. Simply put the desired scheme as follows:
     """
     Simple
 
+    """
+    `RadiusUpdateSchemes.NLsolve`
+
+    The same updating scheme as in NLsolve's (https://github.com/JuliaNLSolvers/NLsolve.jl) trust region dogleg implementation. 
+    """
+    NLsolve
+
+    """
+    `RadiusUpdateSchemes.NocedalWright`
+
+    Trust region updating scheme as in Nocedal and Wright [see Alg 11.5, page 291]. 
+    """
+    NocedalWright
+
     """
     `RadiusUpdateSchemes.Hei`
 
@@ -146,7 +160,7 @@ end
 function TrustRegion(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS,
     radius_update_scheme::RadiusUpdateSchemes.T = RadiusUpdateSchemes.Simple, #defaults to conventional radius update
     max_trust_radius::Real = 0 // 1, initial_trust_radius::Real = 0 // 1,
-    step_threshold::Real = 1 // 10, shrink_threshold::Real = 1 // 4,
+    step_threshold::Real = 1 // 10000, shrink_threshold::Real = 1 // 4,
     expand_threshold::Real = 3 // 4, shrink_factor::Real = 1 // 4,
     expand_factor::Real = 2 // 1, max_shrink_times::Int = 32, adkwargs...)
     ad = default_adargs_to_adtype(; adkwargs...)
@@ -187,8 +201,10 @@ end
     H
     g
     shrink_counter::Int
-    step_size
+    du
     u_tmp
+    u_gauss_newton
+    u_cauchy
     fu_new
     make_new_J::Bool
     r::floatType
@@ -212,55 +228,68 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
     loss = get_loss(fu1)
     uf, linsolve, J, fu2, jac_cache, du = jacobian_caches(alg, f, u, p, Val(iip);
         linsolve_kwargs)
-
-    radius_update_scheme = alg.radius_update_scheme
-    max_trust_radius = convert(eltype(u), alg.max_trust_radius)
-    initial_trust_radius = convert(eltype(u), alg.initial_trust_radius)
-    step_threshold = convert(eltype(u), alg.step_threshold)
-    shrink_threshold = convert(eltype(u), alg.shrink_threshold)
-    expand_threshold = convert(eltype(u), alg.expand_threshold)
-    shrink_factor = convert(eltype(u), alg.shrink_factor)
-    expand_factor = convert(eltype(u), alg.expand_factor)
-    # Set default trust region radius if not specified
-    if iszero(max_trust_radius)
-        max_trust_radius = convert(eltype(u), max(norm(fu1), maximum(u) - minimum(u)))
-    end
-    if iszero(initial_trust_radius)
-        initial_trust_radius = convert(eltype(u), max_trust_radius / 11)
-    end
+    u_tmp = zero(u)
+    u_cauchy = zero(u)
+    u_gauss_newton = zero(u)
 
     loss_new = loss
     H = zero(J)
     g = _mutable_zero(fu1)
     shrink_counter = 0
-    step_size = zero(u)
     fu_new = zero(fu1)
     make_new_J = true
     r = loss
 
+    floatType = typeof(r)
+
+    # set trust region update scheme
+    radius_update_scheme = alg.radius_update_scheme
+
+    # set default type for all trust region parameters
+    trustType = floatType
+    if radius_update_scheme == RadiusUpdateSchemes.NLsolve
+        max_trust_radius = convert(trustType, Inf)
+        initial_trust_radius = norm(u0) > 0 ? convert(trustType, norm(u0)) : one(trustType)
+    else
+        max_trust_radius = convert(trustType, alg.max_trust_radius)
+        if iszero(max_trust_radius)
+            max_trust_radius = convert(trustType, max(norm(fu1), maximum(u) - minimum(u)))
+        end
+        initial_trust_radius = convert(trustType, alg.initial_trust_radius)
+        if iszero(initial_trust_radius)
+            initial_trust_radius = convert(trustType, max_trust_radius / 11)
+        end
+    end
+    step_threshold = convert(trustType, alg.step_threshold)
+    shrink_threshold = convert(trustType, alg.shrink_threshold)
+    expand_threshold = convert(trustType, alg.expand_threshold)
+    shrink_factor = convert(trustType, alg.shrink_factor)
+    expand_factor = convert(trustType, alg.expand_factor)
+
     # Parameters for the Schemes
-    p1 = convert(eltype(u), 0.0)
-    p2 = convert(eltype(u), 0.0)
-    p3 = convert(eltype(u), 0.0)
-    p4 = convert(eltype(u), 0.0)
-    ϵ = convert(eltype(u), 1.0e-8)
-    if radius_update_scheme === RadiusUpdateSchemes.Hei
-        step_threshold = convert(eltype(u), 0.0)
-        shrink_threshold = convert(eltype(u), 0.25)
-        expand_threshold = convert(eltype(u), 0.25)
-        p1 = convert(eltype(u), 5.0) # M
-        p2 = convert(eltype(u), 0.1) # β
-        p3 = convert(eltype(u), 0.15) # γ1
-        p4 = convert(eltype(u), 0.15) # γ2
-        initial_trust_radius = convert(eltype(u), 1.0)
+    p1 = convert(floatType, 0.0)
+    p2 = convert(floatType, 0.0)
+    p3 = convert(floatType, 0.0)
+    p4 = convert(floatType, 0.0)
+    ϵ = convert(floatType, 1.0e-8)
+    if radius_update_scheme === RadiusUpdateSchemes.NLsolve
+        p1 = convert(floatType, 0.5)
+    elseif radius_update_scheme === RadiusUpdateSchemes.Hei
+        step_threshold = convert(trustType, 0.0)
+        shrink_threshold = convert(trustType, 0.25)
+        expand_threshold = convert(trustType, 0.25)
+        p1 = convert(floatType, 5.0)  # M
+        p2 = convert(floatType, 0.1)  # β
+        p3 = convert(floatType, 0.15) # γ1
+        p4 = convert(floatType, 0.15) # γ2
+        initial_trust_radius = convert(trustType, 1.0)
     elseif radius_update_scheme === RadiusUpdateSchemes.Yuan
-        step_threshold = convert(eltype(u), 0.0001)
-        shrink_threshold = convert(eltype(u), 0.25)
-        expand_threshold = convert(eltype(u), 0.25)
-        p1 = convert(eltype(u), 2.0) # μ
-        p2 = convert(eltype(u), 1 / 6) # c5
-        p3 = convert(eltype(u), 6.0) # c6
-        p4 = convert(eltype(u), 0.0)
+        step_threshold = convert(trustType, 0.0001)
+        shrink_threshold = convert(trustType, 0.25)
+        expand_threshold = convert(trustType, 0.25)
+        p1 = convert(floatType, 2.0)   # μ
+        p2 = convert(floatType, 1 / 6) # c5
+        p3 = convert(floatType, 6.0)   # c6
         if iip
             auto_jacvec!(g, (fu, x) -> f(fu, x, p), u, fu1)
         else
@@ -270,54 +299,58 @@ function SciMLBase.__init(prob::NonlinearProblem{uType, iip}, alg::TrustRegion,
                 g = auto_jacvec(x -> f(x, p), u, fu1)
             end
         end
-        initial_trust_radius = convert(eltype(u), p1 * norm(g))
+        initial_trust_radius = convert(trustType, p1 * norm(g))
     elseif radius_update_scheme === RadiusUpdateSchemes.Fan
-        step_threshold = convert(eltype(u), 0.0001)
-        shrink_threshold = convert(eltype(u), 0.25)
-        expand_threshold = convert(eltype(u), 0.75)
-        p1 = convert(eltype(u), 0.1) # μ
-        p2 = convert(eltype(u), 1 / 4) # c5
-        p3 = convert(eltype(u), 12) # c6
-        p4 = convert(eltype(u), 1.0e18) # M
-        initial_trust_radius = convert(eltype(u), p1 * (norm(fu1)^0.99))
+        step_threshold = convert(trustType, 0.0001)
+        shrink_threshold = convert(trustType, 0.25)
+        expand_threshold = convert(trustType, 0.75)
+        p1 = convert(floatType, 0.1) # μ
+        p2 = convert(floatType, 0.25) # c5
+        p3 = convert(floatType, 12.0) # c6
+        p4 = convert(floatType, 1.0e18) # M
+        initial_trust_radius = convert(trustType, p1 * (norm(fu1)^0.99))
     elseif radius_update_scheme === RadiusUpdateSchemes.Bastin
-        step_threshold = convert(eltype(u), 0.05)
-        shrink_threshold = convert(eltype(u), 0.05)
-        expand_threshold = convert(eltype(u), 0.9)
-        p1 = convert(eltype(u), 2.5)  #alpha_1
-        p2 = convert(eltype(u), 0.25) # alpha_2
-        p3 = convert(eltype(u), 0) # not required
-        p4 = convert(eltype(u), 0) # not required
-        initial_trust_radius = convert(eltype(u), 1.0)
+        step_threshold = convert(trustType, 0.05)
+        shrink_threshold = convert(trustType, 0.05)
+        expand_threshold = convert(trustType, 0.9)
+        p1 = convert(floatType, 2.5)  # alpha_1
+        p2 = convert(floatType, 0.25) # alpha_2
+        initial_trust_radius = convert(trustType, 1.0)
     end
 
     return TrustRegionCache{iip}(f, alg, u_prev, u, fu_prev, fu1, fu2, p, uf, linsolve, J,
         jac_cache, false, maxiters, internalnorm, ReturnCode.Default, abstol, prob,
         radius_update_scheme, initial_trust_radius, max_trust_radius, step_threshold,
         shrink_threshold, expand_threshold, shrink_factor, expand_factor, loss, loss_new,
-        H, g, shrink_counter, step_size, du, fu_new, make_new_J, r, p1, p2, p3, p4, ϵ,
+        H, g, shrink_counter, du, u_tmp, u_gauss_newton, u_cauchy, fu_new, make_new_J, r,
+        p1, p2, p3, p4, ϵ,
         NLStats(1, 0, 0, 0, 0))
 end
 
 isinplace(::TrustRegionCache{iip}) where {iip} = iip
 
 function perform_step!(cache::TrustRegionCache{true})
-    @unpack make_new_J, J, fu, f, u, p, u_tmp, alg, linsolve = cache
+    @unpack make_new_J, J, fu, f, u, p, u_gauss_newton, alg, linsolve = cache
     if cache.make_new_J
         jacobian!!(J, cache)
-        mul!(cache.H, J, J)
-        mul!(cache.g, J, fu)
+        mul!(cache.H, J', J)
+        mul!(cache.g, J', fu)
         cache.stats.njacs += 1
+
+        # do not use A = cache.H, b = _vec(cache.g) since it is equivalent 
+        # to  A = cache.J, b = _vec(fu) as long as the Jacobian is non-singular
+        linres = dolinsolve(alg.precs, linsolve, A = J, b = _vec(fu),
+            linu = _vec(u_gauss_newton),
+            p = p, reltol = cache.abstol)
+        cache.linsolve = linres.cache
+        @. cache.u_gauss_newton = -1 * u_gauss_newton
     end
 
-    linres = dolinsolve(alg.precs, linsolve; A = cache.H, b = _vec(cache.g),
-        linu = _vec(u_tmp), p, reltol = cache.abstol)
-    cache.linsolve = linres.cache
-    cache.u_tmp .= -1 .* u_tmp
+    # Compute dogleg step
     dogleg!(cache)
 
     # Compute the potentially new u
-    cache.u_tmp .= u .+ cache.step_size
+    @. cache.u_tmp = u + cache.du
     f(cache.fu_new, cache.u_tmp, p)
     trust_region_step!(cache)
     cache.stats.nf += 1
@@ -331,18 +364,18 @@ function perform_step!(cache::TrustRegionCache{false})
 
     if make_new_J
         J = jacobian!!(cache.J, cache)
-        cache.H = J * J
-        cache.g = J * fu
+        cache.H = J' * J
+        cache.g = J' * fu
         cache.stats.njacs += 1
+        cache.u_gauss_newton = -1 .* (cache.H \ cache.g)
     end
 
-    @unpack g, H = cache
     # Compute the Newton step.
-    cache.u_tmp = -H \ g
     dogleg!(cache)
 
     # Compute the potentially new u
-    cache.u_tmp = u .+ cache.step_size
+    cache.u_tmp = u + cache.du
+
     cache.fu_new = f(cache.u_tmp, p)
     trust_region_step!(cache)
     cache.stats.nf += 1
@@ -355,25 +388,25 @@ function retrospective_step!(cache::TrustRegionCache)
     @unpack J, fu_prev, fu, u_prev, u = cache
     J = jacobian!!(deepcopy(J), cache)
     if J isa Number
-        cache.H = J * J
-        cache.g = J * fu
+        cache.H = J' * J
+        cache.g = J' * fu
     else
-        mul!(cache.H, J, J)
-        mul!(cache.g, J, fu)
+        mul!(cache.H, J', J)
+        mul!(cache.g, J', fu)
     end
     cache.stats.njacs += 1
-    @unpack H, g, step_size = cache
+    @unpack H, g, du = cache
 
     return -(get_loss(fu_prev) - get_loss(fu)) /
-           (step_size' * g + step_size' * H * step_size / 2)
+           (dot(du, g) + dot(du, H, du) / 2)
 end
 
 function trust_region_step!(cache::TrustRegionCache)
-    @unpack fu_new, step_size, g, H, loss, max_trust_r, radius_update_scheme = cache
+    @unpack fu_new, du, g, H, loss, max_trust_r, radius_update_scheme = cache
     cache.loss_new = get_loss(fu_new)
 
     # Compute the ratio of the actual reduction to the predicted reduction.
-    cache.r = -(loss - cache.loss_new) / (step_size' * g + step_size' * H * step_size / 2)
+    cache.r = -(loss - cache.loss_new) / (dot(du, g) + dot(du, H, du) / 2)
     @unpack r = cache
 
     if radius_update_scheme === RadiusUpdateSchemes.Simple
@@ -403,6 +436,51 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.force_stop = true
         end
 
+    elseif radius_update_scheme === RadiusUpdateSchemes.NLsolve
+        # accept/reject decision
+        if r > cache.step_threshold # accept
+            take_step!(cache)
+            cache.loss = cache.loss_new
+            cache.make_new_J = true
+        else # reject
+            cache.make_new_J = false
+        end
+
+        # trust region update 
+        if r < 1 // 10 # cache.shrink_threshold 
+            cache.trust_r *= 1 // 2 # cache.shrink_factor 
+        elseif r >= 9 // 10 # cache.expand_threshold 
+            cache.trust_r = 2 * norm(cache.du) # cache.expand_factor * norm(cache.du) 
+        elseif r >= 1 // 2 # cache.p1 
+            cache.trust_r = max(cache.trust_r, 2 * norm(cache.du)) # cache.expand_factor * norm(cache.du))
+        end
+
+        # convergence test
+        if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
+            cache.force_stop = true
+        end
+
+    elseif radius_update_scheme === RadiusUpdateSchemes.NocedalWright
+        # accept/reject decision
+        if r > cache.step_threshold # accept
+            take_step!(cache)
+            cache.loss = cache.loss_new
+            cache.make_new_J = true
+        else # reject
+            cache.make_new_J = false
+        end
+
+        if r < 1 // 4
+            cache.trust_r = (1 // 4) * norm(cache.du)
+        elseif (r > (3 // 4)) && abs(norm(cache.du) - cache.trust_r) / cache.trust_r < 1e-6
+            cache.trust_r = min(2 * cache.trust_r, cache.max_trust_r)
+        end
+
+        # convergence test
+        if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol
+            cache.force_stop = true
+        end
+
     elseif radius_update_scheme === RadiusUpdateSchemes.Hei
         if r > cache.step_threshold
             take_step!(cache)
@@ -413,14 +491,14 @@ function trust_region_step!(cache::TrustRegionCache)
         end
         # Hei's radius update scheme
         @unpack shrink_threshold, p1, p2, p3, p4 = cache
-        if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(step_size) <
+        if rfunc(r, shrink_threshold, p1, p3, p4, p2) * cache.internalnorm(du) <
            cache.trust_r
             cache.shrink_counter += 1
         else
             cache.shrink_counter = 0
         end
         cache.trust_r = rfunc(r, shrink_threshold, p1, p3, p4, p2) *
-                        cache.internalnorm(step_size)
+                        cache.internalnorm(du)
 
         if iszero(cache.fu) || cache.internalnorm(cache.fu) < cache.abstol ||
            cache.internalnorm(g) < cache.ϵ
@@ -432,7 +510,7 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.p1 = cache.p2 * cache.p1
             cache.shrink_counter += 1
         elseif r >= cache.expand_threshold &&
-               cache.internalnorm(step_size) > cache.trust_r / 2
+               cache.internalnorm(du) > cache.trust_r / 2
             cache.p1 = cache.p3 * cache.p1
             cache.shrink_counter = 0
         end
@@ -481,7 +559,7 @@ function trust_region_step!(cache::TrustRegionCache)
             cache.loss = cache.loss_new
             cache.make_new_J = true
             if retrospective_step!(cache) >= cache.expand_threshold
-                cache.trust_r = max(cache.p1 * cache.internalnorm(step_size), cache.trust_r)
+                cache.trust_r = max(cache.p1 * cache.internalnorm(du), cache.trust_r)
             end
 
         else
@@ -495,31 +573,63 @@ function trust_region_step!(cache::TrustRegionCache)
     end
 end
 
-function dogleg!(cache::TrustRegionCache)
-    @unpack u_tmp, trust_r = cache
+function dogleg!(cache::TrustRegionCache{true})
+    @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache
+
+    # Take the full Gauss-Newton step if lies within the trust region.
+    if norm(u_gauss_newton) ≤ trust_r
+        cache.du .= u_gauss_newton
+        return
+    end
+
+    # Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
+    l_grad = norm(cache.g) # length of the gradient
+    d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
+    if d_cauchy >= trust_r
+        @. cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region
+        return
+    end
+
+    # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
+    @. u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point
+    @. u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg -- use u_tmp to avoid allocation
+
+    a = dot(u_tmp, u_tmp)
+    b = 2 * dot(u_cauchy, u_tmp)
+    c = d_cauchy^2 - trust_r^2
+    aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary
+
+    @. cache.du = u_cauchy + τ * u_tmp
+end
+
+function dogleg!(cache::TrustRegionCache{false})
+    @unpack u_tmp, u_gauss_newton, u_cauchy, trust_r = cache
 
-    # Test if the full step is within the trust region.
-    if norm(u_tmp) ≤ trust_r
-        cache.step_size = deepcopy(u_tmp)
+    # Take the full Gauss-Newton step if lies within the trust region.
+    if norm(u_gauss_newton) ≤ trust_r
+        cache.du = deepcopy(u_gauss_newton)
         return
     end
 
-    # Calcualte Cauchy point, optimum along the steepest descent direction.
-    δsd = -cache.g
-    norm_δsd = norm(δsd)
-    if norm_δsd ≥ trust_r
-        cache.step_size = δsd .* trust_r / norm_δsd
+    ## Take intersection of steepest descent direction and trust region if Cauchy point lies outside of trust region
+    l_grad = norm(cache.g)
+    d_cauchy = l_grad^3 / dot(cache.g, cache.H, cache.g) # distance of the cauchy point from the current iterate
+    if d_cauchy > trust_r # cauchy point lies outside of trust region
+        cache.du = -(trust_r / l_grad) * cache.g # step to the end of the trust region
         return
     end
 
-    # Find the intersection point on the boundary.
-    N_sd = u_tmp - δsd
-    dot_N_sd = dot(N_sd, N_sd)
-    dot_sd_N_sd = dot(δsd, N_sd)
-    dot_sd = dot(δsd, δsd)
-    fact = dot_sd_N_sd^2 - dot_N_sd * (dot_sd - trust_r^2)
-    τ = (-dot_sd_N_sd + sqrt(fact)) / dot_N_sd
-    cache.step_size = δsd + τ * N_sd
+    # Take the intersection of dogled with trust region if Cauchy point lies inside the trust region
+    u_cauchy = -(d_cauchy / l_grad) * cache.g # compute Cauchy point
+    u_tmp = u_gauss_newton - u_cauchy # calf of the dogleg
+    a = dot(u_tmp, u_tmp)
+    b = 2 * dot(u_cauchy, u_tmp)
+    c = d_cauchy^2 - trust_r^2
+    aux = max(b^2 - 4 * a * c, 0.0) # technically guaranteed to be non-negative but hedging against floating point issues
+    τ = (-b + sqrt(aux)) / (2 * a) # stepsize along dogleg to trust region boundary
+
+    cache.du = u_cauchy + τ * u_tmp
 end
 
 function take_step!(cache::TrustRegionCache{true})
diff --git a/test/basictests.jl b/test/basictests.jl
index 54e63e93d..e872ec3d1 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -141,8 +141,9 @@ end
         return solve(prob, TrustRegion(; radius_update_scheme); abstol = 1e-9, kwargs...)
     end
 
-    radius_update_schemes = [RadiusUpdateSchemes.Simple, RadiusUpdateSchemes.Hei,
-        RadiusUpdateSchemes.Yuan, RadiusUpdateSchemes.Fan, RadiusUpdateSchemes.Bastin]
+    radius_update_schemes = [RadiusUpdateSchemes.Simple, RadiusUpdateSchemes.NocedalWright,
+        RadiusUpdateSchemes.NLsolve, RadiusUpdateSchemes.Hei, RadiusUpdateSchemes.Yuan,
+        RadiusUpdateSchemes.Fan, RadiusUpdateSchemes.Bastin]
     u0s = VERSION ≥ v"1.9" ? ([1.0, 1.0], @SVector[1.0, 1.0], 1.0) : ([1.0, 1.0], 1.0)
 
     @testset "[OOP] u0: $(typeof(u0)) radius_update_scheme: $(radius_update_scheme)" for u0 in u0s,
@@ -284,7 +285,7 @@ end
                 maxiters)
             sol_oop = benchmark_nlsolve_oop(quadratic_f, u0; radius_update_scheme,
                 maxiters)
-            @test sol_iip.u ≈ sol_iip.u
+            @test sol_iip.u ≈ sol_oop.u
         end
     end
 end
@@ -402,3 +403,60 @@ end
         end
     end
 end
+
+# --- PseudoTransient tests ---
+
+@testset "PseudoTransient" begin
+
+    #iip
+    function test_f!(du, u, p)
+        du[1] = 2 - 2u[1]
+        du[2] = u[1] - 4u[2]
+        return du
+    end
+
+    #oop
+    simple_test(u, p) = 0.9f0 .* u .- u
+
+    #test jacobian free PseudoTransient method
+    function f!(res, u, p)
+        @. res = u * u - p
+    end
+
+    @testset "[IIP] u0: $(typeof(u0))" for u0 in (zeros(2),)
+        probN = NonlinearProblem{true}(test_f!, u0)
+        sol = solve(probN, PseudoTransient())
+        @test sol.retcode == ReturnCode.Success
+        du = zeros(2)
+        test_f!(du, sol.u, 4.0)
+        @test du≈[0, 0] atol=1e-7
+    end
+
+    @testset "[OOP] u0: $(typeof(u0))" for u0 in ([1.0f0],)
+        probN = NonlinearProblem{false}(simple_test, u0)
+        sol = solve(probN, PseudoTransient(alpha_initial = 1.0))
+        @test sol.retcode == ReturnCode.Success
+        @test abs(sol.u[1]) <= 1.0f-4
+    end
+
+    @testset "Jacobian Free PT" begin
+        u0 = [1.0, 1.0]
+        p = 2.0
+        probN = NonlinearProblem(f!, u0, p)
+        linsolve = LinearSolve.KrylovJL_GMRES()
+        sol = solve(probN,
+            PseudoTransient(alpha_initial = 10.0, linsolve = linsolve),
+            reltol = 1e-9)
+        @test sol.retcode == ReturnCode.Success
+    end
+
+    @testset "NewtonRaphson Fails" begin
+        p = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+        u0 = [-10.0, -1.0, 1.0, 2.0, 3.0, 4.0, 10.0]
+        probN = NonlinearProblem{false}(newton_fails, u0, p)
+        sol = solve(probN, PseudoTransient(alpha_initial = 1.0), abstol = 1e-10)
+        @test all(abs.(newton_fails(sol.u, p)) .< 1e-10)
+    end
+end
+
+# Test that `PseudoTransient` passes a test that `NewtonRaphson` fails on.