diff --git a/Project.toml b/Project.toml
index 41da07bcf..6411e91a5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "NonlinearSolve"
 uuid = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
 authors = ["SciML"]
-version = "2.6.0"
+version = "2.6.1"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
diff --git a/src/NonlinearSolve.jl b/src/NonlinearSolve.jl
index 2b6ab25e4..34d8d869b 100644
--- a/src/NonlinearSolve.jl
+++ b/src/NonlinearSolve.jl
@@ -5,9 +5,9 @@ if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@max_m
 end
 
 import Reexport: @reexport
-import PrecompileTools
+import PrecompileTools: @recompile_invalidations, @compile_workload, @setup_workload
 
-PrecompileTools.@recompile_invalidations begin
+@recompile_invalidations begin
     using DiffEqBase, LinearAlgebra, LinearSolve, SparseArrays, SparseDiffTools
     using FastBroadcast: @..
     import ArrayInterface: restructure
@@ -95,17 +95,49 @@ include("jacobian.jl")
 include("ad.jl")
 include("default.jl")
 
-PrecompileTools.@compile_workload begin
-    for T in (Float32, Float64)
-        probs = (NonlinearProblem{false}((u, p) -> u .* u .- p, T(0.1), T(2)),
-            NonlinearProblem{false}((u, p) -> u .* u .- p, T[0.1], T[2]),
-            NonlinearProblem{true}((du, u, p) -> du .= u .* u .- p, T[0.1], T[2]))
+@setup_workload begin
+    nlfuncs = ((NonlinearFunction{false}((u, p) -> u .* u .- p), 0.1),
+        (NonlinearFunction{false}((u, p) -> u .* u .- p), [0.1]),
+        (NonlinearFunction{true}((du, u, p) -> du .= u .* u .- p), [0.1]))
+    probs_nls = NonlinearProblem[]
+    for T in (Float32, Float64), (fn, u0) in nlfuncs
+        push!(probs_nls, NonlinearProblem(fn, T.(u0), T(2)))
+    end
 
-        precompile_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(),
-            PseudoTransient(), GeneralBroyden(), GeneralKlement(), DFSane(), nothing)
+    nls_algs = (NewtonRaphson(), TrustRegion(), LevenbergMarquardt(), PseudoTransient(),
+        GeneralBroyden(), GeneralKlement(), DFSane(), nothing)
+
+    probs_nlls = NonlinearLeastSquaresProblem[]
+    nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), [0.1, 0.0]),
+        (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)), [0.1, 0.1]),
+        (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p,
+                resid_prototype = zeros(1)), [0.1, 0.0]),
+        (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p),
+                resid_prototype = zeros(4)), [0.1, 0.1]))
+    for (fn, u0) in nlfuncs
+        push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0))
+    end
+    nlfuncs = ((NonlinearFunction{false}((u, p) -> (u .^ 2 .- p)[1:1]), Float32[0.1, 0.0]),
+        (NonlinearFunction{false}((u, p) -> vcat(u .* u .- p, u .* u .- p)),
+            Float32[0.1, 0.1]),
+        (NonlinearFunction{true}((du, u, p) -> du[1] = u[1] * u[1] - p,
+                resid_prototype = zeros(Float32, 1)), Float32[0.1, 0.0]),
+        (NonlinearFunction{true}((du, u, p) -> du .= vcat(u .* u .- p, u .* u .- p),
+                resid_prototype = zeros(Float32, 4)), Float32[0.1, 0.1]))
+    for (fn, u0) in nlfuncs
+        push!(probs_nlls, NonlinearLeastSquaresProblem(fn, u0, 2.0f0))
+    end
 
-        for prob in probs, alg in precompile_algs
-            solve(prob, alg, abstol = T(1e-2))
+    nlls_algs = (LevenbergMarquardt(), GaussNewton(),
+        LevenbergMarquardt(; linsolve = LUFactorization()),
+        GaussNewton(; linsolve = LUFactorization()))
+
+    @compile_workload begin
+        for prob in probs_nls, alg in nls_algs
+            solve(prob, alg, abstol = 1e-2)
+        end
+        for prob in probs_nlls, alg in nlls_algs
+            solve(prob, alg, abstol = 1e-2)
         end
     end
 end
diff --git a/src/levenberg.jl b/src/levenberg.jl
index f181fcfcc..1462e9510 100644
--- a/src/levenberg.jl
+++ b/src/levenberg.jl
@@ -260,7 +260,7 @@ function perform_step!(cache::LevenbergMarquardtCache{true, fastls}) where {fast
     # Usual Levenberg-Marquardt step ("velocity").
     # The following lines do: cache.v = -cache.mat_tmp \ cache.u_tmp
     if fastls
-        cache.mat_tmp[1:length(fu1), :] .= cache.J
+        copyto!(@view(cache.mat_tmp[1:length(fu1), :]), cache.J)
         cache.mat_tmp[(length(fu1) + 1):end, :] .= λ .* cache.DᵀD
         cache.rhs_tmp[1:length(fu1)] .= _vec(fu1)
         linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp,
@@ -299,8 +299,8 @@ function perform_step!(cache::LevenbergMarquardtCache{true, fastls}) where {fast
     cache.stats.nfactors += 2
 
     # Require acceptable steps to satisfy the following condition.
-    norm_v = norm(v)
-    if 2 * norm(cache.a) ≤ α_geodesic * norm_v
+    norm_v = cache.internalnorm(v)
+    if 2 * cache.internalnorm(cache.a) ≤ α_geodesic * norm_v
         _vec(cache.δ) .= _vec(v) .+ _vec(cache.a) ./ 2
         @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache
         f(cache.fu_tmp, u .+ δ, p)
@@ -356,7 +356,7 @@ function perform_step!(cache::LevenbergMarquardtCache{false, fastls}) where {fas
 
     # Usual Levenberg-Marquardt step ("velocity").
     if fastls
-        cache.mat_tmp = vcat(J, λ .* cache.DᵀD)
+        cache.mat_tmp = _vcat(J, λ .* cache.DᵀD)
         cache.rhs_tmp[1:length(fu1)] .= -_vec(fu1)
         linres = dolinsolve(alg.precs, linsolve; A = cache.mat_tmp,
             b = cache.rhs_tmp, linu = _vec(cache.v), p = p, reltol = cache.abstol)
@@ -392,8 +392,8 @@ function perform_step!(cache::LevenbergMarquardtCache{false, fastls}) where {fas
     cache.stats.nfactors += 1
 
     # Require acceptable steps to satisfy the following condition.
-    norm_v = norm(v)
-    if 2 * norm(cache.a) ≤ α_geodesic * norm_v
+    norm_v = cache.internalnorm(v)
+    if 2 * cache.internalnorm(cache.a) ≤ α_geodesic * norm_v
         cache.δ = _restructure(cache.δ, _vec(v) .+ _vec(cache.a) ./ 2)
         @unpack δ, loss_old, norm_v_old, v_old, b_uphill = cache
         fu_new = f(u .+ δ, p)
diff --git a/test/23_test_problems.jl b/test/23_test_problems.jl
index 5ec5e611f..b77d89087 100644
--- a/test/23_test_problems.jl
+++ b/test/23_test_problems.jl
@@ -68,7 +68,7 @@ end
     broken_tests = Dict(alg => Int[] for alg in alg_ops)
     broken_tests[alg_ops[1]] = [3, 6, 17, 21]
     broken_tests[alg_ops[2]] = [3, 6, 17, 21]
-    broken_tests[alg_ops[3]] = [6, 11, 21]
+    broken_tests[alg_ops[3]] = [6, 11, 17, 21]
 
     test_on_library(problems, dicts, alg_ops, broken_tests)
 end
diff --git a/test/basictests.jl b/test/basictests.jl
index bff4bcbae..b1f9d3cb7 100644
--- a/test/basictests.jl
+++ b/test/basictests.jl
@@ -390,7 +390,7 @@ end
     @testset "Keyword Arguments" begin
         damping_initial = [0.5, 2.0, 5.0]
         damping_increase_factor = [1.5, 3.0, 10.0]
-        damping_decrease_factor = Float64[2, 5, 12]
+        damping_decrease_factor = Float64[2, 5, 10.0]
         finite_diff_step_geodesic = [0.02, 0.2, 0.3]
         α_geodesic = [0.6, 0.8, 0.9]
         b_uphill = Float64[0, 1, 2]
@@ -408,7 +408,7 @@ end
                 b_uphill = options[6], min_damping_D = options[7])
 
             probN = NonlinearProblem{false}(quadratic_f, [1.0, 1.0], 2.0)
-            sol = solve(probN, alg, abstol = 1e-12)
+            sol = solve(probN, alg; abstol = 1e-13, maxiters = 10000)
             @test all(abs.(quadratic_f(sol.u, 2.0)) .< 1e-10)
         end
     end