From c86413cc55a591670c4a3fafa03cca54bbf110e3 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Thu, 7 Dec 2023 15:53:20 -0500 Subject: [PATCH] Update documentation --- Project.toml | 2 +- docs/Project.toml | 6 +- docs/src/api/fastlevenbergmarquardt.md | 19 ++ docs/src/api/leastsquaresoptim.md | 19 ++ docs/src/api/minpack.md | 4 +- docs/src/api/nlsolve.md | 4 +- docs/src/api/simplenonlinearsolve.md | 12 +- docs/src/api/steadystatediffeq.md | 4 +- docs/src/api/sundials.md | 4 +- docs/src/basics/FAQ.md | 62 ++++++- docs/src/basics/NonlinearFunctions.md | 8 +- docs/src/basics/NonlinearProblem.md | 17 +- docs/src/basics/solve.md | 34 ++-- docs/src/index.md | 51 +++--- docs/src/release_notes.md | 23 ++- docs/src/solvers/BracketingSolvers.md | 26 ++- .../solvers/NonlinearLeastSquaresSolvers.md | 66 +++++-- docs/src/solvers/NonlinearSystemSolvers.md | 158 ++++++++-------- docs/src/solvers/SteadyStateSolvers.md | 28 +-- docs/src/tutorials/code_optimization.md | 128 +++++++------ docs/src/tutorials/getting_started.md | 172 ++++++++++++------ docs/src/tutorials/large_systems.md | 129 +++++++------ docs/src/tutorials/modelingtoolkit.md | 29 +-- docs/src/tutorials/small_compile.md | 45 ++--- src/default.jl | 93 ++++++---- 25 files changed, 691 insertions(+), 452 deletions(-) create mode 100644 docs/src/api/fastlevenbergmarquardt.md create mode 100644 docs/src/api/leastsquaresoptim.md diff --git a/Project.toml b/Project.toml index 50851177e..46f2191da 100644 --- a/Project.toml +++ b/Project.toml @@ -66,7 +66,7 @@ NonlinearProblemLibrary = "0.1" Pkg = "1" PrecompileTools = "1" Printf = "<0.0.1, 1" -Random = "1" +Random = "<0.0.1, 1" RecursiveArrayTools = "2" Reexport = "0.2, 1" SafeTestsets = "0.1" diff --git a/docs/Project.toml b/docs/Project.toml index f81157cd7..f92656632 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -9,6 +9,7 @@ LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" NonlinearSolve = 
"8913a72c-1f9b-4ce2-8d82-65094dcecaec" NonlinearSolveMINPACK = "c100e077-885d-495a-a2ea-599e143bf69d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" SciMLNLSolve = "e9a6253c-8580-4d32-9898-8661bb511710" SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7" @@ -26,12 +27,13 @@ Documenter = "1" IncompleteLU = "0.2" LinearSolve = "2" ModelingToolkit = "8" -NonlinearSolve = "1, 2" +NonlinearSolve = "3" NonlinearSolveMINPACK = "0.1" +Random = "<0.0.1, 1" SciMLBase = "2.4" SciMLNLSolve = "0.1" SimpleNonlinearSolve = "1" StaticArrays = "1" -SteadyStateDiffEq = "1.10, 2" +SteadyStateDiffEq = "2" Sundials = "4.11" Symbolics = "4, 5" diff --git a/docs/src/api/fastlevenbergmarquardt.md b/docs/src/api/fastlevenbergmarquardt.md new file mode 100644 index 000000000..b970cf819 --- /dev/null +++ b/docs/src/api/fastlevenbergmarquardt.md @@ -0,0 +1,19 @@ +# FastLevenbergMarquardt.jl + +This is a wrapper package for importing solvers from FastLevenbergMarquardt.jl into the +SciML interface. Note that these solvers do not come by default, and thus one needs to +install the package before using these solvers: + +```julia +using Pkg +Pkg.add("FastLevenbergMarquardt") +using FastLevenbergMarquardt +``` + +These methods can be used independently of the rest of NonlinearSolve.jl + +## Solver API + +```@docs +FastLevenbergMarquardtJL +``` diff --git a/docs/src/api/leastsquaresoptim.md b/docs/src/api/leastsquaresoptim.md new file mode 100644 index 000000000..89c07d8b3 --- /dev/null +++ b/docs/src/api/leastsquaresoptim.md @@ -0,0 +1,19 @@ +# LeastSquaresOptim.jl + +This is a wrapper package for importing solvers from LeastSquaresOptim.jl into the SciML +interface. 
Note that these solvers do not come by default, and thus one needs to install +the package before using these solvers: + +```julia +using Pkg +Pkg.add("LeastSquaresOptim") +using LeastSquaresOptim +``` + +These methods can be used independently of the rest of NonlinearSolve.jl + +## Solver API + +```@docs +LeastSquaresOptimJL +``` diff --git a/docs/src/api/minpack.md b/docs/src/api/minpack.md index 5c1e80856..36bec764f 100644 --- a/docs/src/api/minpack.md +++ b/docs/src/api/minpack.md @@ -1,8 +1,8 @@ # MINPACK.jl This is a wrapper package for importing solvers from Sundials into the SciML interface. -Note that these solvers do not come by default, and thus one needs to install -the package before using these solvers: +Note that these solvers do not come by default, and thus one needs to install the package +before using these solvers: ```julia using Pkg diff --git a/docs/src/api/nlsolve.md b/docs/src/api/nlsolve.md index 703a5174b..f0b611cc9 100644 --- a/docs/src/api/nlsolve.md +++ b/docs/src/api/nlsolve.md @@ -1,8 +1,8 @@ # NLsolve.jl This is a wrapper package for importing solvers from NLsolve.jl into the SciML interface. -Note that these solvers do not come by default, and thus one needs to install -the package before using these solvers: +Note that these solvers do not come by default, and thus one needs to install the package +before using these solvers: ```julia using Pkg diff --git a/docs/src/api/simplenonlinearsolve.md b/docs/src/api/simplenonlinearsolve.md index 7b5a6ca6f..72f738140 100644 --- a/docs/src/api/simplenonlinearsolve.md +++ b/docs/src/api/simplenonlinearsolve.md @@ -6,7 +6,8 @@ These methods can be used independently of the rest of NonlinearSolve.jl ### Interval Methods -These methods are suited for interval (scalar) root-finding problems, i.e. `IntervalNonlinearProblem`. +These methods are suited for interval (scalar) root-finding problems, +i.e. `IntervalNonlinearProblem`. 
```@docs ITP @@ -18,14 +19,15 @@ Brent ### General Methods -These methods are suited for any general nonlinear root-finding problem, i.e. `NonlinearProblem`. +These methods are suited for any general nonlinear root-finding problem, i.e. +`NonlinearProblem`. ```@docs SimpleNewtonRaphson -Broyden +SimpleBroyden SimpleHalley -Klement +SimpleKlement SimpleTrustRegion SimpleDFSane -LBroyden +SimpleLimitedMemoryBroyden ``` diff --git a/docs/src/api/steadystatediffeq.md b/docs/src/api/steadystatediffeq.md index 3681ce9dd..3bfe61c1a 100644 --- a/docs/src/api/steadystatediffeq.md +++ b/docs/src/api/steadystatediffeq.md @@ -1,8 +1,8 @@ # SteadyStateDiffEq.jl This is a wrapper package for using ODE solvers from -[DifferentialEquations.jl](https://docs.sciml.ai/DiffEqDocs/stable/) into the SciML interface. -Note that these solvers do not come by default, and thus one needs to install +[DifferentialEquations.jl](https://docs.sciml.ai/DiffEqDocs/stable/) into the SciML +interface. Note that these solvers do not come by default, and thus one needs to install the package before using these solvers: ```julia diff --git a/docs/src/api/sundials.md b/docs/src/api/sundials.md index 551176329..80a6b367d 100644 --- a/docs/src/api/sundials.md +++ b/docs/src/api/sundials.md @@ -1,8 +1,8 @@ # Sundials.jl This is a wrapper package for importing solvers from Sundials into the SciML interface. 
-Note that these solvers do not come by default, and thus one needs to install -the package before using these solvers: +Note that these solvers do not come by default, and thus one needs to install the package +before using these solvers: ```julia using Pkg diff --git a/docs/src/basics/FAQ.md b/docs/src/basics/FAQ.md index 7e6fbd5b5..62add8e83 100644 --- a/docs/src/basics/FAQ.md +++ b/docs/src/basics/FAQ.md @@ -8,7 +8,7 @@ On the test example: ```@example using NonlinearSolve, BenchmarkTools -N = 100_000; +const N = 100_000; levels = 1.5 .* rand(N); out = zeros(N); myfun(x, lv) = x * sin(x) - lv @@ -31,8 +31,62 @@ end @btime f2(out, levels, 1.0) ``` -MATLAB 2022a achieves 1.66s. Try this code yourself: we receive 0.06 seconds, or a 28x speedup. -This example is still not optimized in the Julia code, and we expect an improvement in a near -future version. +MATLAB 2022a achieves 1.66s. Try this code yourself: we receive 0.009 seconds, or a 184x +speedup. For more information on performance of SciML, see the [SciMLBenchmarks](https://docs.sciml.ai/SciMLBenchmarksOutput/stable/). + +## The solver tried to set a Dual Number in my Vector of Floats. How do I fix that? + +This is a common problem that occurs if the code was not written to be generic based on the +input types.
For example, consider this example taken from +[this issue](https://github.com/SciML/NonlinearSolve.jl/issues/298) + +```@example dual_error_faq +using NonlinearSolve, Random + +function fff_incorrect(var, p) + v_true = [1.0, 0.1, 2.0, 0.5] + xx = [1.0, 2.0, 3.0, 4.0] + xx[1] = var[1] - v_true[1] + return var - v_true +end + +v_true = [1.0, 0.1, 2.0, 0.5] +v_init = v_true .+ randn!(similar(v_true)) * 0.1 + +prob_oop = NonlinearLeastSquaresProblem{false}(fff_incorrect, v_init) +try + sol = solve(prob_oop, LevenbergMarquardt(); maxiters = 10000, abstol = 1e-8) +catch e + @error e +end +``` + +Essentially what happened was that NonlinearSolve checked that we can use ForwardDiff.jl to +differentiate the function based on the input types. However, this function has +`xx = [1.0, 2.0, 3.0, 4.0]` followed by a `xx[1] = var[1] - v_true[1]` where `var` might +be a Dual number. This causes the error. To fix it: + + 1. Specify the `autodiff` to be `AutoFiniteDiff` + +```@example dual_error_faq +sol = solve(prob_oop, LevenbergMarquardt(; autodiff = AutoFiniteDiff()); maxiters = 10000, + abstol = 1e-8) +``` + +This worked, but Finite Differencing is not the recommended approach in any scenario.
+Instead, rewrite the function to use +[PreallocationTools.jl](https://github.com/SciML/PreallocationTools.jl) or write it as + +```@example dual_error_faq +function fff_correct(var, p) + v_true = [1.0, 0.1, 2.0, 0.5] + xx = eltype(var)[1.0, 2.0, 3.0, 4.0] + xx[1] = var[1] - v_true[1] + return xx - v_true +end + +prob_oop = NonlinearLeastSquaresProblem{false}(fff_correct, v_init) +sol = solve(prob_oop, LevenbergMarquardt(); maxiters = 10000, abstol = 1e-8) +``` diff --git a/docs/src/basics/NonlinearFunctions.md b/docs/src/basics/NonlinearFunctions.md index e679a1cb3..f3e142ac5 100644 --- a/docs/src/basics/NonlinearFunctions.md +++ b/docs/src/basics/NonlinearFunctions.md @@ -1,10 +1,10 @@ # [NonlinearFunctions and Jacobian Types](@id nonlinearfunctions) The SciML ecosystem provides an extensive interface for declaring extra functions -associated with the differential equation's data. In traditional libraries, there -is usually only one option: the Jacobian. However, we allow for a large array -of pre-computed functions to speed up the calculations. This is offered via the -`NonlinearFunction` types, which can be passed to the problems. +associated with the differential equation's data. In traditional libraries, there is usually +only one option: the Jacobian. However, we allow for a large array of pre-computed functions +to speed up the calculations. This is offered via the `NonlinearFunction` types, which can +be passed to the problems. ## Function Type Definitions diff --git a/docs/src/basics/NonlinearProblem.md b/docs/src/basics/NonlinearProblem.md index 11dc09772..23acf78b5 100644 --- a/docs/src/basics/NonlinearProblem.md +++ b/docs/src/basics/NonlinearProblem.md @@ -1,13 +1,17 @@ # [Nonlinear Problems](@id problems) -## The Three Types of Nonlinear Problems +## The Four Types of Nonlinear Problems -NonlinearSolve.jl tackles three related types of nonlinear systems: +NonlinearSolve.jl tackles four related types of nonlinear systems: - 1. 
Interval rootfinding problems. I.e., find the ``t \in [t_0, t_f]`` such that ``f(t) = 0``. + 1. Interval rootfinding problems. I.e., find the ``t \in [t_0, t_f]`` such that + ``f(t) = 0``. 2. Systems of nonlinear equations, i.e., find the ``u`` such that ``f(u) = 0``. 3. Steady state problems, i.e., find the ``u`` such that ``u' = f(u,t)`` has reached steady state, i.e., ``0 = f(u, ∞)``. + 4. The nonlinear least squares problem, which is an under/over-constrained nonlinear system + which might not be satisfiable, i.e. there may be no `u` such that `f(u) = 0`, and thus + we find the `u` which minimizes `||f(u)||` in the least squares sense. The first is for solving scalar rootfinding problems, i.e., finding a single number, and requires that a bracketing interval is known. For a bracketing interval, one must have that @@ -16,9 +20,9 @@ interval. !!! note - Interval rootfinding problems allow for `f` to return an array, in which case the interval - rootfinding problem is interpreted as finding the first `t` such that any of the components - of the array hit zero. + Interval rootfinding problems allow for `f` to return an array, in which case the + interval rootfinding problem is interpreted as finding the first `t` such that any of + the components of the array hit zero. The second type of nonlinear system can be multidimensional, and thus no ordering nor boundaries are assumed to be known. For a system of nonlinear equations, `f` can return @@ -43,4 +47,5 @@ ODE `u' = f(u,t)`. SciMLBase.IntervalNonlinearProblem SciMLBase.NonlinearProblem SciMLBase.SteadyStateProblem +SciMLBase.NonlinearLeastSquaresProblem ``` diff --git a/docs/src/basics/solve.md b/docs/src/basics/solve.md index afa091223..0f205aba1 100644 --- a/docs/src/basics/solve.md +++ b/docs/src/basics/solve.md @@ -6,28 +6,28 @@ solve(prob::SciMLBase.NonlinearProblem, args...; kwargs...) ## General Controls -- `alias_u0::Bool`: Whether to alias the initial condition or use a copy. - Defaults to `false`. 
-- `internal_norm::Function`: The norm used by the solver. Default depends on algorithm - choice. + - `alias_u0::Bool`: Whether to alias the initial condition or use a copy. + Defaults to `false`. + - `internal_norm::Function`: The norm used by the solver. Default depends on algorithm + choice. ## Iteration Controls -- `maxiters::Int`: The maximum number of iterations to perform. Defaults to `1000`. -- `abstol::Number`: The absolute tolerance. -- `reltol::Number`: The relative tolerance. -- `termination_condition`: Termination Condition from DiffEqBase. Defaults to - `AbsSafeBestTerminationMode()` for `NonlinearSolve.jl` and `AbsTerminateMode()` for - `SimpleNonlinearSolve.jl`. + - `maxiters::Int`: The maximum number of iterations to perform. Defaults to `1000`. + - `abstol::Number`: The absolute tolerance. + - `reltol::Number`: The relative tolerance. + - `termination_condition`: Termination Condition from DiffEqBase. Defaults to + `AbsSafeBestTerminationMode()` for `NonlinearSolve.jl` and `AbsTerminateMode()` for + `SimpleNonlinearSolve.jl`. ## Tracing Controls These are exclusively available for native `NonlinearSolve.jl` solvers. -- `show_trace`: Must be `Val(true)` or `Val(false)`. This controls whether the trace is - displayed to the console. (Defaults to `Val(false)`) -- `trace_level`: Needs to be one of Trace Objects: [`TraceMinimal`](@ref), - [`TraceWithJacobianConditionNumber`](@ref), or [`TraceAll`](@ref). This controls the - level of detail of the trace. (Defaults to `TraceMinimal()`) -- `store_trace`: Must be `Val(true)` or `Val(false)`. This controls whether the trace is - stored in the solution object. (Defaults to `Val(false)`) + - `show_trace`: Must be `Val(true)` or `Val(false)`. This controls whether the trace is + displayed to the console. (Defaults to `Val(false)`) + - `trace_level`: Needs to be one of Trace Objects: [`TraceMinimal`](@ref), + [`TraceWithJacobianConditionNumber`](@ref), or [`TraceAll`](@ref). 
This controls the + level of detail of the trace. (Defaults to `TraceMinimal()`) + - `store_trace`: Must be `Val(true)` or `Val(false)`. This controls whether the trace is + stored in the solution object. (Defaults to `Val(false)`) diff --git a/docs/src/index.md b/docs/src/index.md index 134328ddc..b35f09f3a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,20 +1,19 @@ # NonlinearSolve.jl: High-Performance Unified Nonlinear Solvers -NonlinearSolve.jl is a unified interface for the nonlinear solving packages of -Julia. The package includes its own high-performance nonlinear solvers which include the -ability to swap out to fast direct and iterative linear solvers, along with the -ability to use sparse automatic differentiation for Jacobian construction and -Jacobian-vector products. NonlinearSolve.jl interfaces with other packages of the Julia ecosystem -to make it easy to test alternative solver packages and pass small types to -control algorithm swapping. It also interfaces with the -[ModelingToolkit.jl](https://docs.sciml.ai/ModelingToolkit/stable/) world of symbolic -modeling to allow for automatically generating high-performance code. - -Performance is key: the current methods are made to be highly performant on -scalar and statically sized small problems, with options for large-scale systems. -If you run into any performance issues, please file an issue. -Consult the [NonlinearSystemSolvers](@ref nonlinearsystemsolvers) page for -information on how to import solvers from different packages. +NonlinearSolve.jl is a unified interface for the nonlinear solving packages of Julia. The +package includes its own high-performance nonlinear solvers which include the ability to +swap out to fast direct and iterative linear solvers, along with the ability to use sparse +automatic differentiation for Jacobian construction and Jacobian-vector products. 
+NonlinearSolve.jl interfaces with other packages of the Julia ecosystem to make it easy to +test alternative solver packages and pass small types to control algorithm swapping. It also +interfaces with the [ModelingToolkit.jl](https://docs.sciml.ai/ModelingToolkit/stable/) +world of symbolic modeling to allow for automatically generating high-performance code. + +Performance is key: the current methods are made to be highly performant on scalar and +statically sized small problems, with options for large-scale systems. If you run into any +performance issues, please file an issue. Consult the +[NonlinearSystemSolvers](@ref nonlinearsystemsolvers) page for information on how to import +solvers from different packages. ## Installation @@ -27,17 +26,17 @@ Pkg.add("NonlinearSolve") ## Contributing -- Please refer to the - [SciML ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://github.com/SciML/ColPrac/blob/master/README.md) - for guidance on PRs, issues, and other matters relating to contributing to SciML. - -- See the [SciML Style Guide](https://github.com/SciML/SciMLStyle) for common coding practices and other style decisions. -- There are a few community forums: - - - The #diffeq-bridged and #sciml-bridged channels in the [Julia Slack](https://julialang.org/slack/) - - The #diffeq-bridged and #sciml-bridged channels in the [Julia Zulip](https://julialang.zulipchat.com/#narrow/stream/279055-sciml-bridged) - - On the [Julia Discourse forums](https://discourse.julialang.org) - - See also [SciML Community page](https://sciml.ai/community/) + - Please refer to the + [SciML ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://github.com/SciML/ColPrac/blob/master/README.md) + for guidance on PRs, issues, and other matters relating to contributing to SciML. + + - See the [SciML Style Guide](https://github.com/SciML/SciMLStyle) for common coding practices and other style decisions. 
+ - There are a few community forums: + + + The #diffeq-bridged and #sciml-bridged channels in the [Julia Slack](https://julialang.org/slack/) + + The #diffeq-bridged and #sciml-bridged channels in the [Julia Zulip](https://julialang.zulipchat.com/#narrow/stream/279055-sciml-bridged) + + On the [Julia Discourse forums](https://discourse.julialang.org) + + See also [SciML Community page](https://sciml.ai/community/) ## Reproducibility diff --git a/docs/src/release_notes.md b/docs/src/release_notes.md index 829576a7f..e7cde6789 100644 --- a/docs/src/release_notes.md +++ b/docs/src/release_notes.md @@ -2,8 +2,21 @@ ## Breaking Changes in NonlinearSolve.jl v3 -1. `GeneralBroyden` and `GeneralKlement` have been renamed to `Broyden` and `Klement` - respectively. -2. Compat for `SimpleNonlinearSolve` has been bumped to `v1`. -3. The old API to specify autodiff via `Val` and chunksize (that was deprecated in v2) has - been removed. + 1. `GeneralBroyden` and `GeneralKlement` have been renamed to `Broyden` and `Klement` + respectively. + 2. Compat for `SimpleNonlinearSolve` has been bumped to `v1`. + 3. The old style of specifying autodiff with `chunksize`, `standardtag`, etc. has been + deprecated in favor of directly specifying the autodiff type, like `AutoForwardDiff`. + +## Breaking Changes in SimpleNonlinearSolve.jl v1 + + - Batched solvers have been removed in favor of `BatchedArrays.jl`. Stay tuned for detailed + tutorials on how to use `BatchedArrays.jl` with `NonlinearSolve` & `SimpleNonlinearSolve` + solvers. + - The old style of specifying autodiff with `chunksize`, `standardtag`, etc. has been + deprecated in favor of directly specifying the autodiff type, like `AutoForwardDiff`. + - `Broyden` and `Klement` have been renamed to `SimpleBroyden` and `SimpleKlement` to + avoid conflicts with `NonlinearSolve.jl`'s `GeneralBroyden` and `GeneralKlement`, which + will be renamed to `Broyden` and `Klement` in the future. 
+ - `LBroyden` has been renamed to `SimpleLimitedMemoryBroyden` to make it consistent with + `NonlinearSolve.jl`'s `LimitedMemoryBroyden`. diff --git a/docs/src/solvers/BracketingSolvers.md b/docs/src/solvers/BracketingSolvers.md index cc3bd031a..af322af74 100644 --- a/docs/src/solvers/BracketingSolvers.md +++ b/docs/src/solvers/BracketingSolvers.md @@ -1,17 +1,25 @@ # [Interval Rootfinding Methods (Bracketing Solvers)](@id bracketing) -`solve(prob::IntervalNonlinearProblem,alg;kwargs)` +`solve(prob::IntervalNonlinearProblem, alg; kwargs...)` Solves for ``f(t) = 0`` in the problem defined by `prob` using the algorithm `alg`. If no algorithm is given, a default algorithm will be chosen. ## Recommended Methods -`ITP()` is the recommended method for the scalar interval root-finding problems. It is particularly well-suited for cases where the function is smooth and well-behaved; and achieved superlinear convergence while retaining the optimal worst-case performance of the Bisection method. For more details, consult the detailed solver API docs. +`ITP()` is the recommended method for the scalar interval root-finding problems. It is +particularly well-suited for cases where the function is smooth and well-behaved; and +achieves superlinear convergence while retaining the optimal worst-case performance of the +Bisection method. For more details, consult the detailed solver API docs. -`Ridder` is a hybrid method that uses the value of function at the midpoint of the interval to perform an exponential interpolation to the root. This gives a fast convergence with a guaranteed convergence of at most twice the number of iterations as the bisection method. +`Ridder` is a hybrid method that uses the value of function at the midpoint of the interval +to perform an exponential interpolation to the root. This gives a fast convergence with a +guaranteed convergence of at most twice the number of iterations as the bisection method.
-`Brent` is a combination of the bisection method, the secant method and inverse quadratic interpolation. At every iteration, Brent's method decides which method out of these three is likely to do best, and proceeds by doing a step according to that method. This gives a robust and fast method, which therefore enjoys considerable popularity. +`Brent` is a combination of the bisection method, the secant method and inverse quadratic +interpolation. At every iteration, Brent's method decides which method out of these three is +likely to do best, and proceeds by doing a step according to that method. This gives a +robust and fast method, which therefore enjoys considerable popularity. ## Full List of Methods @@ -20,8 +28,8 @@ algorithm is given, a default algorithm will be chosen. These methods are automatically included as part of NonlinearSolve.jl. Though, one can use SimpleNonlinearSolve.jl directly to decrease the dependencies and improve load time. -- `ITP`: A non-allocating ITP (Interpolate, Truncate & Project) method -- `Falsi`: A non-allocating regula falsi method -- `Bisection`: A common bisection method -- `Ridder`: A non-allocating Ridder method -- `Brent`: A non-allocating Brent method + - `ITP`: A non-allocating ITP (Interpolate, Truncate & Project) method + - `Falsi`: A non-allocating regula falsi method + - `Bisection`: A common bisection method + - `Ridder`: A non-allocating Ridder method + - `Brent`: A non-allocating Brent method diff --git a/docs/src/solvers/NonlinearLeastSquaresSolvers.md b/docs/src/solvers/NonlinearLeastSquaresSolvers.md index 588e21a84..fbd68c847 100644 --- a/docs/src/solvers/NonlinearLeastSquaresSolvers.md +++ b/docs/src/solvers/NonlinearLeastSquaresSolvers.md @@ -7,20 +7,58 @@ Solves the nonlinear least squares problem defined by `prob` using the algorithm ## Recommended Methods -The default method `FastShortcutNLLSPolyalg` is a good choice for most -problems. 
It is a polyalgorithm that attempts to use a fast algorithm -(`GaussNewton`) and if that fails it falls back to a more robust -algorithm (`LevenbergMarquardt`). +The default method `FastShortcutNLLSPolyalg` is a good choice for most problems. It is a +polyalgorithm that attempts to use a fast algorithm (`GaussNewton`) and if that fails it +falls back to a more robust algorithm (`LevenbergMarquardt`). ## Full List of Methods -- `LevenbergMarquardt()`: An advanced Levenberg-Marquardt implementation with the - improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to - the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed for - large-scale and numerically-difficult nonlinear systems. -- `GaussNewton()`: An advanced GaussNewton implementation with support for efficient - handling of sparse matrices via colored automatic differentiation and preconditioned - linear solvers. Designed for large-scale and numerically-difficult nonlinear least squares - problems. -- `SimpleGaussNewton()`: Simple Gauss Newton Implementation with `QRFactorization` to - solve a linear least squares problem at each step! +### NonlinearSolve.jl + + - `LevenbergMarquardt()`: An advanced Levenberg-Marquardt implementation with the + improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to + the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed + for large-scale and numerically-difficult nonlinear systems. + - `GaussNewton()`: An advanced GaussNewton implementation with support for efficient + handling of sparse matrices via colored automatic differentiation and preconditioned + linear solvers. Designed for large-scale and numerically-difficult nonlinear least + squares problems. + +### SimpleNonlinearSolve.jl + +These methods are included with NonlinearSolve.jl by default, though SimpleNonlinearSolve.jl +can be used directly to reduce dependencies and improve load times. 
+SimpleNonlinearSolve.jl's methods excel at small problems and problems defined with static +arrays. + + - `SimpleGaussNewton()`: Simple Gauss Newton implementation using QR factorizations for + numerical stability. + +### FastLevenbergMarquardt.jl + +A wrapper over +[FastLevenbergMarquardt.jl](https://github.com/kamesy/FastLevenbergMarquardt.jl). Note that +it is called `FastLevenbergMarquardt` since the original package is called "Fast", though +benchmarks demonstrate `LevenbergMarquardt()` usually outperforms. + + - `FastLevenbergMarquardtJL(linsolve = :cholesky)`, can also choose `linsolve = :qr`. + +### LeastSquaresOptim.jl + +A wrapper over +[LeastSquaresOptim.jl](https://github.com/matthieugomez/LeastSquaresOptim.jl). Has a core +algorithm `LeastSquaresOptimJL(alg; linsolve)` where the choices for `alg` are: + + - `:lm` a Levenberg-Marquardt implementation + - `:dogleg` a trust-region dogleg Gauss-Newton + +And the choices for `linsolve` are: + + - `:qr` + - `:cholesky` + - `:lsmr` a conjugate gradient method (LSMR with diagonal preconditioner). + +### Optimization.jl + +`NonlinearLeastSquaresProblem`s can be converted into an `OptimizationProblem` and used +with any solver of [Optimization.jl](https://github.com/SciML/Optimization.jl). diff --git a/docs/src/solvers/NonlinearSystemSolvers.md b/docs/src/solvers/NonlinearSystemSolvers.md index 15a7d8d5e..078131ff8 100644 --- a/docs/src/solvers/NonlinearSystemSolvers.md +++ b/docs/src/solvers/NonlinearSystemSolvers.md @@ -7,31 +7,27 @@ Solves for ``f(u)=0`` in the problem defined by `prob` using the algorithm ## Recommended Methods -The default method `FastShortcutNonlinearPolyalg` is a good choice for most -problems. It is a polyalgorithm that attempts to use a fast algorithm -(Klement, Broyden) and if that fails it falls back to a more robust -algorithm (`NewtonRaphson`) before falling back the most robust variant of -`TrustRegion`. 
For basic problems this will be very fast, for harder problems -it will make sure to work. - -If one is looking for more robustness then `RobustMultiNewton` is a good choice. -It attempts a set of the most robust methods in succession and only fails if -all of the methods fail to converge. Additionally, `DynamicSS` can be a good choice -for high stability. - -As a balance, `NewtonRaphson` is a good choice for most problems that aren't too -difficult yet need high performance, and `TrustRegion` is a bit less performant -but more stable. If the problem is well-conditioned, `Klement` or `Broyden` -may be faster, but highly dependent on the eigenvalues of the Jacobian being -sufficiently small. - -`NewtonRaphson` and `TrustRegion` are designed for for large systems. -They can make use of sparsity patterns for sparse automatic differentiation -and sparse linear solving of very large systems. Meanwhile, -`SimpleNewtonRaphson` and `SimpleTrustRegion` are implementations which is specialized for -small equations. They are non-allocating on static arrays and thus really well-optimized -for small systems, thus usually outperforming the other methods when such types are -used for `u0`. +The default method `FastShortcutNonlinearPolyalg` is a good choice for most problems. It is +a polyalgorithm that attempts to use a fast algorithm (Klement, Broyden) and if that fails +it falls back to a more robust algorithm (`NewtonRaphson`) before falling back to the most +robust variant of `TrustRegion`. For basic problems this will be very fast, for harder +problems it will make sure to work. + +If one is looking for more robustness then `RobustMultiNewton` is a good choice. It attempts +a set of the most robust methods in succession and only fails if all of the methods fail to +converge. Additionally, `DynamicSS` can be a good choice for high stability.
+ +As a balance, `NewtonRaphson` is a good choice for most problems that aren't too difficult +yet need high performance, and `TrustRegion` is a bit less performant but more stable. If +the problem is well-conditioned, `Klement` or `Broyden` may be faster, but highly dependent +on the eigenvalues of the Jacobian being sufficiently small. + +`NewtonRaphson` and `TrustRegion` are designed for large systems. They can make use of +sparsity patterns for sparse automatic differentiation and sparse linear solving of very +large systems. Meanwhile, `SimpleNewtonRaphson` and `SimpleTrustRegion` are implementations +which are specialized for small equations. They are non-allocating on static arrays and thus +really well-optimized for small systems, thus usually outperforming the other methods when +such types are used for `u0`. ## Full List of Methods @@ -47,34 +43,34 @@ linear solver, automatic differentiation, abstract array types, GPU, sparse/structured matrix support, etc. These methods support the largest set of types and features, but have a bit of overhead on very small problems. -- `NewtonRaphson()`:A Newton-Raphson method with swappable nonlinear solvers and autodiff - methods for high performance on large and sparse systems. -- `TrustRegion()`: A Newton Trust Region dogleg method with swappable nonlinear solvers and - autodiff methods for high performance on large and sparse systems. -- `LevenbergMarquardt()`: An advanced Levenberg-Marquardt implementation with the - improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to - the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed for - large-scale and numerically-difficult nonlinear systems. -- `PseudoTransient()`: A pseudo-transient method which mixes the stability of Euler-type - stepping with the convergence speed of a Newton method. Good for highly unstable - systems.
-- `RobustMultiNewton()`: A polyalgorithm that mixes highly robust methods (line searches and - trust regions) in order to be as robust as possible for difficult problems. If this method - fails to converge, then one can be pretty certain that most (all?) other choices would - likely fail. -- `FastShortcutNonlinearPolyalg()`: The default method. A polyalgorithm that mixes fast methods - with fallbacks to robust methods to allow for solving easy problems quickly without sacrificing - robustness on the hard problems. -- `Broyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and - Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of - the Jacobian matrix is sufficiently large. -- `Klement()`: Generalization of Klement's Quasi-Newton Method with Line Search and - Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of - the Jacobian matrix is sufficiently large. -- `LimitedMemoryBroyden()`: An advanced version of `LBroyden` which uses a limited memory - Broyden method. This is a fast method but unstable when the condition number of - the Jacobian matrix is sufficiently large. It is recommended to use `Broyden` or - `Klement` instead unless the memory usage is a concern. + - `NewtonRaphson()`: A Newton-Raphson method with swappable nonlinear solvers and autodiff + methods for high performance on large and sparse systems. + - `TrustRegion()`: A Newton Trust Region dogleg method with swappable nonlinear solvers and + autodiff methods for high performance on large and sparse systems. + - `LevenbergMarquardt()`: An advanced Levenberg-Marquardt implementation with the + improvements suggested in the [paper](https://arxiv.org/abs/1201.5885) "Improvements to + the Levenberg-Marquardt algorithm for nonlinear least-squares minimization". Designed for + large-scale and numerically-difficult nonlinear systems. 
+ - `PseudoTransient()`: A pseudo-transient method which mixes the stability of Euler-type + stepping with the convergence speed of a Newton method. Good for highly unstable + systems. + - `RobustMultiNewton()`: A polyalgorithm that mixes highly robust methods (line searches and + trust regions) in order to be as robust as possible for difficult problems. If this method + fails to converge, then one can be pretty certain that most (all?) other choices would + likely fail. + - `FastShortcutNonlinearPolyalg()`: The default method. A polyalgorithm that mixes fast methods + with fallbacks to robust methods to allow for solving easy problems quickly without sacrificing + robustness on the hard problems. + - `Broyden()`: Generalization of Broyden's Quasi-Newton Method with Line Search and + Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of + the Jacobian matrix is sufficiently large. + - `Klement()`: Generalization of Klement's Quasi-Newton Method with Line Search and + Automatic Jacobian Resetting. This is a fast method but unstable when the condition number of + the Jacobian matrix is sufficiently large. + - `LimitedMemoryBroyden()`: An advanced version of `LBroyden` which uses a limited memory + Broyden method. This is a fast method but unstable when the condition number of + the Jacobian matrix is sufficiently large. It is recommended to use `Broyden` or + `Klement` instead unless the memory usage is a concern. ### SimpleNonlinearSolve.jl @@ -82,21 +78,21 @@ These methods are included with NonlinearSolve.jl by default, though SimpleNonli can be used directly to reduce dependencies and improve load times. SimpleNonlinearSolve.jl's methods excel at small problems and problems defined with static arrays. -- `SimpleNewtonRaphson()`: A simplified implementation of the Newton-Raphson method. -- `SimpleBroyden()`: The classic Broyden's quasi-Newton method. 
-- `SimpleLimitedMemoryBroyden()`: A low-memory Broyden implementation, similar to L-BFGS. This method is - common in machine learning contexts but is known to be unstable in comparison to many - other choices. -- `SimpleKlement()`: A quasi-Newton method due to Klement. It's supposed to be more efficient - than Broyden's method, and it seems to be in the cases that have been tried, but more - benchmarking is required. -- `SimpleTrustRegion()`: A dogleg trust-region Newton method. Improved globalizing stability - for more robust fitting over basic Newton methods, though potentially with a cost. -- `SimpleDFSane()`: A low-overhead implementation of the df-sane method for solving - large-scale nonlinear systems of equations. -- `SimpleHalley()`: A low-overhead implementation of the Halley method. This is a higher order - method and thus can converge faster to low tolerances than a Newton method. Requires higher - order derivatives, so best used when automatic differentiation is available. + - `SimpleNewtonRaphson()`: A simplified implementation of the Newton-Raphson method. + - `SimpleBroyden()`: The classic Broyden's quasi-Newton method. + - `SimpleLimitedMemoryBroyden()`: A low-memory Broyden implementation, similar to L-BFGS. This method is + common in machine learning contexts but is known to be unstable in comparison to many + other choices. + - `SimpleKlement()`: A quasi-Newton method due to Klement. It's supposed to be more efficient + than Broyden's method, and it seems to be in the cases that have been tried, but more + benchmarking is required. + - `SimpleTrustRegion()`: A dogleg trust-region Newton method. Improved globalizing stability + for more robust fitting over basic Newton methods, though potentially with a cost. + - `SimpleDFSane()`: A low-overhead implementation of the df-sane method for solving + large-scale nonlinear systems of equations. + - `SimpleHalley()`: A low-overhead implementation of the Halley method. 
This is a higher order + method and thus can converge faster to low tolerances than a Newton method. Requires higher + order derivatives, so best used when automatic differentiation is available. !!! note @@ -110,35 +106,35 @@ SteadyStateDiffEq.jl uses ODE solvers to iteratively approach the steady state. very stable method for solving nonlinear systems, though often more computationally expensive than direct methods. -- `DynamicSS()`: Uses an ODE solver to find the steady state. Automatically terminates when - close to the steady state. -- `SSRootfind()`: Uses a NonlinearSolve compatible solver to find the steady state. + - `DynamicSS()`: Uses an ODE solver to find the steady state. Automatically terminates when + close to the steady state. + - `SSRootfind()`: Uses a NonlinearSolve compatible solver to find the steady state. ### SciMLNLSolve.jl This is a wrapper package for importing solvers from NLsolve.jl into the SciML interface. -- `NLSolveJL()`: A wrapper for [NLsolve.jl](https://github.com/JuliaNLSolvers/NLsolve.jl) + - `NLSolveJL()`: A wrapper for [NLsolve.jl](https://github.com/JuliaNLSolvers/NLsolve.jl) Submethod choices for this algorithm include: -- `:anderson`: Anderson-accelerated fixed-point iteration -- `:newton`: Classical Newton method with an optional line search -- `:trust_region`: Trust region Newton method (the default choice) + - `:anderson`: Anderson-accelerated fixed-point iteration + - `:newton`: Classical Newton method with an optional line search + - `:trust_region`: Trust region Newton method (the default choice) ### MINPACK.jl MINPACK.jl methods are good for medium-sized nonlinear solves. It does not scale due to the lack of sparse Jacobian support, though the methods are very robust and stable. 
-- `CMINPACK()`: A wrapper for using the classic MINPACK method through [MINPACK.jl](https://github.com/sglyon/MINPACK.jl) + - `CMINPACK()`: A wrapper for using the classic MINPACK method through [MINPACK.jl](https://github.com/sglyon/MINPACK.jl) Submethod choices for this algorithm include: -- `:hybr`: Modified version of Powell's algorithm. -- `:lm`: Levenberg-Marquardt. -- `:lmdif`: Advanced Levenberg-Marquardt -- `:hybrd`: Advanced modified version of Powell's algorithm + - `:hybr`: Modified version of Powell's algorithm. + - `:lm`: Levenberg-Marquardt. + - `:lmdif`: Advanced Levenberg-Marquardt + - `:hybrd`: Advanced modified version of Powell's algorithm ### Sundials.jl @@ -146,4 +142,4 @@ Sundials.jl are a classic set of C/Fortran methods which are known for good scal Newton-Krylov form. However, KINSOL is known to be less stable than some other implementations, as it has no line search or globalizer (trust region). -- `KINSOL()`: The KINSOL method of the SUNDIALS C library + - `KINSOL()`: The KINSOL method of the SUNDIALS C library diff --git a/docs/src/solvers/SteadyStateSolvers.md b/docs/src/solvers/SteadyStateSolvers.md index a3818a6d8..b1a57a2c0 100644 --- a/docs/src/solvers/SteadyStateSolvers.md +++ b/docs/src/solvers/SteadyStateSolvers.md @@ -9,9 +9,9 @@ Solves for the steady states in the problem defined by `prob` using the algorith Conversion to a NonlinearProblem is generally the fastest method. However, this will not guarantee the preferred root, and thus if the preferred root is required, then it's -recommended that one uses `DynamicSS`. For `DynamicSS`, often an adaptive stiff -solver, like a Rosenbrock or BDF method (`Rodas5` or `QNDF`), is a good way to allow for -very large time steps as the steady state approaches. +recommended that one uses `DynamicSS`. For `DynamicSS`, often an adaptive stiff solver, +like a Rosenbrock or BDF method (`Rodas5` or `QNDF`), is a good way to allow for very large +time steps as the steady state approaches. !!! 
note @@ -28,7 +28,11 @@ very large time steps as the steady state approaches. Any `SteadyStateProblem` can be trivially converted to a `NonlinearProblem` via `NonlinearProblem(prob::SteadyStateProblem)`. Using this approach, any of the solvers from -the [Nonlinear System Solvers page](@ref nonlinearsystemsolvers) can be used. +the [Nonlinear System Solvers page](@ref nonlinearsystemsolvers) can be used. As a +convenience, users can use: + + - `SSRootfind`: A wrapper around `NonlinearSolve.jl` compliant solvers which converts + the `SteadyStateProblem` to a `NonlinearProblem` and solves it. ### SteadyStateDiffEq.jl @@ -36,13 +40,13 @@ SteadyStateDiffEq.jl uses ODE solvers to iteratively approach the steady state. very stable method for solving nonlinear systems, though often computationally more expensive than direct methods. -- `DynamicSS` : Uses an ODE solver to find the steady state. Automatically terminates when - close to the steady state. `DynamicSS(alg; abstol=1e-8, reltol=1e-6, tspan=Inf)` requires - that an ODE algorithm is given as the first argument. The absolute and relative tolerances - specify the termination conditions on the derivative's closeness to zero. This internally - uses the `TerminateSteadyState` callback from the Callback Library. The simulated time, - for which the ODE is solved, can be limited by `tspan`. If `tspan` is a number, it is - equivalent to passing `(zero(tspan), tspan)`. + - `DynamicSS` : Uses an ODE solver to find the steady state. Automatically terminates + when close to the steady state. `DynamicSS(alg; tspan=Inf)` requires that an ODE + algorithm is given as the first argument. The absolute and relative tolerances specify + the termination conditions on the derivative's closeness to zero. This internally + uses the `TerminateSteadyState` callback from the Callback Library. The simulated time, + for which the ODE is solved, can be limited by `tspan`. 
If `tspan` is a number, it is + equivalent to passing `(zero(tspan), tspan)`. Example usage: @@ -56,4 +60,4 @@ sol = solve(prob, DynamicSS(CVODE_BDF()), dt = 1.0) !!! note - If you use `CVODE_BDF` you may need to give a starting `dt` via `dt=....`.* + If you use `CVODE_BDF` you may need to give a starting `dt` via `dt=....`. diff --git a/docs/src/tutorials/code_optimization.md b/docs/src/tutorials/code_optimization.md index cd4089d5d..1bfc1c302 100644 --- a/docs/src/tutorials/code_optimization.md +++ b/docs/src/tutorials/code_optimization.md @@ -2,19 +2,18 @@ ## General Code Optimization in Julia -Before starting this tutorial, we recommend the reader to check out one of the -many tutorials for optimization Julia code. The following is an incomplete -list: +Before starting this tutorial, we recommend the reader to check out one of the many +tutorials for optimizing Julia code. The following is an incomplete list: - [The Julia Performance Tips](https://docs.julialang.org/en/v1/manual/performance-tips/) - [MIT 18.337 Course Notes on Optimizing Serial Code](https://mitmath.github.io/18337/lecture2/optimizing) - [What scientists must know about hardware to write fast code](https://viralinstruction.com/posts/hardware/) -User-side optimizations are important because, for sufficiently difficult problems, -most time will be spent inside your `f` function, the function you are -trying to solve. “Efficient solvers" are those that reduce the required -number of `f` calls to hit the error tolerance. The main ideas for optimizing -your nonlinear solver code, or any Julia function, are the following: +User-side optimizations are important because, for sufficiently difficult problems, most +time will be spent inside your `f` function, the function you are trying to solve. +“Efficient solvers" are those that reduce the required number of `f` calls to hit the error +tolerance. 
The main ideas for optimizing your nonlinear solver code, or any Julia function, +are the following: - Make it non-allocating - Use StaticArrays for small arrays @@ -24,8 +23,8 @@ your nonlinear solver code, or any Julia function, are the following: - Make use of BLAS calls - Optimize algorithm choice -We'll discuss these strategies in the context of nonlinear solvers. -Let's start with small systems. +We'll discuss these strategies in the context of nonlinear solvers. Let's start with small +systems. ## Optimizing Nonlinear Solver Code for Small Systems @@ -48,7 +47,7 @@ We can use BenchmarkTools.jl to get more precise timings: ```@example small_opt using BenchmarkTools -@btime solve(prob, NewtonRaphson()) +@benchmark solve(prob, NewtonRaphson()) ``` Note that this way of writing the function is a shorthand for: @@ -59,14 +58,14 @@ function f(u, p) end ``` -where the function `f` returns an array. This is a common pattern from things like MATLAB's `fzero` -or SciPy's `scipy.optimize.fsolve`. However, by design it's very slow. I nthe benchmark you can see -that there are many allocations. These allocations cause the memory allocator to take more time -than the actual numerics itself, which is one of the reasons why codes from MATLAB and Python end up -slow. +where the function `f` returns an array. This is a common pattern from things like MATLAB's +`fzero` or SciPy's `scipy.optimize.fsolve`. However, by design it's very slow. In the +benchmark you can see that there are many allocations. These allocations cause the memory +allocator to take more time than the actual numerics itself, which is one of the reasons why +codes from MATLAB and Python end up slow. -In order to avoid this issue, we can use a non-allocating "in-place" approach. Written out by hand, -this looks like: +In order to avoid this issue, we can use a non-allocating "in-place" approach. 
Written out +by hand, this looks like: ```@example small_opt function f(du, u, p) @@ -76,7 +75,7 @@ function f(du, u, p) end prob = NonlinearProblem(f, u0, p) -@btime sol = solve(prob, NewtonRaphson()) +@benchmark sol = solve(prob, NewtonRaphson()) ``` Notice how much faster this already runs! We can make this code even simpler by using @@ -87,33 +86,31 @@ function f(du, u, p) du .= u .* u .- p end -@btime sol = solve(prob, NewtonRaphson()) +@benchmark sol = solve(prob, NewtonRaphson()) ``` ## Further Optimizations for Small Nonlinear Solves with Static Arrays and SimpleNonlinearSolve -Allocations are only expensive if they are “heap allocations”. For a more -in-depth definition of heap allocations, +Allocations are only expensive if they are “heap allocations”. For a more in-depth +definition of heap allocations, [there are many sources online](http://net-informations.com/faq/net/stack-heap.htm). -But a good working definition is that heap allocations are variable-sized slabs -of memory which have to be pointed to, and this pointer indirection costs time. -Additionally, the heap has to be managed, and the garbage controllers has to -actively keep track of what's on the heap. - -However, there's an alternative to heap allocations, known as stack allocations. -The stack is statically-sized (known at compile time) and thus its accesses are -quick. Additionally, the exact block of memory is known in advance by the -compiler, and thus re-using the memory is cheap. This means that allocating on -the stack has essentially no cost! - -Arrays have to be heap allocated because their size (and thus the amount of -memory they take up) is determined at runtime. But there are structures in -Julia which are stack-allocated. `struct`s for example are stack-allocated -“value-type”s. `Tuple`s are a stack-allocated collection. 
The most useful data -structure for NonlinearSolve though is the `StaticArray` from the package -[StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl). These arrays -have their length determined at compile-time. They are created using macros -attached to normal array expressions, for example: +But a good working definition is that heap allocations are variable-sized slabs of memory +which have to be pointed to, and this pointer indirection costs time. Additionally, the heap +has to be managed, and the garbage controllers has to actively keep track of what's on the +heap. + +However, there's an alternative to heap allocations, known as stack allocations. The stack +is statically-sized (known at compile time) and thus its accesses are quick. Additionally, +the exact block of memory is known in advance by the compiler, and thus re-using the memory +is cheap. This means that allocating on the stack has essentially no cost! + +Arrays have to be heap allocated because their size (and thus the amount of memory they take +up) is determined at runtime. But there are structures in Julia which are stack-allocated. +`struct`s for example are stack-allocated “value-type”s. `Tuple`s are a stack-allocated +collection. The most useful data structure for NonlinearSolve though is the `StaticArray` +from the package [StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl). These +arrays have their length determined at compile-time. They are created using macros attached +to normal array expressions, for example: ```@example small_opt using StaticArrays @@ -121,23 +118,21 @@ A = SA[2.0, 3.0, 5.0] typeof(A) # SVector{3, Float64} (alias for SArray{Tuple{3}, Float64, 1, 3}) ``` -Notice that the `3` after `SVector` gives the size of the `SVector`. It cannot -be changed. Additionally, `SVector`s are immutable, so we have to create a new -`SVector` to change values. But remember, we don't have to worry about -allocations because this data structure is stack-allocated. 
`SArray`s have -numerous extra optimizations as well: they have fast matrix multiplication, -fast QR factorizations, etc. which directly make use of the information about -the size of the array. Thus, when possible, they should be used. +Notice that the `3` after `SVector` gives the size of the `SVector`. It cannot be changed. +Additionally, `SVector`s are immutable, so we have to create a new `SVector` to change +values. But remember, we don't have to worry about allocations because this data structure +is stack-allocated. `SArray`s have numerous extra optimizations as well: they have fast +matrix multiplication, fast QR factorizations, etc. which directly make use of the +information about the size of the array. Thus, when possible, they should be used. -Unfortunately, static arrays can only be used for sufficiently small arrays. -After a certain size, they are forced to heap allocate after some instructions -and their compile time balloons. Thus, static arrays shouldn't be used if your -system has more than ~20 variables. Additionally, only the native Julia -algorithms can fully utilize static arrays. +Unfortunately, static arrays can only be used for sufficiently small arrays. After a certain +size, they are forced to heap allocate after some instructions and their compile time +balloons. Thus, static arrays shouldn't be used if your system has more than ~20 variables. +Additionally, only the native Julia algorithms can fully utilize static arrays. Let's ***optimize our nonlinear solve using static arrays***. Note that in this case, we -want to use the out-of-place allocating form, but this time we want to output -a static array. Doing it with broadcasting looks like: +want to use the out-of-place allocating form, but this time we want to output a static +array. 
Doing it with broadcasting looks like: ```@example small_opt function f_SA(u, p) @@ -146,28 +141,29 @@ end u0 = SA[1.0, 1.0] p = 2.0 prob = NonlinearProblem(f_SA, u0, p) -@btime solve(prob, NewtonRaphson()) +@benchmark solve(prob, NewtonRaphson()) ``` -Note that only change here is that `u0` is made into a StaticArray! If we needed to write `f` out -for a more complex nonlinear case, then we'd simply do the following: +Note that the only change here is that `u0` is made into a StaticArray! If we needed to write +`f` out for a more complex nonlinear case, then we'd simply do the following: ```@example small_opt function f_SA(u, p) SA[u[1] * u[1] - p, u[2] * u[2] - p] end -@btime solve(prob, NewtonRaphson()) +@benchmark solve(prob, NewtonRaphson()) ``` -However, notice that this did not give us a speedup but rather a slowdown. This is because many of the -methods in NonlinearSolve.jl are designed to scale to larger problems, and thus aggressively do things -like caching which is good for large problems but not good for these smaller problems and static arrays. -In order to see the full benefit, we need to move to one of the methods from SimpleNonlinearSolve.jl -which are designed for these small-scale static examples. Let's now use `SimpleNewtonRaphson`: +However, notice that this did not give us a speedup but rather a slowdown. This is because +many of the methods in NonlinearSolve.jl are designed to scale to larger problems, and thus +aggressively do things like caching which is good for large problems but not good for these +smaller problems and static arrays. In order to see the full benefit, we need to move to one +of the methods from SimpleNonlinearSolve.jl which are designed for these small-scale static +examples. Let's now use `SimpleNewtonRaphson`: ```@example small_opt -@btime solve(prob, SimpleNewtonRaphson()) +@benchmark solve(prob, SimpleNewtonRaphson()) ``` -And there we go, around 100ns from our starting point of almost 6μs! 
+And there we go, around `40ns` from our starting point of almost `4μs`! diff --git a/docs/src/tutorials/getting_started.md b/docs/src/tutorials/getting_started.md index a5cc8786e..836990b9e 100644 --- a/docs/src/tutorials/getting_started.md +++ b/docs/src/tutorials/getting_started.md @@ -1,36 +1,34 @@ # Getting Started with Nonlinear Rootfinding in Julia -NonlinearSolve.jl is a system for solving rootfinding problems, i.e. finding -the value $$u$$ such that $$f(u) = 0$$. In this tutorial we will go through -the basics of NonlinearSolve.jl, demonstrating the core ideas and leading you -to understanding the deeper parts of the documentation. - -## The Three Types of Nonlinear Systems - -There are three types of nonlinear systems: - - 1. The "standard nonlinear system", i.e. the `NonlinearProblem`. This is a - system of equations with an initial condition where you want to satisfy - all equations simultaneously. - 2. The "interval rootfinding problem", i.e. the `IntervalNonlinearProblem`. - This is the case where you're given an interval `[a,b]` and need to find - where `f(u) = 0` for `u` inside the bounds. - 3. The "steady state problem", i.e. find the `u` such that `u' = f(u) = 0`. - While related to (1), it's not entirely the same because there's a uniquely - defined privileged root. - 4. The nonlinear least squares problem, which is an overconstrained nonlinear - system (i.e. more equations than states) which might not be satisfiable, i.e. - there may be no `u` such that `f(u) = 0`, and thus we find the `u` which - minimizes `||f(u)||` in the least squares sense. - -For now let's focus on the first two. The other two are covered in later tutorials, -but from the first two we can show the general flow of the NonlinearSolve.jl package. +NonlinearSolve.jl is a system for solving rootfinding problems, i.e. finding the value $$u$$ +such that $$f(u) = 0$$. 
In this tutorial we will go through the basics of NonlinearSolve.jl, +demonstrating the core ideas and leading you to understanding the deeper parts of the +documentation. + +## The Four Types of Nonlinear Systems + +There are four types of nonlinear systems: + + 1. The "standard nonlinear system", i.e. the `NonlinearProblem`. This is a system of + equations with an initial condition where you want to satisfy all equations + simultaneously. + 2. The "interval rootfinding problem", i.e. the `IntervalNonlinearProblem`. This is the + case where you're given an interval `[a,b]` and need to find where `f(u) = 0` for `u` + inside the bounds. + 3. The "steady state problem", i.e. find the `u` such that `u' = f(u) = 0`. While related + to (1), it's not entirely the same because there's a uniquely defined privileged root. + 4. The nonlinear least squares problem, which is an under/over-constrained nonlinear system + which might not be satisfiable, i.e. there may be no `u` such that `f(u) = 0`, and thus + we find the `u` which minimizes `||f(u)||` in the least squares sense. + +One important distinction is that (1) and (3) require the input and output sizes to be the +same, while (4) does not. ## Problem Type 1: Solving Nonlinear Systems of Equations -A nonlinear system $$f(u) = 0$$ is specified by defining a function `f(u,p)`, -where `p` are the parameters of the system. For example, the following solves -the vector equation $$f(u) = u^2 - p$$ for a vector of equations: +A nonlinear system $$f(u) = 0$$ is specified by defining a function `f(u,p)`, where `p` are +the parameters of the system. For example, the following solves the vector +equation $$f(u) = u^2 - p$$ for a vector of equations: ```@example 1 using NonlinearSolve @@ -42,15 +40,15 @@ prob = NonlinearProblem(f, u0, p) sol = solve(prob) ``` -where `u0` is the initial condition for the rootfinder. 
Native NonlinearSolve.jl -solvers use the given type of `u0` to determine the type used within the solver -and the return. Note that the parameters `p` can be any type, but most are an -AbstractArray for automatic differentiation. +where `u0` is the initial condition for the rootfinder. Native NonlinearSolve.jl solvers use +the given type of `u0` to determine the type used within the solver and the return. Note +that the parameters `p` can be any type, but most are an AbstractArray for automatic +differentiation. ### Investigating the Solution -To investigate the solution, one can look at the elements of the `NonlinearSolution`. -The most important value is `sol.u`: this is the `u` that satisfies `f(u) = 0`. For example: +To investigate the solution, one can look at the elements of the `NonlinearSolution`. The +most important value is `sol.u`: this is the `u` that satisfies `f(u) = 0`. For example: ```@example 1 u = sol.u @@ -60,63 +58,70 @@ u = sol.u f(u, p) ``` -This final value, the difference of the solution against zero, can also be found with `sol.resid`: +This final value, the difference of the solution against zero, can also be found with +`sol.resid`: ```@example 1 sol.resid ``` -To know if the solution converged, or why the solution had not converged we can check the return -code (`retcode`): +To know if the solution converged, or why the solution had not converged we can check the +return code (`retcode`): ```@example 1 sol.retcode ``` -There are multiple return codes which can mean the solve was successful, and thus we can use the -general command `SciMLBase.successful_retcode` to check whether the solution process exited as -intended: +There are multiple return codes which can mean the solve was successful, and thus we can use +the general command `SciMLBase.successful_retcode` to check whether the solution process +exited as intended: ```@example 1 SciMLBase.successful_retcode(sol) ``` -If we're curious about what it took to solve this equation, then 
you're in luck because all of the -details can be found in `sol.stats`: +If we're curious about what it took to solve this equation, then you're in luck because all +of the details can be found in `sol.stats`: ```@example 1 sol.stats ``` -For more information on `NonlinearSolution`s, see the [`NonlinearSolution` manual page](@ref solution). +For more information on `NonlinearSolution`s, see the +[`NonlinearSolution` manual page](@ref solution). ### Interacting with the Solver Options -While `sol = solve(prob)` worked for our case here, in many situations you may need to interact more -deeply with how the solving process is done. First things first, you can specify the solver using the -positional arguments. For example, let's set the solver to `TrustRegion()`: +While `sol = solve(prob)` worked for our case here, in many situations you may need to +interact more deeply with how the solving process is done. First things first, you can +specify the solver using the positional arguments. For example, let's set the solver to +`TrustRegion()`: ```@example 1 solve(prob, TrustRegion()) ``` -For a complete list of solver choices, see [the nonlinear system solvers page](@ref nonlinearsystemsolvers). +For a complete list of solver choices, see +[the nonlinear system solvers page](@ref nonlinearsystemsolvers). -Next we can modify the tolerances. Here let's set some really low tolerances to force a tight solution: +Next we can modify the tolerances. Here let's set some really low tolerances to force a +tight solution: ```@example 1 solve(prob, TrustRegion(), reltol = 1e-12, abstol = 1e-12) ``` -There are many more options for doing this configuring. Specifically for handling termination conditions, -see the [Termination Conditions](@ref termination_condition) page for more details. And for more details on -all of the available keyword arguments, see the [solver options](@ref solver_options) page. +There are many more options for doing this configuring. 
Specifically for handling +termination conditions, see the [Termination Conditions](@ref termination_condition) page +for more details. And for more details on all of the available keyword arguments, see the +[solver options](@ref solver_options) page. ## Problem Type 2: Solving Interval Rootfinding Problems with Bracketing Methods -For scalar rootfinding problems, bracketing methods exist in NonlinearSolve. The difference with bracketing -methods is that with bracketing methods, instead of giving a `u0` initial condition, you pass a `uspan (a,b)` -bracket in which the zero is expected to live. For example: +For scalar rootfinding problems, bracketing methods exist in NonlinearSolve. The difference +with bracketing methods is that with bracketing methods, instead of giving a `u0` initial +condition, you pass a `uspan (a,b)` bracket in which the zero is expected to live. For +example: ```@example 1 using NonlinearSolve @@ -126,14 +131,67 @@ prob_int = IntervalNonlinearProblem(f, uspan) sol = solve(prob_int) ``` -All of the same option handling from before works just as before, now just with different solver choices -(see the [bracketing solvers](@ref bracketing) page for more details). For example, let's set the solver -to `ITP()` and set a high absolute tolerance: +All of the same option handling from before works just as before, now just with different +solver choices (see the [bracketing solvers](@ref bracketing) page for more details). For +example, let's set the solver to `ITP()` and set a high absolute tolerance: ```@example 1 sol = solve(prob_int, ITP(), abstol = 0.01) ``` +## Problem Type 3: Solving Steady State Problems + +For Steady State Problems, we have a wrapper package +[SteadyStateDiffEq.jl](https://github.com/SciML/SteadyStateDiffEq.jl). This package +automates handling SteadyStateProblems with NonlinearSolve and OrdinaryDiffEq. 
+ +```@example 1 +using NonlinearSolve, SteadyStateDiffEq + +f(u, p, t) = [2 - 2u[1]; u[1] - 4u[2]] +u0 = [0.0, 0.0] +prob = SteadyStateProblem(f, u0) + +solve(prob, SSRootfind()) +``` + +If you don't provide a nonlinear solver to `SSRootfind` it uses a polyalgorithm from +NonlinearSolve. We can also provide the actual nonlinear solver to use: + +```@example 1 +solve(prob, SSRootfind(Broyden())) +``` + +## Problem Type 4: Solving Nonlinear Least Squares Problems + +```@example 1 +using NonlinearSolve + +function nlls!(du, u, p) + du[1] = 2u[1] - 2 + du[2] = u[1] - 4u[2] + du[3] = 0 +end +``` + +Note that here the output array is of length `3` while the input array is of length `2`. We +need to provide the `resid_prototype` to tell the solver what the output size is (this can +be skipped for out of place problems): + +```@example 1 +u0 = [0.0, 0.0] +prob = NonlinearLeastSquaresProblem(NonlinearFunction(nlls!, resid_prototype = zeros(3)), + u0) + +solve(prob) +``` + +Same as before, we can change the solver and tolerances: + +```@example 1 +solve(prob, GaussNewton(), reltol = 1e-12, abstol = 1e-12) +``` + ## Going Beyond the Basics: How to use the Documentation Congrats, you now know how to use the basics of NonlinearSolve.jl! However, there is so much more to diff --git a/docs/src/tutorials/large_systems.md b/docs/src/tutorials/large_systems.md index 8e53d193c..5748cb351 100644 --- a/docs/src/tutorials/large_systems.md +++ b/docs/src/tutorials/large_systems.md @@ -1,9 +1,11 @@ # [Efficiently Solving Large Sparse Ill-Conditioned Nonlinear Systems in Julia](@id large_systems) -This tutorial is for getting into the extra features of using NonlinearSolve.jl. Solving ill-conditioned nonlinear systems -requires specializing the linear solver on properties of the Jacobian in order to cut down on the ``\mathcal{O}(n^3)`` -linear solve and the ``\mathcal{O}(n^2)`` back-solves. 
This tutorial is designed to explain the advanced usage of -NonlinearSolve.jl by solving the steady state stiff Brusselator partial differential equation (BRUSS) using NonlinearSolve.jl. +This tutorial is for getting into the extra features of using NonlinearSolve.jl. Solving +ill-conditioned nonlinear systems requires specializing the linear solver on properties of +the Jacobian in order to cut down on the ``\mathcal{O}(n^3)`` linear solve and the +``\mathcal{O}(n^2)`` back-solves. This tutorial is designed to explain the advanced usage of +NonlinearSolve.jl by solving the steady state stiff Brusselator partial differential +equation (BRUSS) using NonlinearSolve.jl. ## Definition of the Brusselator Equation @@ -47,13 +49,13 @@ u(x,y+1,t) &= u(x,y,t) \end{align} ``` -To solve this PDE, we will discretize it into a system of ODEs with the finite -difference method. We discretize `u` and `v` into arrays of the values at each -time point: `u[i,j] = u(i*dx,j*dy)` for some choice of `dx`/`dy`, and same for -`v`. Then our ODE is defined with `U[i,j,k] = [u v]`. The second derivative -operator, the Laplacian, discretizes to become a tridiagonal matrix with -`[1 -2 1]` and a `1` in the top right and bottom left corners. The nonlinear functions -are then applied at each point in space (they are broadcast). Use `dx=dy=1/32`. +To solve this PDE, we will discretize it into a system of ODEs with the finite difference +method. We discretize `u` and `v` into arrays of the values at each time point: +`u[i,j] = u(i*dx,j*dy)` for some choice of `dx`/`dy`, and same for `v`. Then our ODE is +defined with `U[i,j,k] = [u v]`. The second derivative operator, the Laplacian, discretizes +to become a tridiagonal matrix with `[1 -2 1]` and a `1` in the top right and bottom left +corners. The nonlinear functions are then applied at each point in space (they are +broadcast). Use `dx=dy=1/32`. 
The resulting `NonlinearProblem` definition is: @@ -100,18 +102,17 @@ prob_brusselator_2d = NonlinearProblem(brusselator_2d_loop, u0, p) ## Choosing Jacobian Types -When we are solving this nonlinear problem, the Jacobian must be built at many -iterations, and this can be one of the most -expensive steps. There are two pieces that must be optimized in order to reach -maximal efficiency when solving stiff equations: the sparsity pattern and the -construction of the Jacobian. The construction is filling the matrix -`J` with values, while the sparsity pattern is what `J` to use. +When we are solving this nonlinear problem, the Jacobian must be built at many iterations, +and this can be one of the most expensive steps. There are two pieces that must be optimized +in order to reach maximal efficiency when solving stiff equations: the sparsity pattern and +the construction of the Jacobian. The construction is filling the matrix `J` with values, +while the sparsity pattern is what `J` to use. -The sparsity pattern is given by a prototype matrix, the `jac_prototype`, which -will be copied to be used as `J`. The default is for `J` to be a `Matrix`, -i.e. a dense matrix. However, if you know the sparsity of your problem, then -you can pass a different matrix type. For example, a `SparseMatrixCSC` will -give a sparse matrix. Other sparse matrix types include: +The sparsity pattern is given by a prototype matrix, the `jac_prototype`, which will be +copied to be used as `J`. The default is for `J` to be a `Matrix`, i.e. a dense matrix. +However, if you know the sparsity of your problem, then you can pass a different matrix +type. For example, a `SparseMatrixCSC` will give a sparse matrix. Other sparse matrix types +include: - Bidiagonal - Tridiagonal @@ -122,16 +123,15 @@ give a sparse matrix. 
Other sparse matrix types include: ## Declaring a Sparse Jacobian with Automatic Sparsity Detection Jacobian sparsity is declared by the `jac_prototype` argument in the `NonlinearFunction`. -Note that you should only do this if the sparsity is high, for example, 0.1% -of the matrix is non-zeros, otherwise the overhead of sparse matrices can be higher -than the gains from sparse differentiation! +Note that you should only do this if the sparsity is high, for example, 0.1% of the matrix +is non-zeros, otherwise the overhead of sparse matrices can be higher than the gains from +sparse differentiation! One of the useful companion tools for NonlinearSolve.jl is -[Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl). -This allows for automatic declaration of Jacobian sparsity types. To see this -in action, we can give an example `du` and `u` and call `jacobian_sparsity` -on our function with the example arguments, and it will kick out a sparse matrix -with our pattern, that we can turn into our `jac_prototype`. +[Symbolics.jl](https://github.com/JuliaSymbolics/Symbolics.jl). This allows for automatic +declaration of Jacobian sparsity types. To see this in action, we can give an example `du` +and `u` and call `jacobian_sparsity` on our function with the example arguments, and it will +kick out a sparse matrix with our pattern, that we can turn into our `jac_prototype`. ```@example ill_conditioned_nlprob using Symbolics @@ -140,12 +140,11 @@ jac_sparsity = Symbolics.jacobian_sparsity((du, u) -> brusselator_2d_loop(du, u, du0, u0) ``` -Notice that Julia gives a nice print out of the sparsity pattern. That's neat, and -would be tedious to build by hand! Now we just pass it to the `NonlinearFunction` -like as before: +Notice that Julia gives a nice print out of the sparsity pattern. That's neat, and would be +tedious to build by hand! 
Now we just pass it to the `NonlinearFunction` like as before: ```@example ill_conditioned_nlprob -ff = NonlinearFunction(brusselator_2d_loop; jac_prototype = float.(jac_sparsity)) +ff = NonlinearFunction(brusselator_2d_loop; sparsity = jac_sparsity) ``` Build the `NonlinearProblem`: @@ -170,37 +169,36 @@ or `NewtonRaphson(linsolve = UMFPACKFactorization())` ## Using Jacobian-Free Newton-Krylov -A completely different way to optimize the linear solvers for large sparse -matrices is to use a Krylov subspace method. This requires choosing a linear -solver for changing to a Krylov method. To swap the linear solver out, we use -the `linsolve` command and choose the GMRES linear solver. +A completely different way to optimize the linear solvers for large sparse matrices is to +use a Krylov subspace method. This requires choosing a linear solver for changing to a +Krylov method. To swap the linear solver out, we use the `linsolve` command and choose the +GMRES linear solver. ```@example ill_conditioned_nlprob @btime solve(prob_brusselator_2d, NewtonRaphson(linsolve = KrylovJL_GMRES())); nothing # hide ``` -Notice that this acceleration does not require the definition of a sparsity -pattern, and can thus be an easier way to scale for large problems. For more -information on linear solver choices, see the +Notice that this acceleration does not require the definition of a sparsity pattern, and can +thus be an easier way to scale for large problems. For more information on linear solver +choices, see the [linear solver documentation](https://docs.sciml.ai/DiffEqDocs/stable/features/linear_nonlinear/#linear_nonlinear). `linsolve` choices are any valid [LinearSolve.jl](https://linearsolve.sciml.ai/dev/) solver. !!! note - Switching to a Krylov linear solver will automatically change the nonlinear problem solver - into Jacobian-free mode, dramatically reducing the memory required. This can - be overridden by adding `concrete_jac=true` to the algorithm. 
+    Switching to a Krylov linear solver will automatically change the nonlinear problem +    solver into Jacobian-free mode, dramatically reducing the memory required. This can be +    overridden by adding `concrete_jac=true` to the algorithm. ## Adding a Preconditioner Any [LinearSolve.jl-compatible preconditioner](https://docs.sciml.ai/LinearSolve/stable/basics/Preconditioners/) -can be used as a preconditioner in the linear solver interface. To define -preconditioners, one must define a `precs` function in compatible with nonlinear -solvers which returns the left and right preconditioners, matrices which -approximate the inverse of `W = I - gamma*J` used in the solution of the ODE. -An example of this with using [IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) -is as follows: +can be used as a preconditioner in the linear solver interface. To define preconditioners, +one must define a `precs` function compatible with nonlinear solvers which returns the +left and right preconditioners, matrices which approximate the inverse of `W = I - gamma*J` +used in the solution of the ODE. An example of this using +[IncompleteLU.jl](https://github.com/haampie/IncompleteLU.jl) is as follows: ```@example ill_conditioned_nlprob using IncompleteLU @@ -218,22 +216,21 @@ end nothing # hide ```
-We use `convert(AbstractMatrix,W)` to get the concrete `W` matrix (matching -`jac_prototype`, thus `SpraseMatrixCSC`) which we can use in the preconditioner's -definition. Then we use `IncompleteLU.ilu` on that sparse matrix to generate -the preconditioner. We return `Pl,nothing` to say that our preconditioner is a -left preconditioner, and that there is no right preconditioning. - -This method thus uses both the Krylov solver and the sparse Jacobian. Not only -that, it is faster than both implementations! IncompleteLU is fussy in that it -requires a well-tuned `τ` parameter. Another option is to use +Notice a few things about this preconditioner. This preconditioner uses the sparse Jacobian, +and thus we set `concrete_jac = true` to tell the algorithm to generate the Jacobian +(otherwise, a Jacobian-free algorithm is used with GMRES by default). Then `newW = true` +whenever a new `W` matrix is computed, and `newW = nothing` during the startup phase of the +solver. Thus, we do a check `newW === nothing || newW` and when true, it's only at these +points when we update the preconditioner, otherwise we just pass on the previous version. +We use `convert(AbstractMatrix,W)` to get the concrete `W` matrix (matching `jac_prototype`, +thus `SparseMatrixCSC`) which we can use in the preconditioner's definition. Then we use +`IncompleteLU.ilu` on that sparse matrix to generate the preconditioner. We return +`Pl, nothing` to say that our preconditioner is a left preconditioner, and that there is no +right preconditioning. + +This method thus uses both the Krylov solver and the sparse Jacobian. Not only that, it is +faster than both implementations! IncompleteLU is fussy in that it requires a well-tuned `τ` +parameter. Another option is to use  [AlgebraicMultigrid.jl](https://github.com/JuliaLinearAlgebra/AlgebraicMultigrid.jl) which is more automatic. 
The setup is very similar to before: diff --git a/docs/src/tutorials/modelingtoolkit.md b/docs/src/tutorials/modelingtoolkit.md index 3ee7d8e75..d6d9711c3 100644 --- a/docs/src/tutorials/modelingtoolkit.md +++ b/docs/src/tutorials/modelingtoolkit.md @@ -1,9 +1,9 @@ # [Symbolic Nonlinear System Definition and Acceleration via ModelingToolkit](@id modelingtoolkit) -[ModelingToolkit.jl](https://docs.sciml.ai/ModelingToolkit/dev/) is a symbolic-numeric modeling system -for the Julia SciML ecosystem. It adds a high-level interactive interface for the numerical solvers -which can make it easy to symbolically modify and generate equations to be solved. The basic form of -using ModelingToolkit looks as follows: +[ModelingToolkit.jl](https://docs.sciml.ai/ModelingToolkit/dev/) is a symbolic-numeric +modeling system for the Julia SciML ecosystem. It adds a high-level interactive interface +for the numerical solvers which can make it easy to symbolically modify and generate +equations to be solved. The basic form of using ModelingToolkit looks as follows: ```@example mtk using ModelingToolkit, NonlinearSolve @@ -31,8 +31,8 @@ sol = solve(prob, NewtonRaphson()) ## Symbolic Derivations of Extra Functions -As a symbolic system, ModelingToolkit can be used to represent the equations and derive new forms. For example, -let's look at the equations: +As a symbolic system, ModelingToolkit can be used to represent the equations and derive new +forms. 
For example, let's look at the equations: ```@example mtk equations(ns) @@ -44,8 +44,8 @@ We can ask it what the Jacobian of our system is via `calculate_jacobian`: calculate_jacobian(ns) ``` -We can tell MTK to generate a computable form of this analytical Jacobian via `jac = true` to help the solver -use efficient forms: +We can tell MTK to generate a computable form of this analytical Jacobian via `jac = true` +to help the solver use efficient forms: ```@example mtk prob = NonlinearProblem(ns, u0, ps, jac = true) @@ -54,9 +54,9 @@ sol = solve(prob, NewtonRaphson()) ## Symbolic Simplification of Nonlinear Systems via Tearing -One of the major reasons for using ModelingToolkit is to allow structural simplification of the systems. It's very -easy to write down a mathematical model that, in theory, could be solved more simply. Let's take a look at a quick -system: +One of the major reasons for using ModelingToolkit is to allow structural simplification of +the systems. It's very easy to write down a mathematical model that, in theory, could be +solved more simply. Let's take a look at a quick system: ```@example mtk @variables u1 u2 u3 u4 u5 @@ -118,6 +118,7 @@ sol[u5] ## Component-Based and Acausal Modeling -If you're interested in building models in a component or block based form, such as seen in systems like Simulink or Modelica, -take a deeper look at [ModelingToolkit.jl's documentation](https://docs.sciml.ai/ModelingToolkit/stable/) which goes into -detail on such features. +If you're interested in building models in a component or block based form, such as seen in +systems like Simulink or Modelica, take a deeper look at +[ModelingToolkit.jl's documentation](https://docs.sciml.ai/ModelingToolkit/stable/) which +goes into detail on such features. 
diff --git a/docs/src/tutorials/small_compile.md b/docs/src/tutorials/small_compile.md index c5a40ee05..ec28361f4 100644 --- a/docs/src/tutorials/small_compile.md +++ b/docs/src/tutorials/small_compile.md @@ -1,11 +1,11 @@ # [Faster Startup and and Static Compilation](@id fast_startup) -In many instances one may want a very lightweight version of NonlinearSolve.jl. For this case there -exists the solver package SimpleNonlinearSolve.jl. SimpleNonlinearSolve.jl solvers all satisfy the -same interface as NonlinearSolve.jl, but they are designed to be simpler, lightweight, and thus -have a faster startup time. Everything that can be done with NonlinearSolve.jl can be done with -SimpleNonlinearSolve.jl. Thus for example, we can solve the core tutorial problem with just SimpleNonlinearSolve.jl -as follows: +In many instances one may want a very lightweight version of NonlinearSolve.jl. For this +case there exists the solver package SimpleNonlinearSolve.jl. SimpleNonlinearSolve.jl +solvers all satisfy the same interface as NonlinearSolve.jl, but they are designed to be +simpler, lightweight, and thus have a faster startup time. Everything that can be done with +NonlinearSolve.jl can be done with SimpleNonlinearSolve.jl. Thus for example, we can solve +the core tutorial problem with just SimpleNonlinearSolve.jl as follows: ```@example simple using SimpleNonlinearSolve @@ -17,23 +17,24 @@ prob = NonlinearProblem(f, u0, p) sol = solve(prob, SimpleNewtonRaphson()) ``` -However, there are a few downsides to SimpleNonlinearSolve's `SimpleX` style algorithms to note: +However, there are a few downsides to SimpleNonlinearSolve's `SimpleX` style algorithms to +note: - 1. SimpleNonlinearSolve.jl's methods are not hooked into the LinearSolve.jl system, and thus do not have - the ability to specify linear solvers, use sparse matrices, preconditioners, and all of the other features - which are required to scale for very large systems of equations. - 2. 
SimpleNonlinearSolve.jl's methods have less robust error handling and termination conditions, and thus - these methods are missing some flexibility and give worse hints for debugging. - 3. SimpleNonlinearSolve.jl's methods are focused on out-of-place support. There is some in-place support, - but it's designed for simple smaller systems and so some optimizations are missing. + 1. SimpleNonlinearSolve.jl's methods are not hooked into the LinearSolve.jl system, and + thus do not have the ability to specify linear solvers, use sparse matrices, + preconditioners, and all of the other features which are required to scale for very + large systems of equations. + 2. SimpleNonlinearSolve.jl's methods have less robust error handling and termination + conditions, and thus these methods are missing some flexibility and give worse hints + for debugging. Note that these can be enabled but are disabled by default. However, the major upsides of SimpleNonlinearSolve.jl are: 1. The methods are optimized and non-allocating on StaticArrays 2. The methods are minimal in compilation -As such, you can use the code as shown above to have very low startup with good methods, but for more scaling and debuggability -we recommend the full NonlinearSolve.jl. But that said, +As such, you can use the code as shown above to have very low startup with good methods, but +for more scaling and debuggability we recommend the full NonlinearSolve.jl. But that said, ```@example simple using StaticArrays @@ -44,11 +45,13 @@ prob = NonlinearProblem(f, u0, p) sol = solve(prob, SimpleNewtonRaphson()) ``` -using StaticArrays.jl is also the fastest form for small equations, so if you know your system is small then SimpleNonlinearSolve.jl -is not only sufficient but optimal. +using StaticArrays.jl is also the fastest form for small equations, so if you know your +system is small then SimpleNonlinearSolve.jl is not only sufficient but optimal. 
## Static Compilation -Julia has tools for building small binaries via static compilation with [StaticCompiler.jl](https://github.com/tshort/StaticCompiler.jl). -However, these tools are currently limited to type-stable non-allocating functions. That said, SimpleNonlinearSolve.jl's solvers are -precisely the subset of NonlinearSolve.jl which are compatible with static compilation. +Julia has tools for building small binaries via static compilation with +[StaticCompiler.jl](https://github.com/tshort/StaticCompiler.jl). +However, these tools are currently limited to type-stable non-allocating functions. That +said, SimpleNonlinearSolve.jl's solvers are precisely the subset of NonlinearSolve.jl which +are compatible with static compilation. diff --git a/src/default.jl b/src/default.jl index b3f5d99ed..fa1eef52b 100644 --- a/src/default.jl +++ b/src/default.jl @@ -166,7 +166,7 @@ end """ RobustMultiNewton(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, - adkwargs...) + autodiff = nothing) A polyalgorithm focused on robustness. It uses a mixture of Newton methods with different globalizing techniques (trust region updates, line searches, etc.) in order to find a @@ -180,7 +180,7 @@ or more precision / more stable linear solver choice is required). - `autodiff`: determines the backend used for the Jacobian. Note that this argument is ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to - `AutoForwardDiff()`. Valid choices are types from ADTypes.jl. + `nothing`. - `concrete_jac`: whether to build a concrete Jacobian. If a Krylov-subspace method is used, then the Jacobian will not be constructed and instead direct Jacobian-vector products `J*v` are computed using forward-mode automatic differentiation or finite differencing @@ -197,22 +197,23 @@ or more precision / more stable linear solver choice is required). [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). 
""" function RobustMultiNewton(; concrete_jac = nothing, linsolve = nothing, - precs = DEFAULT_PRECS, adkwargs...) - algs = (TrustRegion(; concrete_jac, linsolve, precs, adkwargs...), - TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...), + precs = DEFAULT_PRECS, autodiff = nothing) + algs = (TrustRegion(; concrete_jac, linsolve, precs), + TrustRegion(; concrete_jac, linsolve, precs, autodiff, + radius_update_scheme = RadiusUpdateSchemes.Bastin), NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), - adkwargs...), + autodiff), TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.NLsolve, adkwargs...), + radius_update_scheme = RadiusUpdateSchemes.NLsolve, autodiff), TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.Fan, adkwargs...)) + radius_update_scheme = RadiusUpdateSchemes.Fan, autodiff)) return NonlinearSolvePolyAlgorithm(algs, Val(:NLS)) end """ FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothing, - precs = DEFAULT_PRECS, must_use_jacobian::Val = Val(false), adkwargs...) + precs = DEFAULT_PRECS, must_use_jacobian::Val = Val(false), + prefer_simplenonlinearsolve::Val{SA} = Val(false), autodiff = nothing) A polyalgorithm focused on balancing speed and robustness. It first tries less robust methods for more performance and then tries more robust techniques if the faster ones fail. @@ -221,7 +222,7 @@ for more performance and then tries more robust techniques if the faster ones fa - `autodiff`: determines the backend used for the Jacobian. Note that this argument is ignored if an analytical Jacobian is passed, as that will be used instead. Defaults to - `AutoForwardDiff()`. Valid choices are types from ADTypes.jl. + `nothing`. - `concrete_jac`: whether to build a concrete Jacobian. 
If a Krylov-subspace method is used, then the Jacobian will not be constructed and instead direct Jacobian-vector products `J*v` are computed using forward-mode automatic differentiation or finite differencing @@ -239,31 +240,53 @@ for more performance and then tries more robust techniques if the faster ones fa """ function FastShortcutNonlinearPolyalg(; concrete_jac = nothing, linsolve = nothing, precs = DEFAULT_PRECS, must_use_jacobian::Val{JAC} = Val(false), - adkwargs...) where {JAC} + prefer_simplenonlinearsolve::Val{SA} = Val(false), + autodiff = nothing) where {JAC, SA} if JAC - algs = (NewtonRaphson(; concrete_jac, linsolve, precs, adkwargs...), - NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), - adkwargs...), - TrustRegion(; concrete_jac, linsolve, precs, adkwargs...), - TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...)) + if SA + algs = (SimpleNewtonRaphson(; autodiff), + SimpleTrustRegion(; autodiff), + NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), + autodiff), + TrustRegion(; concrete_jac, linsolve, precs, + radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) + else + algs = (NewtonRaphson(; concrete_jac, linsolve, precs, autodiff), + NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), + autodiff), + TrustRegion(; concrete_jac, linsolve, precs, autodiff), + TrustRegion(; concrete_jac, linsolve, precs, + radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) + end else - algs = (Broyden(), - Broyden(; init_jacobian = Val(:true_jacobian)), - Klement(; linsolve, precs), - NewtonRaphson(; concrete_jac, linsolve, precs, adkwargs...), - NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), - adkwargs...), - TrustRegion(; concrete_jac, linsolve, precs, adkwargs...), - TrustRegion(; concrete_jac, linsolve, precs, - radius_update_scheme = RadiusUpdateSchemes.Bastin, adkwargs...)) + 
if SA + algs = (SimpleBroyden(), + Broyden(; init_jacobian = Val(:true_jacobian)), + SimpleKlement(), + SimpleNewtonRaphson(; autodiff), + SimpleTrustRegion(; autodiff), + NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), + autodiff), + TrustRegion(; concrete_jac, linsolve, precs, + radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) + else + algs = (Broyden(), + Broyden(; init_jacobian = Val(:true_jacobian)), + Klement(; linsolve, precs), + NewtonRaphson(; concrete_jac, linsolve, precs, autodiff), + NewtonRaphson(; concrete_jac, linsolve, precs, linesearch = BackTracking(), + autodiff), + TrustRegion(; concrete_jac, linsolve, precs, autodiff), + TrustRegion(; concrete_jac, linsolve, precs, + radius_update_scheme = RadiusUpdateSchemes.Bastin, autodiff)) + end end return NonlinearSolvePolyAlgorithm(algs, Val(:NLS)) end """ FastShortcutNLLSPolyalg(; concrete_jac = nothing, linsolve = nothing, - precs = DEFAULT_PRECS, adkwargs...) + precs = DEFAULT_PRECS, kwargs...) A polyalgorithm focused on balancing speed and robustness. It first tries less robust methods for more performance and then tries more robust techniques if the faster ones fail. @@ -289,11 +312,11 @@ for more performance and then tries more robust techniques if the faster ones fa [LinearSolve.jl documentation](https://docs.sciml.ai/LinearSolve/stable/). """ function FastShortcutNLLSPolyalg(; concrete_jac = nothing, linsolve = nothing, - precs = DEFAULT_PRECS, adkwargs...) - algs = (GaussNewton(; concrete_jac, linsolve, precs, adkwargs...), + precs = DEFAULT_PRECS, kwargs...) 
+ algs = (GaussNewton(; concrete_jac, linsolve, precs, kwargs...), GaussNewton(; concrete_jac, linsolve, precs, linesearch = BackTracking(), - adkwargs...), - LevenbergMarquardt(; concrete_jac, linsolve, precs, adkwargs...)) + kwargs...), + LevenbergMarquardt(; concrete_jac, linsolve, precs, kwargs...)) return NonlinearSolvePolyAlgorithm(algs, Val(:NLLS)) end @@ -313,8 +336,10 @@ end function SciMLBase.__solve(prob::NonlinearProblem, ::Nothing, args...; kwargs...) must_use_jacobian = Val(prob.f.jac !== nothing) - return SciMLBase.__solve(prob, FastShortcutNonlinearPolyalg(; must_use_jacobian), - args...; kwargs...) + prefer_simplenonlinearsolve = Val(prob.u0 isa SArray) + return SciMLBase.__solve(prob, + FastShortcutNonlinearPolyalg(; must_use_jacobian, + prefer_simplenonlinearsolve), args...; kwargs...) end function SciMLBase.__init(prob::NonlinearLeastSquaresProblem, ::Nothing, args...; kwargs...)