diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index e25f4f89..84629b0e 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -12,7 +12,9 @@ jobs:
     strategy:
       matrix:
         group:
-          - Core
+          - Shooting
+          - MIRK
+          - Others
         version:
           - '1'
     steps:
@@ -32,6 +34,8 @@ jobs:
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
+        env:
+          GROUP: ${{ matrix.group }}
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v3
         with:
diff --git a/Project.toml b/Project.toml
index 7aa43f32..35027d33 100644
--- a/Project.toml
+++ b/Project.toml
@@ -18,6 +18,7 @@ SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SparseDiffTools = "47a9eef4-7e08-11e9-0b38-333d64bd3804"
+Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 TruncatedStacktraces = "781d530d-4396-4725-bb49-402e4bee1e77"
 UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
 
diff --git a/ext/BoundaryValueDiffEqODEInterfaceExt.jl b/ext/BoundaryValueDiffEqODEInterfaceExt.jl
index 1e357006..257ea6de 100644
--- a/ext/BoundaryValueDiffEqODEInterfaceExt.jl
+++ b/ext/BoundaryValueDiffEqODEInterfaceExt.jl
@@ -1,6 +1,7 @@
 module BoundaryValueDiffEqODEInterfaceExt
 
 using SciMLBase, BoundaryValueDiffEq, ODEInterface
+import SciMLBase: __solve
 import ODEInterface: OptionsODE, OPT_ATOL, OPT_RTOL, OPT_METHODCHOICE, OPT_DIAGNOSTICOUTPUT,
     OPT_ERRORCONTROL, OPT_SINGULARTERM, OPT_MAXSTEPS, OPT_BVPCLASS, OPT_SOLMETHOD,
     OPT_RHS_CALLMODE, RHS_CALL_INSITU, evalSolution
@@ -18,7 +19,7 @@ end
 # BVPM2
 #------
 ## TODO: We can specify Drhs using forwarddiff if we want to
-function SciMLBase.__solve(prob::BVProblem, alg::BVPM2; dt = 0.0, reltol = 1e-3, kwargs...)
+function __solve(prob::BVProblem, alg::BVPM2; dt = 0.0, reltol = 1e-3, kwargs...)
     _test_bvpm2_bvpsol_problem_criteria(prob, prob.problem_type, :BVPM2)
 
     has_initial_guess = prob.u0 isa AbstractVector{<:AbstractArray}
@@ -64,7 +65,7 @@ end
 #-------
 # BVPSOL
 #-------
-function SciMLBase.__solve(prob::BVProblem, alg::BVPSOL; maxiters = 1000, reltol = 1e-3,
+function __solve(prob::BVProblem, alg::BVPSOL; maxiters = 1000, reltol = 1e-3,
     dt = 0.0, verbose = true, kwargs...)
     _test_bvpm2_bvpsol_problem_criteria(prob, prob.problem_type, :BVPSOL)
     @assert isa(prob.p, SciMLBase.NullParameters) "BVPSOL only supports NullParameters!"
diff --git a/src/BoundaryValueDiffEq.jl b/src/BoundaryValueDiffEq.jl
index 157e6f69..ad06c840 100644
--- a/src/BoundaryValueDiffEq.jl
+++ b/src/BoundaryValueDiffEq.jl
@@ -1,16 +1,17 @@
 module BoundaryValueDiffEq
 
 using Adapt, LinearAlgebra, PreallocationTools, Reexport, Setfield, SparseArrays, SciMLBase,
-    RecursiveArrayTools
+    Static, RecursiveArrayTools, ForwardDiff
 @reexport using ADTypes, DiffEqBase, NonlinearSolve, SparseDiffTools, SciMLBase
 
 import ADTypes: AbstractADType
-import ArrayInterface: matrix_colors, parameterless_type
+import ArrayInterface: matrix_colors, parameterless_type, undefmatrix
 import ConcreteStructs: @concrete
 import DiffEqBase: solve
 import ForwardDiff: pickchunksize
 import RecursiveArrayTools: ArrayPartition, DiffEqArray
-import SciMLBase: AbstractDiffEqInterpolation
+import SciMLBase: AbstractDiffEqInterpolation, StandardBVProblem, __solve
+import RecursiveArrayTools: ArrayPartition
 import SparseDiffTools: AbstractSparseADType
 import TruncatedStacktraces: @truncate_stacktrace
 import UnPack: @unpack
@@ -19,24 +20,27 @@ include("types.jl")
 include("utils.jl")
 include("algorithms.jl")
 include("alg_utils.jl")
+
 include("mirk_tableaus.jl")
-include("cache.jl")
-include("collocation.jl")
-include("nlprob.jl")
+
 include("solve/single_shooting.jl")
+include("solve/multiple_shooting.jl")
 include("solve/mirk.jl")
+
+include("collocation.jl")
+include("sparse_jacobians.jl")
+
 include("adaptivity.jl")
 include("interpolation.jl")
 
-function SciMLBase.__solve(prob::BVProblem, alg::BoundaryValueDiffEqAlgorithm, args...;
-    kwargs...)
+function __solve(prob::BVProblem, alg::BoundaryValueDiffEqAlgorithm, args...; kwargs...)
     cache = init(prob, alg, args...; kwargs...)
     return solve!(cache)
 end
 
-export Shooting
+export Shooting, MultipleShooting
 export MIRK2, MIRK3, MIRK4, MIRK5, MIRK6
-export MIRKJacobianComputationAlgorithm
+export MIRKJacobianComputationAlgorithm, BVPJacobianAlgorithm
 # From ODEInterface.jl
 export BVPM2, BVPSOL
 
diff --git a/src/adaptivity.jl b/src/adaptivity.jl
index ab6e3eb4..d76df301 100644
--- a/src/adaptivity.jl
+++ b/src/adaptivity.jl
@@ -18,9 +18,7 @@ end
 Find the interval that `t` belongs to in `mesh`. Assumes that `mesh` is sorted.
 """
 function interval(mesh, t)
-    t == first(mesh) && return 1
-    t == last(mesh) && return length(mesh) - 1
-    return searchsortedfirst(mesh, t) - 1
+    return clamp(searchsortedfirst(mesh, t) - 1, 1, length(mesh) - 1)
 end
 
 """
@@ -237,11 +235,8 @@ function sum_stages!(z, cache::MIRKCache, w, i::Int, dt = cache.mesh_dt[i])
 
     z .= zero(z)
     __maybe_matmul!(z, k_discrete[i].du[:, 1:stage], w[1:stage])
-    __maybe_matmul!(z,
-        k_interp[i][:, 1:(s_star - stage)],
-        w[(stage + 1):s_star],
-        true,
-        true)
+    __maybe_matmul!(z, k_interp[i][:, 1:(s_star - stage)],
+        w[(stage + 1):s_star], true, true)
     z .= z .* dt .+ cache.y₀[i]
 
     return z
@@ -253,18 +248,12 @@ end
 
     z .= zero(z)
     __maybe_matmul!(z, k_discrete[i].du[:, 1:stage], w[1:stage])
-    __maybe_matmul!(z,
-        k_interp[i][:, 1:(s_star - stage)],
-        w[(stage + 1):s_star],
-        true,
-        true)
+    __maybe_matmul!(z, k_interp[i][:, 1:(s_star - stage)],
+        w[(stage + 1):s_star], true, true)
     z′ .= zero(z′)
     __maybe_matmul!(z′, k_discrete[i].du[:, 1:stage], w′[1:stage])
-    __maybe_matmul!(z′,
-        k_interp[i][:, 1:(s_star - stage)],
-        w′[(stage + 1):s_star],
-        true,
-        true)
+    __maybe_matmul!(z′, k_interp[i][:, 1:(s_star - stage)],
+        w′[(stage + 1):s_star], true, true)
     z .= z .* dt[1] .+ cache.y₀[i]
 
     return z, z′
diff --git a/src/algorithms.jl b/src/algorithms.jl
index 5aff9f08..cb825cd2 100644
--- a/src/algorithms.jl
+++ b/src/algorithms.jl
@@ -1,13 +1,9 @@
-const DEFAULT_NLSOLVE_SHOOTING = NewtonRaphson()
-const DEFAULT_NLSOLVE_MIRK = NewtonRaphson()
-const DEFAULT_JACOBIAN_ALGORITHM_MIRK = MIRKJacobianComputationAlgorithm()
-
 # Algorithms
 abstract type BoundaryValueDiffEqAlgorithm <: SciMLBase.AbstractBVPAlgorithm end
 abstract type AbstractMIRK <: BoundaryValueDiffEqAlgorithm end
 
 """
-    Shooting(ode_alg; nlsolve = BoundaryValueDiffEq.DEFAULT_NLSOLVE_SHOOTING)
+    Shooting(ode_alg; nlsolve = NewtonRaphson())
 
 Single shooting method, reduces BVP to an initial value problem and solves the IVP.
 """
@@ -16,36 +12,73 @@ struct Shooting{O, N} <: BoundaryValueDiffEqAlgorithm
     nlsolve::N
 end
 
-Shooting(ode_alg; nlsolve = DEFAULT_NLSOLVE_SHOOTING) = Shooting(ode_alg, nlsolve)
+Shooting(ode_alg; nlsolve = NewtonRaphson()) = Shooting(ode_alg, nlsolve)
+
+"""
+    MultipleShooting(nshoots::Int, ode_alg; nlsolve = NewtonRaphson(),
+        grid_coarsening = true)
+
+Multiple Shooting method, reduces BVP to an initial value problem and solves the IVP.
+Significantly more stable than Single Shooting.
+"""
+@concrete struct MultipleShooting{J <: BVPJacobianAlgorithm}
+    ode_alg
+    nlsolve
+    jac_alg::J
+    nshoots::Int
+    grid_coarsening
+end
+
+function concretize_jacobian_algorithm(alg::MultipleShooting, prob)
+    jac_alg = concrete_jacobian_algorithm(alg.jac_alg, prob, alg)
+    return MultipleShooting(alg.ode_alg, alg.nlsolve, jac_alg, alg.nshoots,
+        alg.grid_coarsening)
+end
+
+function update_nshoots(alg::MultipleShooting, nshoots::Int)
+    return MultipleShooting(alg.ode_alg, alg.nlsolve, alg.jac_alg, nshoots,
+        alg.grid_coarsening)
+end
+
+function MultipleShooting(nshoots::Int, ode_alg; nlsolve = NewtonRaphson(),
+    grid_coarsening = true, jac_alg = BVPJacobianAlgorithm())
+    @assert grid_coarsening isa Bool || grid_coarsening isa Function ||
+            grid_coarsening isa AbstractVector{<:Integer} ||
+            grid_coarsening isa NTuple{N, <:Integer} where {N}
+    grid_coarsening isa Tuple && (grid_coarsening = Vector(grid_coarsening...))
+    if grid_coarsening isa AbstractVector
+        sort!(grid_coarsening; rev = true)
+        @assert all(grid_coarsening .> 0) && 1 ∉ grid_coarsening
+    end
+    return MultipleShooting(ode_alg, nlsolve, jac_alg, nshoots, grid_coarsening)
+end
 
 for order in (2, 3, 4, 5, 6)
     alg = Symbol("MIRK$(order)")
 
     @eval begin
         """
-            $($alg)(; nlsolve = BoundaryValueDiffEq.DEFAULT_NLSOLVE_MIRK,
-                jac_alg = BoundaryValueDiffEq.DEFAULT_JACOBIAN_ALGORITHM_MIRK)
+            $($alg)(; nlsolve = NewtonRaphson(), jac_alg = BVPJacobianAlgorithm())
 
         $($order)th order Monotonic Implicit Runge Kutta method, with Newton Raphson nonlinear solver as default.
 
         ## References
 
         @article{Enright1996RungeKuttaSW,
-        title={Runge-Kutta Software with Defect Control for Boundary Value ODEs},
-        author={Wayne H. Enright and Paul H. Muir},
-        journal={SIAM J. Sci. Comput.},
-        year={1996},
-        volume={17},
-        pages={479-497}
+            title={Runge-Kutta Software with Defect Control for Boundary Value ODEs},
+            author={Wayne H. Enright and Paul H. Muir},
+            journal={SIAM J. Sci. Comput.},
+            year={1996},
+            volume={17},
+            pages={479-497}
         }
         """
-        struct $(alg){N, J <: MIRKJacobianComputationAlgorithm} <: AbstractMIRK
+        struct $(alg){N, J <: BVPJacobianAlgorithm} <: AbstractMIRK
             nlsolve::N
             jac_alg::J
         end
 
-        function $(alg)(; nlsolve = DEFAULT_NLSOLVE_MIRK,
-            jac_alg = DEFAULT_JACOBIAN_ALGORITHM_MIRK)
+        function $(alg)(; nlsolve = NewtonRaphson(), jac_alg = BVPJacobianAlgorithm())
             return $(alg)(nlsolve, jac_alg)
         end
     end
diff --git a/src/cache.jl b/src/cache.jl
deleted file mode 100644
index 6d70d9e9..00000000
--- a/src/cache.jl
+++ /dev/null
@@ -1,67 +0,0 @@
-@concrete struct MIRKCache{iip, T}
-    order::Int                 # The order of MIRK method
-    stage::Int                 # The state of MIRK method
-    M::Int                     # The number of equations
-    in_size
-    f
-    bc
-    prob                       # BVProblem
-    problem_type               # StandardBVProblem
-    p                          # Parameters
-    alg                        # MIRK methods
-    TU                         # MIRK Tableau
-    ITU                        # MIRK Interpolation Tableau
-    # Everything below gets resized in adaptive methods
-    mesh                       # Discrete mesh
-    mesh_dt                    # Step size
-    k_discrete                 # Stage information associated with the discrete Runge-Kutta method
-    k_interp                   # Stage information associated with the discrete Runge-Kutta method
-    y
-    y₀
-    residual
-    # The following 2 caches are never resized
-    fᵢ_cache
-    fᵢ₂_cache
-    defect
-    new_stages
-    kwargs
-end
-
-Base.eltype(::MIRKCache{iip, T}) where {iip, T} = T
-
-"""
-    expand_cache!(cache::MIRKCache)
-
-After redistributing or halving the mesh, this function expands the required vectors to
-match the length of the new mesh.
-"""
-function expand_cache!(cache::MIRKCache)
-    Nₙ = length(cache.mesh)
-    __append_similar!(cache.k_discrete, Nₙ - 1, cache.M)
-    __append_similar!(cache.k_interp, Nₙ - 1, cache.M)
-    __append_similar!(cache.y, Nₙ, cache.M)
-    __append_similar!(cache.y₀, Nₙ, cache.M)
-    __append_similar!(cache.residual, Nₙ, cache.M)
-    __append_similar!(cache.defect, Nₙ - 1, cache.M)
-    __append_similar!(cache.new_stages, Nₙ - 1, cache.M)
-    return cache
-end
-
-__append_similar!(::Nothing, n, _) = nothing
-
-function __append_similar!(x::AbstractVector{<:AbstractArray}, n, _)
-    N = n - length(x)
-    N == 0 && return x
-    N < 0 && throw(ArgumentError("Cannot append a negative number of elements"))
-    append!(x, [similar(first(x)) for _ in 1:N])
-    return x
-end
-
-function __append_similar!(x::AbstractVector{<:MaybeDiffCache}, n, M)
-    N = n - length(x)
-    N == 0 && return x
-    N < 0 && throw(ArgumentError("Cannot append a negative number of elements"))
-    chunksize = pickchunksize(M * (N + length(x)))
-    append!(x, [maybe_allocate_diffcache(first(x), chunksize) for _ in 1:N])
-    return x
-end
diff --git a/src/collocation.jl b/src/collocation.jl
index 8ab995f3..0bfeb3f3 100644
--- a/src/collocation.jl
+++ b/src/collocation.jl
@@ -1,9 +1,3 @@
-__initial_state_from_prob(prob::BVProblem, mesh) = __initial_state_from_prob(prob.u0, mesh)
-__initial_state_from_prob(u0::AbstractArray, mesh) = [copy(vec(u0)) for _ in mesh]
-function __initial_state_from_prob(u0::AbstractVector{<:AbstractVector}, _)
-    [copy(vec(u)) for u in u0]
-end
-
 function Φ!(residual, cache::MIRKCache, y, u, p = cache.p)
     return Φ!(residual, cache.fᵢ_cache, cache.k_discrete, cache.f, cache.TU,
         y, u, p, cache.mesh, cache.mesh_dt, cache.stage)
diff --git a/src/nlprob.jl b/src/nlprob.jl
deleted file mode 100644
index 27487dc9..00000000
--- a/src/nlprob.jl
+++ /dev/null
@@ -1,253 +0,0 @@
-function construct_nlproblem(cache::MIRKCache{iip}, y::AbstractVector) where {iip}
-    loss_bc = if iip
-        function loss_bc_internal!(resid::AbstractVector, u::AbstractVector, p = cache.p)
-            y_ = recursive_unflatten!(cache.y, u)
-            eval_bc_residual!(resid, cache.problem_type, cache.bc, y_, p, cache.mesh)
-            return resid
-        end
-    else
-        function loss_bc_internal(u::AbstractVector, p = cache.p)
-            y_ = recursive_unflatten!(cache.y, u)
-            return eval_bc_residual(cache.problem_type, cache.bc, y_, p, cache.mesh)
-        end
-    end
-
-    loss_collocation = if iip
-        function loss_collocation_internal!(resid::AbstractVector, u::AbstractVector,
-            p = cache.p)
-            y_ = recursive_unflatten!(cache.y, u)
-            resids = [get_tmp(r, u) for r in cache.residual[2:end]]
-            Φ!(resids, cache, y_, u, p)
-            recursive_flatten!(resid, resids)
-            return resid
-        end
-    else
-        function loss_collocation_internal(u::AbstractVector, p = cache.p)
-            y_ = recursive_unflatten!(cache.y, u)
-            resids = Φ(cache, y_, u, p)
-            return mapreduce(vec, vcat, resids)
-        end
-    end
-
-    loss = if !(cache.problem_type isa TwoPointBVProblem)
-        if iip
-            function loss_internal!(resid::AbstractVector, u::AbstractVector, p = cache.p)
-                y_ = recursive_unflatten!(cache.y, u)
-                resids = [get_tmp(r, u) for r in cache.residual]
-                eval_bc_residual!(resids[1], cache.problem_type, cache.bc, y_, p,
-                    cache.mesh)
-                Φ!(resids[2:end], cache, y_, u, p)
-                recursive_flatten!(resid, resids)
-                return resid
-            end
-        else
-            function loss_internal(u::AbstractVector, p = cache.p)
-                y_ = recursive_unflatten!(cache.y, u)
-                resid_bc = eval_bc_residual(cache.problem_type, cache.bc, y_, p, cache.mesh)
-                resid_co = Φ(cache, y_, u, p)
-                return vcat(resid_bc, mapreduce(vec, vcat, resid_co))
-            end
-        end
-    else
-        # Reordering for 2 point BVP
-        if iip
-            function loss_internal_2point!(resid::AbstractVector, u::AbstractVector,
-                p = cache.p)
-                y_ = recursive_unflatten!(cache.y, u)
-                resids = [get_tmp(r, u) for r in cache.residual]
-                eval_bc_residual!(resids[1], cache.problem_type, cache.bc, y_, p,
-                    cache.mesh)
-                Φ!(resids[2:end], cache, y_, u, p)
-                recursive_flatten_twopoint!(resid, resids)
-                return resid
-            end
-        else
-            function loss_internal_2point(u::AbstractVector, p = cache.p)
-                y_ = recursive_unflatten!(cache.y, u)
-                resid_bc = eval_bc_residual(cache.problem_type, cache.bc, y_, p, cache.mesh)
-                resid_co = Φ(cache, y_, u, p)
-                return vcat(resid_bc.x[1], mapreduce(vec, vcat, resid_co), resid_bc.x[2])
-            end
-        end
-    end
-
-    return generate_nlprob(cache, y, loss_bc, loss_collocation, loss, cache.problem_type)
-end
-
-function construct_sparse_banded_jac_prototype(y, M, N)
-    l = sum(i -> min(2M + i, M * N) - max(1, i - 1) + 1, 1:(M * (N - 1)))
-    Is = Vector{Int}(undef, l)
-    Js = Vector{Int}(undef, l)
-    idx = 1
-    for i in 1:(M * (N - 1)), j in max(1, i - 1):min(2M + i, M * N)
-        Is[idx] = i
-        Js[idx] = j
-        idx += 1
-    end
-    col_colorvec = Vector{Int}(undef, M * N)
-    for i in eachindex(col_colorvec)
-        col_colorvec[i] = mod1(i, min(2M + 1, M * N) + 1)
-    end
-    row_colorvec = Vector{Int}(undef, M * (N - 1))
-    for i in eachindex(row_colorvec)
-        row_colorvec[i] = mod1(i, min(2M + 1, M * N) + 1)
-    end
-
-    y_ = similar(y, length(Is))
-    return (sparse(adapt(parameterless_type(y), Is), adapt(parameterless_type(y), Js),
-            y_, M * (N - 1), M * N), col_colorvec, row_colorvec)
-end
-
-# Two Point Specialization
-function construct_sparse_banded_jac_prototype(y::ArrayPartition, M, N)
-    l = sum(i -> min(2M + i, M * N) - max(1, i - 1) + 1, 1:(M * (N - 1)))
-    l_top = M * length(y.x[1].x[1])
-    l_bot = M * length(y.x[1].x[2])
-
-    Is = Vector{Int}(undef, l + l_top + l_bot)
-    Js = Vector{Int}(undef, l + l_top + l_bot)
-    idx = 1
-
-    for i in 1:length(y.x[1].x[1]), j in 1:M
-        Is[idx] = i
-        Js[idx] = j
-        idx += 1
-    end
-
-    for i in 1:(M * (N - 1)), j in max(1, i - 1):min(2M + i, M * N)
-        Is[idx] = i + length(y.x[1].x[1])
-        Js[idx] = j
-        idx += 1
-    end
-
-    for i in 1:length(y.x[1].x[2]), j in 1:M
-        Is[idx] = i + length(y.x[1].x[1]) + M * (N - 1)
-        Js[idx] = j + M * (N - 1)
-        idx += 1
-    end
-
-    col_colorvec = Vector{Int}(undef, M * N)
-    for i in eachindex(col_colorvec)
-        col_colorvec[i] = mod1(i, min(2M + 1, M * N) + 1)
-    end
-    row_colorvec = Vector{Int}(undef, M * N)
-    for i in eachindex(row_colorvec)
-        row_colorvec[i] = mod1(i, min(2M + 1, M * N) + 1)
-    end
-
-    y_ = similar(y, length(Is))
-    return (sparse(adapt(parameterless_type(y), Is), adapt(parameterless_type(y), Js),
-            y_, M * N, M * N), col_colorvec, row_colorvec)
-end
-
-function generate_nlprob(cache::MIRKCache{iip}, y, loss_bc, loss_collocation, loss,
-    _) where {iip}
-    @unpack nlsolve, jac_alg = cache.alg
-    N = length(cache.mesh)
-
-    resid_bc = cache.prob.f.bcresid_prototype === nothing ? similar(y, cache.M) :
-               cache.prob.f.bcresid_prototype
-    resid_collocation = similar(y, cache.M * (N - 1))
-
-    sd_bc = jac_alg.bc_diffmode isa AbstractSparseADType ? SymbolicsSparsityDetection() :
-            NoSparsityDetection()
-
-    if iip
-        cache_bc = sparse_jacobian_cache(jac_alg.bc_diffmode, sd_bc, loss_bc, resid_bc, y)
-    else
-        cache_bc = sparse_jacobian_cache(jac_alg.bc_diffmode, sd_bc, loss_bc, y;
-            fx = resid_bc)
-    end
-
-    sd_collocation = if jac_alg.collocation_diffmode isa AbstractSparseADType
-        Jₛ, cvec, rvec = construct_sparse_banded_jac_prototype(y, cache.M, N)
-        PrecomputedJacobianColorvec(; jac_prototype = Jₛ, row_colorvec = rvec,
-            col_colorvec = cvec)
-    else
-        NoSparsityDetection()
-    end
-
-    if iip
-        cache_collocation = sparse_jacobian_cache(jac_alg.collocation_diffmode,
-            sd_collocation, loss_collocation, resid_collocation, y)
-    else
-        cache_collocation = sparse_jacobian_cache(jac_alg.collocation_diffmode,
-            sd_collocation, loss_collocation, y; fx = resid_collocation)
-    end
-
-    jac_prototype = vcat(init_jacobian(cache_bc),
-        jac_alg.collocation_diffmode isa AbstractSparseADType ? Jₛ :
-        init_jacobian(cache_collocation))
-
-    # TODO: Pass `p` into `loss_bc` and `loss_collocation`. Currently leads to a Tag
-    #       mismatch for ForwardDiff
-    jac = if iip
-        function jac_internal!(J, x, p)
-            sparse_jacobian!(@view(J[1:(cache.M), :]), jac_alg.bc_diffmode, cache_bc,
-                loss_bc, resid_bc, x)
-            sparse_jacobian!(@view(J[(cache.M + 1):end, :]), jac_alg.collocation_diffmode,
-                cache_collocation, loss_collocation, resid_collocation, x)
-            return J
-        end
-    else
-        J_ = jac_prototype
-        function jac_internal(x, p)
-            sparse_jacobian!(@view(J_[1:(cache.M), :]), jac_alg.bc_diffmode, cache_bc,
-                loss_bc, x)
-            sparse_jacobian!(@view(J_[(cache.M + 1):end, :]), jac_alg.collocation_diffmode,
-                cache_collocation, loss_collocation, x)
-            return J_
-        end
-    end
-
-    return NonlinearProblem(NonlinearFunction{iip}(loss; jac, jac_prototype), y, cache.p)
-end
-
-function generate_nlprob(cache::MIRKCache{iip}, y, loss_bc, loss_collocation, loss,
-    ::TwoPointBVProblem) where {iip}
-    @unpack nlsolve, jac_alg = cache.alg
-    N = length(cache.mesh)
-
-    if !iip && cache.prob.f.bcresid_prototype === nothing
-        y_ = recursive_unflatten!(cache.y, y)
-        resid_ = cache.bc[1](y_[1], cache.p), cache.bc[2](y_[end], cache.p)
-        resid = ArrayPartition(ArrayPartition(resid_), similar(y, cache.M * (N - 1)))
-    else
-        resid = ArrayPartition(cache.prob.f.bcresid_prototype,
-            similar(y, cache.M * (N - 1)))
-    end
-
-    sd = if jac_alg.diffmode isa AbstractSparseADType
-        Jₛ, cvec, rvec = construct_sparse_banded_jac_prototype(resid, cache.M, N)
-        PrecomputedJacobianColorvec(; jac_prototype = Jₛ, row_colorvec = rvec,
-            col_colorvec = cvec)
-    else
-        NoSparsityDetection()
-    end
-
-    if iip
-        diffcache = sparse_jacobian_cache(jac_alg.diffmode, sd, loss, resid, y)
-    else
-        diffcache = sparse_jacobian_cache(jac_alg.diffmode, sd, loss, y; fx = resid)
-    end
-
-    jac_prototype = jac_alg.diffmode isa AbstractSparseADType ? Jₛ :
-                    init_jacobian(diffcache)
-
-    # TODO: Pass `p` into `loss_bc` and `loss_collocation`. Currently leads to a Tag
-    #       mismatch for ForwardDiff
-    jac = if iip
-        function jac_internal!(J, x, p)
-            sparse_jacobian!(J, jac_alg.diffmode, diffcache, loss, resid, x)
-            return J
-        end
-    else
-        J_ = jac_prototype
-        function jac_internal(x, p)
-            sparse_jacobian!(J_, jac_alg.diffmode, diffcache, loss, x)
-            return J_
-        end
-    end
-
-    return NonlinearProblem(NonlinearFunction{iip}(loss; jac, jac_prototype), y, cache.p)
-end
diff --git a/src/solve/mirk.jl b/src/solve/mirk.jl
index 7a2bb97a..5fc55ef2 100644
--- a/src/solve/mirk.jl
+++ b/src/solve/mirk.jl
@@ -1,26 +1,47 @@
+@concrete struct MIRKCache{iip, T}
+    order::Int                 # The order of MIRK method
+    stage::Int                 # The state of MIRK method
+    M::Int                     # The number of equations
+    in_size
+    f
+    bc
+    prob                       # BVProblem
+    problem_type               # StandardBVProblem
+    p                          # Parameters
+    alg                        # MIRK methods
+    TU                         # MIRK Tableau
+    ITU                        # MIRK Interpolation Tableau
+    bcresid_prototype
+    # Everything below gets resized in adaptive methods
+    mesh                       # Discrete mesh
+    mesh_dt                    # Step size
+    k_discrete                 # Stage information associated with the discrete Runge-Kutta method
+    k_interp                   # Stage information associated with the discrete Runge-Kutta method
+    y
+    y₀
+    residual
+    # The following 2 caches are never resized
+    fᵢ_cache
+    fᵢ₂_cache
+    defect
+    new_stages
+    kwargs
+end
+
+Base.eltype(::MIRKCache{iip, T}) where {iip, T} = T
+
 function SciMLBase.__init(prob::BVProblem, alg::AbstractMIRK; dt = 0.0,
     abstol = 1e-3, adaptive = true, kwargs...)
-    has_initial_guess = prob.u0 isa AbstractVector{<:AbstractArray}
+    @set! alg.jac_alg = concrete_jacobian_algorithm(alg.jac_alg, prob, alg)
     iip = isinplace(prob)
-    (T, M, n) = if has_initial_guess
-        # If user provided a vector of initial guesses
-        _u0 = first(prob.u0)
-        eltype(_u0), length(_u0), (length(prob.u0) - 1)
-    else
-        dt ≤ 0 && throw(ArgumentError("dt must be positive"))
-        eltype(prob.u0), length(prob.u0), Int(cld((prob.tspan[2] - prob.tspan[1]), dt))
-    end
+    has_initial_guess, T, M, n, X = __extract_problem_details(prob; dt,
+        check_positive_dt = true)
     chunksize = pickchunksize(M * (n + 1))
-    if has_initial_guess
-        fᵢ_cache = maybe_allocate_diffcache(vec(similar(_u0)), chunksize, alg.jac_alg)
-        fᵢ₂_cache = vec(similar(_u0))
-    else
-        fᵢ_cache = maybe_allocate_diffcache(vec(similar(prob.u0)), chunksize, alg.jac_alg)
-        fᵢ₂_cache = vec(similar(prob.u0))
-    end
 
-    # Without this, boxing breaks type stability
-    X = has_initial_guess ? _u0 : prob.u0
+    __alloc_diffcache = x -> __maybe_allocate_diffcache(vec(x), chunksize, alg.jac_alg)
+
+    fᵢ_cache = __alloc_diffcache(similar(X))
+    fᵢ₂_cache = vec(similar(X))
 
     # NOTE: Assumes the user provided initial guess is on a uniform mesh
     mesh = collect(range(prob.tspan[1], stop = prob.tspan[2], length = n + 1))
@@ -31,99 +52,85 @@ function SciMLBase.__init(prob::BVProblem, alg::AbstractMIRK; dt = 0.0,
 
     # Don't flatten this here, since we need to expand it later if needed
     y₀ = __initial_state_from_prob(prob, mesh)
-    y = [maybe_allocate_diffcache(vec(copy(yᵢ)), chunksize, alg.jac_alg) for yᵢ in y₀]
+    y = __alloc_diffcache.(copy.(y₀))
     TU, ITU = constructMIRK(alg, T)
     stage = alg_stage(alg)
 
-    k_discrete = [maybe_allocate_diffcache(similar(X, M, stage), chunksize, alg.jac_alg)
+    k_discrete = [__maybe_allocate_diffcache(similar(X, M, stage), chunksize, alg.jac_alg)
                   for _ in 1:n]
-    k_interp = adaptive ? [similar(X, M, ITU.s_star - stage) for _ in 1:n] :
-               [similar(X, 0, 0) for _ in 1:n]
+    k_interp = [similar(X, ifelse(adaptive, M, 0), ifelse(adaptive, ITU.s_star - stage, 0))
+                for _ in 1:n]
 
-    resid₁_size = if prob.f.bcresid_prototype === nothing
-        size(X)
-    elseif prob.f.bcresid_prototype isa ArrayPartition
-        size.(prob.f.bcresid_prototype.x)
-    else
-        size(prob.f.bcresid_prototype)
-    end
+    bcresid_prototype, resid₁_size = __get_bcresid_prototype(prob.problem_type, prob, X)
 
-    if iip
-        if prob.f.bcresid_prototype === nothing
-            residual = [maybe_allocate_diffcache(vec(copy(yᵢ)), chunksize, alg.jac_alg)
-                        for yᵢ in y₀]
-        else
-            residual = vcat([
-                    maybe_allocate_diffcache(vec(copy(prob.f.bcresid_prototype)),
-                        chunksize, alg.jac_alg)],
-                [maybe_allocate_diffcache(vec(copy(yᵢ)), chunksize, alg.jac_alg)
-                 for yᵢ in y₀[2:end]])
-        end
+    residual = if iip
+        vcat([__alloc_diffcache(bcresid_prototype)],
+            __alloc_diffcache.(copy.(@view(y₀[2:end]))))
     else
-        residual = nothing
+        nothing
     end
 
-    defect = adaptive ? [similar(X, M) for _ in 1:n] : [similar(X, 0) for _ in 1:n]
-
-    new_stages = adaptive ? [similar(X, M) for _ in 1:n] : [similar(X, 0) for _ in 1:n]
+    defect = [similar(X, ifelse(adaptive, M, 0)) for _ in 1:n]
+    new_stages = [similar(X, ifelse(adaptive, M, 0)) for _ in 1:n]
 
     # Transform the functions to handle non-vector inputs
     f, bc = if X isa AbstractVector
         prob.f, prob.f.bc
     elseif iip
-        function vecf!(du, u, p, t)
-            du_ = reshape(du, size(X))
-            x_ = reshape(u, size(X))
-            prob.f(du_, x_, p, t)
-            return du
-        end
+        vecf!(du, u, p, t) = prob.f(reshape(du, size(X)), reshape(u, size(X)), p, t)
         vecbc! = if !(prob.problem_type isa TwoPointBVProblem)
             function __vecbc!(resid, sol, p, t)
-                resid_ = reshape(resid, resid₁_size)
-                sol_ = map(s -> reshape(s, size(X)), sol)
-                prob.f.bc(resid_, sol_, p, t)
-                return resid
+                prob.f.bc(reshape(resid, resid₁_size),
+                    map(Base.Fix2(reshape, size(X)), sol), p, t)
             end
         else
             function __vecbc_a!(resida, ua, p)
-                resida_ = reshape(resida, resid₁_size[1])
-                ua_ = reshape(ua, size(X))
-                prob.f.bc[1](resida_, ua_, p)
-                return nothing
+                prob.f.bc[1](reshape(resida, resid₁_size[1]), reshape(ua, size(X)), p)
             end
             function __vecbc_b!(residb, ub, p)
-                residb_ = reshape(residb, resid₁_size[2])
-                ub_ = reshape(ub, size(X))
-                prob.f.bc[2](residb_, ub_, p)
-                return nothing
+                prob.f.bc[2](reshape(residb, resid₁_size[2]), reshape(ub, size(X)), p)
             end
             (__vecbc_a!, __vecbc_b!)
         end
+        bcresid_prototype = vec(bcresid_prototype)
         vecf!, vecbc!
     else
-        function vecf(u, p, t)
-            x_ = reshape(u, size(X))
-            return vec(prob.f(x_, p, t))
-        end
+        vecf(u, p, t) = vec(prob.f(reshape(u, size(X)), p, t))
         vecbc = if !(prob.problem_type isa TwoPointBVProblem)
-            function __vecbc(sol, p, t)
-                sol_ = map(s -> reshape(s, size(X)), sol)
-                return vec(prob.f.bc(sol_, p, t))
-            end
+            __vecbc(sol, p, t) = vec(prob.f.bc(map(Base.Fix2(reshape, size(X)), sol), p, t))
         else
             __vecbc_a(ua, p) = vec(prob.f.bc[1](reshape(ua, size(X)), p))
             __vecbc_b(ub, p) = vec(prob.f.bc[2](reshape(ub, size(X)), p))
             (__vecbc_a, __vecbc_b)
         end
+        bcresid_prototype = vec(bcresid_prototype)
         vecf, vecbc
     end
 
     return MIRKCache{iip, T}(alg_order(alg), stage, M, size(X), f, bc, prob,
-        prob.problem_type, prob.p, alg, TU, ITU, mesh, mesh_dt, k_discrete, k_interp, y, y₀,
-        residual, fᵢ_cache, fᵢ₂_cache, defect, new_stages,
+        prob.problem_type, prob.p, alg, TU, ITU, bcresid_prototype, mesh, mesh_dt,
+        k_discrete, k_interp, y, y₀, residual, fᵢ_cache, fᵢ₂_cache, defect, new_stages,
         (; defect_threshold, MxNsub, abstol, dt, adaptive, kwargs...))
 end
 
+"""
+    __expand_cache!(cache::MIRKCache)
+
+After redistributing or halving the mesh, this function expands the required vectors to
+match the length of the new mesh.
+"""
+function __expand_cache!(cache::MIRKCache)
+    Nₙ = length(cache.mesh)
+    __append_similar!(cache.k_discrete, Nₙ - 1, cache.M)
+    __append_similar!(cache.k_interp, Nₙ - 1, cache.M)
+    __append_similar!(cache.y, Nₙ, cache.M)
+    __append_similar!(cache.y₀, Nₙ, cache.M)
+    __append_similar!(cache.residual, Nₙ, cache.M)
+    __append_similar!(cache.defect, Nₙ - 1, cache.M)
+    __append_similar!(cache.new_stages, Nₙ - 1, cache.M)
+    return cache
+end
+
 function __split_mirk_kwargs(; defect_threshold, MxNsub, abstol, dt, adaptive = true,
     kwargs...)
     return ((defect_threshold, MxNsub, abstol, adaptive, dt),
@@ -138,7 +145,7 @@ function SciMLBase.solve!(cache::MIRKCache)
     defect_norm = 2 * abstol
 
     while SciMLBase.successful_retcode(info) && defect_norm > abstol
-        nlprob = construct_nlproblem(cache, recursive_flatten(y₀))
+        nlprob = __construct_nlproblem(cache, recursive_flatten(y₀))
         sol_nlprob = solve(nlprob, alg.nlsolve; abstol, kwargs...)
         recursive_unflatten!(cache.y₀, sol_nlprob.u)
 
@@ -161,7 +168,7 @@ function SciMLBase.solve!(cache::MIRKCache)
                     for (i, m) in enumerate(cache.mesh)
                         interp_eval!(cache.y₀[i], cache, m, mesh, mesh_dt)
                     end
-                    expand_cache!(cache)
+                    __expand_cache!(cache)
                 end
             end
         else
@@ -171,7 +178,7 @@ function SciMLBase.solve!(cache::MIRKCache)
                 info = ReturnCode.Failure
             else
                 half_mesh!(cache)
-                expand_cache!(cache)
+                __expand_cache!(cache)
                 recursive_fill!(cache.y₀, 0)
                 info = ReturnCode.Success # Force a restart
                 defect_norm = 2 * abstol
@@ -183,3 +190,146 @@ function SciMLBase.solve!(cache::MIRKCache)
     return DiffEqBase.build_solution(prob, alg, cache.mesh,
         u; interp = MIRKInterpolation(cache.mesh, u, cache), retcode = info)
 end
+
+# Constructing the Nonlinear Problem
+function __construct_nlproblem(cache::MIRKCache{iip}, y::AbstractVector) where {iip}
+    loss_bc = if iip
+        function loss_bc_internal!(resid::AbstractVector, u::AbstractVector, p = cache.p)
+            y_ = recursive_unflatten!(cache.y, u)
+            eval_bc_residual!(resid, cache.problem_type, cache.bc, y_, p, cache.mesh)
+            return resid
+        end
+    else
+        function loss_bc_internal(u::AbstractVector, p = cache.p)
+            y_ = recursive_unflatten!(cache.y, u)
+            return eval_bc_residual(cache.problem_type, cache.bc, y_, p, cache.mesh)
+        end
+    end
+
+    loss_collocation = if iip
+        function loss_collocation_internal!(resid::AbstractVector, u::AbstractVector,
+            p = cache.p)
+            y_ = recursive_unflatten!(cache.y, u)
+            resids = [get_tmp(r, u) for r in cache.residual[2:end]]
+            Φ!(resids, cache, y_, u, p)
+            recursive_flatten!(resid, resids)
+            return resid
+        end
+    else
+        function loss_collocation_internal(u::AbstractVector, p = cache.p)
+            y_ = recursive_unflatten!(cache.y, u)
+            resids = Φ(cache, y_, u, p)
+            return mapreduce(vec, vcat, resids)
+        end
+    end
+
+    loss = if iip
+        function loss_internal!(resid::AbstractVector, u::AbstractVector, p = cache.p)
+            y_ = recursive_unflatten!(cache.y, u)
+            resids = [get_tmp(r, u) for r in cache.residual]
+            eval_bc_residual!(resids[1], cache.problem_type, cache.bc, y_, p, cache.mesh)
+            Φ!(resids[2:end], cache, y_, u, p)
+            if cache.problem_type isa TwoPointBVProblem
+                recursive_flatten_twopoint!(resid, resids)
+            else
+                recursive_flatten!(resid, resids)
+            end
+            return resid
+        end
+    else
+        function loss_internal(u::AbstractVector, p = cache.p)
+            y_ = recursive_unflatten!(cache.y, u)
+            resid_bc = eval_bc_residual(cache.problem_type, cache.bc, y_, p, cache.mesh)
+            resid_co = Φ(cache, y_, u, p)
+            if cache.problem_type isa TwoPointBVProblem
+                return vcat(resid_bc.x[1], mapreduce(vec, vcat, resid_co), resid_bc.x[2])
+            else
+                return vcat(resid_bc, mapreduce(vec, vcat, resid_co))
+            end
+        end
+    end
+
+    return __construct_nlproblem(cache, y, loss_bc, loss_collocation, loss,
+        cache.problem_type)
+end
+
+function __construct_nlproblem(cache::MIRKCache{iip}, y, loss_bc, loss_collocation, loss,
+    ::StandardBVProblem) where {iip}
+    @unpack nlsolve, jac_alg = cache.alg
+    N = length(cache.mesh)
+
+    resid_bc = cache.bcresid_prototype
+    resid_collocation = similar(y, cache.M * (N - 1))
+
+    sd_bc = jac_alg.bc_diffmode isa AbstractSparseADType ? SymbolicsSparsityDetection() :
+            NoSparsityDetection()
+    cache_bc = __sparse_jacobian_cache(Val(iip), jac_alg.bc_diffmode, sd_bc, loss_bc,
+        resid_bc, y)
+
+    sd_collocation = if jac_alg.nonbc_diffmode isa AbstractSparseADType
+        PrecomputedJacobianColorvec(__generate_sparse_jacobian_prototype(cache,
+            cache.problem_type, y, cache.M, N))
+    else
+        NoSparsityDetection()
+    end
+    cache_collocation = __sparse_jacobian_cache(Val(iip), jac_alg.nonbc_diffmode,
+        sd_collocation, loss_collocation, resid_collocation, y)
+
+    jac_prototype = vcat(init_jacobian(cache_bc), init_jacobian(cache_collocation))
+
+    jac = if iip
+        function jac_internal!(J, x, p)
+            sparse_jacobian!(@view(J[1:(cache.M), :]), jac_alg.bc_diffmode, cache_bc,
+                loss_bc, resid_bc, x)
+            sparse_jacobian!(@view(J[(cache.M + 1):end, :]), jac_alg.nonbc_diffmode,
+                cache_collocation, loss_collocation, resid_collocation, x)
+            return J
+        end
+    else
+        J_ = jac_prototype
+        function jac_internal(x, p)
+            sparse_jacobian!(@view(J_[1:(cache.M), :]), jac_alg.bc_diffmode, cache_bc,
+                loss_bc, x)
+            sparse_jacobian!(@view(J_[(cache.M + 1):end, :]), jac_alg.nonbc_diffmode,
+                cache_collocation, loss_collocation, x)
+            return J_
+        end
+    end
+
+    return NonlinearProblem(NonlinearFunction{iip}(loss; jac, jac_prototype), y, cache.p)
+end
+
+function __construct_nlproblem(cache::MIRKCache{iip}, y, loss_bc, loss_collocation, loss,
+    ::TwoPointBVProblem) where {iip}
+    @unpack nlsolve, jac_alg = cache.alg
+    N = length(cache.mesh)
+
+    resid = ArrayPartition(cache.bcresid_prototype, similar(y, cache.M * (N - 1)))
+
+    # TODO: We can splitup the computation here as well similar to the Multiple Shooting
+    # TODO: code. That way for the BC part the actual jacobian computation is even cheaper
+    # TODO: Remember to not reorder if we end up using that implementation
+    sd = if jac_alg.diffmode isa AbstractSparseADType
+        PrecomputedJacobianColorvec(__generate_sparse_jacobian_prototype(cache,
+            cache.problem_type, resid.x[1], cache.M, N))
+    else
+        NoSparsityDetection()
+    end
+    diffcache = __sparse_jacobian_cache(Val(iip), jac_alg.diffmode, sd, loss, resid, y)
+    jac_prototype = init_jacobian(diffcache)
+
+    jac = if iip
+        function jac_internal!(J, x, p)
+            sparse_jacobian!(J, jac_alg.diffmode, diffcache, loss, resid, x)
+            return J
+        end
+    else
+        J_ = jac_prototype
+        function jac_internal(x, p)
+            sparse_jacobian!(J_, jac_alg.diffmode, diffcache, loss, x)
+            return J_
+        end
+    end
+
+    return NonlinearProblem(NonlinearFunction{iip}(loss; jac, jac_prototype), y, cache.p)
+end
diff --git a/src/solve/multiple_shooting.jl b/src/solve/multiple_shooting.jl
new file mode 100644
index 00000000..e9bb791f
--- /dev/null
+++ b/src/solve/multiple_shooting.jl
@@ -0,0 +1,358 @@
+function __solve(prob::BVProblem, _alg::MultipleShooting; odesolve_kwargs = (;),
+    nlsolve_kwargs = (;), ensemblealg = EnsembleThreads(), verbose = true, kwargs...)
+    @unpack f, tspan = prob
+
+    ig, T, N, Nig, u0 = __extract_problem_details(prob; dt = 0.1)
+    has_initial_guess = known(ig)
+
+    bcresid_prototype, resid_size = __get_bcresid_prototype(prob, u0)
+    iip, bc, u0, u0_size = isinplace(prob), prob.f.bc, deepcopy(u0), size(u0)
+
+    __alg = concretize_jacobian_algorithm(_alg, prob)
+    alg = if has_initial_guess && Nig != __alg.nshoots + 1
+        verbose &&
+            @warn "Initial guess length != `nshoots + 1`! Adapting to `nshoots = $(Nig - 1)`"
+        update_nshoots(__alg, Nig - 1)
+    else
+        __alg
+    end
+    nshoots = alg.nshoots
+
+    if prob.problem_type isa TwoPointBVProblem
+        resida_len = length(bcresid_prototype.x[1])
+        residb_len = length(bcresid_prototype.x[2])
+    end
+
+    # We will use colored AD for this part!
+    @views function solve_internal_odes!(resid_nodes, us, p, cur_nshoots, nodes)
+        ts_ = Vector{Vector{typeof(first(tspan))}}(undef, cur_nshoots)
+        us_ = Vector{Vector{typeof(us)}}(undef, cur_nshoots)
+
+        function prob_func(probᵢ, i, repeat)
+            return remake(probᵢ; u0 = reshape(us[((i - 1) * N + 1):(i * N)], u0_size),
+                tspan = (nodes[i], nodes[i + 1]))
+        end
+
+        function reduction(u, data, I)
+            for i in I
+                u.us[i] = data[i].u
+                u.ts[i] = data[i].t
+                u.resid[((i - 1) * N + 1):(i * N)] .= vec(us[(i * N + 1):((i + 1) * N)]) .-
+                                                      vec(data[i].u[end])
+            end
+            return (u, false)
+        end
+
+        odeprob = ODEProblem{iip}(f, reshape(us[1:N], u0_size), tspan, p)
+
+        ensemble_prob = EnsembleProblem(odeprob; prob_func, reduction, safetycopy = false,
+            u_init = (; us = us_, ts = ts_, resid = resid_nodes))
+        ensemble_sol = __solve(ensemble_prob, alg.ode_alg, ensemblealg; odesolve_kwargs...,
+            verbose, kwargs..., save_end = true, save_everystep = false,
+            trajectories = cur_nshoots)
+
+        return reduce(vcat, ensemble_sol.u.us), reduce(vcat, ensemble_sol.u.ts)
+    end
+
+    compute_bc_residual! = if prob.problem_type isa TwoPointBVProblem
+        @views function compute_bc_residual_tp!(resid_bc, us::ArrayPartition, p,
+            cur_nshoots, nodes, resid_nodes::Union{Nothing, MaybeDiffCache} = nothing)
+            ua, ub0 = us.x
+            # Just Recompute the last ODE Solution
+            lastodeprob = ODEProblem{iip}(f, reshape(ub0, u0_size),
+                (nodes[end - 1], nodes[end]), p)
+            sol_ode_last = __solve(lastodeprob, alg.ode_alg; odesolve_kwargs..., verbose,
+                kwargs..., save_everystep = false, saveat = (), save_end = true)
+            ub = vec(sol_ode_last.u[end])
+
+            resid_bc_a, resid_bc_b = if resid_bc isa ArrayPartition
+                resid_bc.x
+            else
+                resid_bc[1:resida_len], resid_bc[(resida_len + 1):end]
+            end
+
+            if iip
+                bc[1](resid_bc_a, ua, p)
+                bc[2](resid_bc_b, ub, p)
+            else
+                resid_bc_a .= bc[1](ua, p)
+                resid_bc_b .= bc[2](ub, p)
+            end
+
+            return resid_bc
+        end
+    else
+        @views function compute_bc_residual_mp!(resid_bc, us, p, cur_nshoots, nodes,
+            resid_nodes::Union{Nothing, MaybeDiffCache} = nothing)
+            if resid_nodes === nothing
+                _resid_nodes = similar(us, cur_nshoots * N)  # This might be Dual based on `us`
+            else
+                _resid_nodes = get_tmp(resid_nodes, us)
+            end
+
+            # NOTE: We need to recompute this to correctly propagate the dual numbers / gradients
+            _us, _ts = solve_internal_odes!(_resid_nodes, us, p, cur_nshoots, nodes)
+
+            # Boundary conditions
+            # Builds an ODESolution object to keep the framework for bc(,,) consistent
+            odeprob = ODEProblem{iip}(f, reshape(us[1:N], u0_size), tspan, p)
+            total_solution = SciMLBase.build_solution(odeprob, alg.ode_alg, _ts, _us)
+
+            if iip
+                eval_bc_residual!(resid_bc, prob.problem_type, bc, total_solution, p)
+            else
+                resid_bc .= eval_bc_residual(prob.problem_type, bc, total_solution, p)
+            end
+
+            return resid_bc
+        end
+    end
+
+    @views function loss!(resid::ArrayPartition, us, p, cur_nshoots, nodes)
+        resid_bc, resid_nodes = resid.x[1], resid.x[2]
+
+        _us, _ts = solve_internal_odes!(resid_nodes, us, p, cur_nshoots, nodes)
+
+        # Boundary conditions
+        # Builds an ODESolution object to keep the framework for bc(,,) consistent
+        odeprob = ODEProblem{iip}(f, reshape(us[1:N], u0_size), tspan, p)
+        total_solution = SciMLBase.build_solution(odeprob, alg.ode_alg, _ts, _us)
+
+        if iip
+            eval_bc_residual!(resid_bc, prob.problem_type, bc, total_solution, p)
+        else
+            resid_bc .= eval_bc_residual(prob.problem_type, bc, total_solution, p)
+        end
+
+        return resid
+    end
+
+    jac! = if prob.problem_type isa TwoPointBVProblem
+        @views function jac_tp!(J::AbstractMatrix, us, p, resid_bc,
+            resid_nodes::MaybeDiffCache, ode_jac_cache, bc_jac_cache::Tuple, ode_fn, bc_fn,
+            cur_nshoot, nodes)
+            # This is mostly a safety measure
+            fill!(J, 0)
+
+            J_bc = J[1:N, :]
+            J_c = J[(N + 1):end, :]
+
+            sparse_jacobian!(J_c, alg.jac_alg.nonbc_diffmode, ode_jac_cache, ode_fn,
+                resid_nodes.du, us)
+
+            # For BC
+            bc_jac_cache′, J_bc′ = bc_jac_cache
+            sparse_jacobian!(J_bc′, alg.jac_alg.bc_diffmode, bc_jac_cache′, bc_fn,
+                resid_bc, ArrayPartition(us[1:N], us[(end - N + 1):end]))
+            resida, residb = resid_bc.x
+            J_bc[1:length(resida), 1:N] .= J_bc′[1:length(resida), 1:N]
+            idxᵢ = (length(resida) + 1):(length(resida) + length(residb))
+            J_bc[idxᵢ, (end - 2N + 1):(end - N)] .= J_bc′[idxᵢ, (end - N + 1):end]
+
+            return nothing
+        end
+    else
+        @views function jac_mp!(J::AbstractMatrix, us, p, resid_bc,
+            resid_nodes::MaybeDiffCache, ode_jac_cache, bc_jac_cache, ode_fn, bc_fn,
+            cur_nshoot, nodes)
+            # This is mostly a safety measure
+            fill!(J, 0)
+
+            J_bc = J[1:N, :]
+            J_c = J[(N + 1):end, :]
+
+            sparse_jacobian!(J_c, alg.jac_alg.nonbc_diffmode, ode_jac_cache, ode_fn,
+                resid_nodes.du, us)
+
+            # For BC
+            sparse_jacobian!(J_bc, alg.jac_alg.bc_diffmode, bc_jac_cache, bc_fn, resid_bc,
+                us)
+
+            return nothing
+        end
+    end
+
+    # This gets all the nshoots except the final SingleShooting case
+    all_nshoots = get_all_nshoots(alg.grid_coarsening, nshoots)
+    u_at_nodes, nodes = similar(u0, 0), typeof(first(tspan))[]
+
+    for (i, cur_nshoot) in enumerate(all_nshoots)
+        if i == 1
+            nodes, u_at_nodes = multiple_shooting_initialize(prob, alg, ig, nshoots;
+                odesolve_kwargs, verbose, kwargs...)
+        else
+            nodes, u_at_nodes = multiple_shooting_initialize(u_at_nodes, prob, alg, nodes,
+                cur_nshoot, all_nshoots[i - 1]::Int, ig; odesolve_kwargs, verbose,
+                kwargs...)
+        end
+
+        resid_prototype = ArrayPartition(bcresid_prototype,
+            similar(u_at_nodes, cur_nshoot * N))
+        resid_nodes = __maybe_allocate_diffcache(resid_prototype.x[2],
+            pickchunksize((cur_nshoot + 1) * N), alg.jac_alg.bc_diffmode)
+
+        if alg.jac_alg.nonbc_diffmode isa AbstractSparseADType ||
+           alg.jac_alg.bc_diffmode isa AbstractSparseADType
+            J_full, J_c, J_bc = __generate_sparse_jacobian_prototype(alg, prob.problem_type,
+                bcresid_prototype, u0, N, cur_nshoot)
+        end
+
+        ode_fn = (du, u) -> solve_internal_odes!(du, u, prob.p, cur_nshoot, nodes)
+        sd_ode = alg.jac_alg.nonbc_diffmode isa AbstractSparseADType ?
+                 PrecomputedJacobianColorvec(J_c) : NoSparsityDetection()
+        ode_jac_cache = sparse_jacobian_cache(alg.jac_alg.nonbc_diffmode, sd_ode,
+            ode_fn, similar(u_at_nodes, cur_nshoot * N), u_at_nodes)
+
+        bc_fn = (du, u) -> compute_bc_residual!(du, u, prob.p, cur_nshoot, nodes,
+            resid_nodes)
+        if prob.problem_type isa TwoPointBVProblem
+            sd_bc = alg.jac_alg.bc_diffmode isa AbstractSparseADType ?
+                    PrecomputedJacobianColorvec(J_bc) : NoSparsityDetection()
+            bc_jac_cache_partial = sparse_jacobian_cache(alg.jac_alg.bc_diffmode, sd_bc,
+                bc_fn, similar(bcresid_prototype),
+                ArrayPartition(@view(u_at_nodes[1:N]),
+                    @view(u_at_nodes[(end - N + 1):end])))
+
+            bc_jac_cache = (bc_jac_cache_partial, init_jacobian(bc_jac_cache_partial))
+
+            jac_prototype = if @isdefined(J_full)
+                J_full
+            else
+                __zeros_like(u_at_nodes, length(resid_prototype), length(u_at_nodes))
+            end
+        else
+            sd_bc = alg.jac_alg.bc_diffmode isa AbstractSparseADType ?
+                    SymbolicsSparsityDetection() : NoSparsityDetection()
+            bc_jac_cache = sparse_jacobian_cache(alg.jac_alg.bc_diffmode,
+                sd_bc, bc_fn, similar(bcresid_prototype), u_at_nodes)
+
+            jac_prototype = vcat(init_jacobian(bc_jac_cache), init_jacobian(ode_jac_cache))
+        end
+
+        jac_fn = (J, us, p) -> jac!(J, us, p, similar(bcresid_prototype), resid_nodes,
+            ode_jac_cache, bc_jac_cache, ode_fn, bc_fn, cur_nshoot, nodes)
+
+        loss_function! = NonlinearFunction{true}((args...) -> loss!(args..., cur_nshoot,
+                nodes); resid_prototype, jac = jac_fn, jac_prototype)
+        nlprob = NonlinearProblem(loss_function!, u_at_nodes, prob.p)
+        sol_nlsolve = __solve(nlprob, alg.nlsolve; nlsolve_kwargs..., verbose, kwargs...)
+        # u_at_nodes = sol_nlsolve.u
+    end
+
+    single_shooting_prob = remake(prob; u0 = reshape(u_at_nodes[1:N], u0_size))
+    return __solve(single_shooting_prob, Shooting(alg.ode_alg; alg.nlsolve);
+        odesolve_kwargs, nlsolve_kwargs, verbose, kwargs...)
+end
+
+@views function multiple_shooting_initialize(prob, alg::MultipleShooting, ::True,
+    nshoots; odesolve_kwargs = (;), verbose = true, kwargs...)
+    @unpack f, u0, tspan, p = prob
+    @unpack ode_alg = alg
+
+    nodes = range(tspan[1], tspan[2]; length = nshoots + 1)
+    N = length(first(u0))
+
+    u_at_nodes = similar(first(u0), (nshoots + 1) * N)
+    recursive_flatten!(u_at_nodes, u0)
+    return nodes, u_at_nodes
+end
+
+@views function multiple_shooting_initialize(prob, alg::MultipleShooting, ::False,
+    nshoots; odesolve_kwargs = (;), verbose = true, kwargs...)
+    @unpack f, u0, tspan, p = prob
+    @unpack ode_alg = alg
+
+    nodes = range(tspan[1], tspan[2]; length = nshoots + 1)
+    N = length(u0)
+
+    # Ensures type stability in case the parameters are dual numbers
+    if !(typeof(p) <: SciMLBase.NullParameters)
+        if !isconcretetype(eltype(p)) && verbose
+            @warn "Type inference will fail if eltype(p) is not a concrete type"
+        end
+        u_at_nodes = similar(u0, promote_type(eltype(u0), eltype(p)), (nshoots + 1) * N)
+    else
+        u_at_nodes = similar(u0, (nshoots + 1) * N)
+    end
+
+    # Assumes no initial guess for now
+    start_prob = ODEProblem{isinplace(prob)}(f, u0, tspan, p)
+    sol = __solve(start_prob, ode_alg; odesolve_kwargs..., verbose, kwargs...,
+        saveat = nodes)
+
+    if SciMLBase.successful_retcode(sol)
+        u_at_nodes[1:N] .= vec(sol.u[1])
+        for i in 2:(nshoots + 1)
+            u_at_nodes[(N + (i - 2) * N) .+ (1:N)] .= vec(sol.u[i])
+        end
+    else
+        @warn "Initialization using odesolve failed. Initializing using 0s. It is \
+               recommended to provide an `initial_guess` function in this case."
+        fill!(u_at_nodes, 0)
+    end
+
+    return nodes, u_at_nodes
+end
+
+@views function multiple_shooting_initialize(u_at_nodes_prev, prob, alg, prev_nodes,
+    nshoots, old_nshoots, ig; odesolve_kwargs = (;), kwargs...)
+    @unpack f, u0, tspan, p = prob
+    nodes = range(tspan[1], tspan[2]; length = nshoots + 1)
+    N = known(ig) ? length(first(u0)) : length(u0)
+
+    u_at_nodes = similar(u_at_nodes_prev, N + nshoots * N)
+    u_at_nodes[1:N] .= u_at_nodes_prev[1:N]
+    u_at_nodes[(end - N + 1):end] .= u_at_nodes_prev[(end - N + 1):end]
+
+    skipsize = old_nshoots / nshoots
+    for i in 2:nshoots
+        pos = skipsize * (i - 1) + 1
+        idxs = (N + (i - 2) * N) .+ (1:N)
+        if isinteger(pos)
+            # If the current node is also a node of the finer grid
+            ind = trunc(Int, pos)
+            idxs_prev = (N + (ind - 2) * N .+ (1:N))
+            u_at_nodes[idxs] .= u_at_nodes_prev[idxs_prev]
+        else
+            # If the current node is not a node of the finer grid simulate from closest
+            # previous node and take result from simulation
+            fpos = floor(Int, pos)
+            r = pos - fpos
+
+            t0 = prev_nodes[fpos]
+            tf = prev_nodes[fpos + 1]
+            tstop = t0 + r * (tf - t0)
+
+            idxs_prev = (N + (fpos - 2) * N .+ (1:N))
+            ustart = u_at_nodes_prev[idxs_prev]
+
+            odeprob = ODEProblem(f, ustart, (t0, tstop), p)
+            odesol = __solve(odeprob, alg.ode_alg; odesolve_kwargs..., kwargs...,
+                saveat = (), save_end = true)
+
+            u_at_nodes[idxs] .= odesol.u[end]
+        end
+    end
+
+    return nodes, u_at_nodes
+end
+
+@inline function get_all_nshoots(grid_coarsening, nshoots)
+    if grid_coarsening isa Bool
+        !grid_coarsening && return [nshoots]
+        update_fn = Base.Fix2(÷, 2)
+    elseif grid_coarsening isa Function
+        update_fn = grid_coarsening
+    else
+        grid_coarsening[1] == nshoots && return grid_coarsening
+        return vcat(nshoots, grid_coarsening)
+    end
+    nshoots_vec = Int[nshoots]
+    next = update_fn(nshoots)
+    while next > 1
+        push!(nshoots_vec, next)
+        next = update_fn(last(nshoots_vec))
+    end
+    @assert !(1 in nshoots_vec)
+    return nshoots_vec
+end
diff --git a/src/solve/single_shooting.jl b/src/solve/single_shooting.jl
index 52e32501..2b8d9489 100644
--- a/src/solve/single_shooting.jl
+++ b/src/solve/single_shooting.jl
@@ -1,28 +1,35 @@
-# TODO: Differentiate between nlsolve kwargs and odesolve kwargs
-# TODO: Support Non-Vector Inputs
-function SciMLBase.__solve(prob::BVProblem, alg::Shooting; kwargs...)
-    iip = isinplace(prob)
-    bc = prob.f.bc
-    u0 = deepcopy(prob.u0)
+function __solve(prob::BVProblem, alg::Shooting; odesolve_kwargs = (;),
+    nlsolve_kwargs = (;), verbose = true, kwargs...)
+    ig, T, _, _, u0 = __extract_problem_details(prob; dt = 0.1)
+    known(ig) && verbose &&
+        @warn "Initial guess provided, but will be ignored for Shooting!"
+
+    bcresid_prototype, resid_size = __get_bcresid_prototype(prob, u0)
+    iip, bc, u0, u0_size = isinplace(prob), prob.f.bc, deepcopy(u0), size(u0)
+
     loss_fn = if iip
-        function loss!(resid, u0, p)
-            tmp_prob = ODEProblem{iip}(prob.f, u0, prob.tspan, p)
-            internal_sol = solve(tmp_prob, alg.ode_alg; kwargs...)
-            eval_bc_residual!(resid, prob.problem_type, bc, internal_sol, p)
+        function loss!(resid, u0_, p)
+            odeprob = ODEProblem{true}(prob.f, reshape(u0_, u0_size), prob.tspan, p)
+            odesol = __solve(odeprob, alg.ode_alg; odesolve_kwargs..., verbose, kwargs...)
+            eval_bc_residual!(__safe_reshape(resid, resid_size), prob.problem_type, bc,
+                odesol, p)
             return nothing
         end
     else
-        function loss(u0, p)
-            tmp_prob = ODEProblem(prob.f, u0, prob.tspan, p)
-            internal_sol = solve(tmp_prob, alg.ode_alg; kwargs...)
-            return eval_bc_residual(prob.problem_type, bc, internal_sol, p)
+        function loss(u0_, p)
+            odeprob = ODEProblem{false}(prob.f, reshape(u0_, u0_size), prob.tspan, p)
+            odesol = __solve(odeprob, alg.ode_alg; odesolve_kwargs..., verbose, kwargs...)
+            return vec(eval_bc_residual(prob.problem_type, bc, odesol, p))
         end
     end
-    opt = solve(NonlinearProblem(NonlinearFunction{iip}(loss_fn; prob.f.jac_prototype,
-                resid_prototype = prob.f.bcresid_prototype), u0, prob.p), alg.nlsolve;
-        kwargs...)
-    sol_prob = ODEProblem{iip}(prob.f, opt.u, prob.tspan, prob.p)
-    sol = solve(sol_prob, alg.ode_alg; kwargs...)
-    return DiffEqBase.solution_new_retcode(sol,
-        sol.retcode == opt.retcode ? ReturnCode.Success : ReturnCode.Failure)
+    opt = __solve(NonlinearProblem(NonlinearFunction{iip}(loss_fn; prob.f.jac_prototype,
+                resid_prototype = bcresid_prototype), vec(u0), prob.p), alg.nlsolve;
+        nlsolve_kwargs..., verbose, kwargs...)
+    newprob = ODEProblem{iip}(prob.f, reshape(opt.u, u0_size), prob.tspan, prob.p)
+    sol = __solve(newprob, alg.ode_alg; odesolve_kwargs..., verbose, kwargs...)
+
+    if !SciMLBase.successful_retcode(opt)
+        return SciMLBase.solution_new_retcode(sol, ReturnCode.Failure)
+    end
+    return sol
 end
diff --git a/src/sparse_jacobians.jl b/src/sparse_jacobians.jl
new file mode 100644
index 00000000..e9c88abb
--- /dev/null
+++ b/src/sparse_jacobians.jl
@@ -0,0 +1,205 @@
+# This file defines several common patterns of sparse Jacobians we see in the BVP solvers.
+function _sparse_like(I, J, x::AbstractArray, m = maximum(I), n = maximum(J))
+    I′ = adapt(parameterless_type(x), I)
+    J′ = adapt(parameterless_type(x), J)
+    V = __ones_like(x, length(I))
+    return sparse(I′, J′, V, m, n)
+end
+
+# Helpers for IIP/OOP functions
+function __sparse_jacobian_cache(::Val{iip}, ad, sd, fn, fx, y) where {iip}
+    if iip
+        sparse_jacobian_cache(ad, sd, fn, fx, y)
+    else
+        sparse_jacobian_cache(ad, sd, fn, y; fx)
+    end
+end
+
+@concrete struct ColoredMatrix
+    M
+    row_colorvec
+    col_colorvec
+end
+
+Base.size(M::ColoredMatrix, args...) = size(M.M, args...)
+Base.eltype(M::ColoredMatrix) = eltype(M.M)
+
+ColoredMatrix() = ColoredMatrix(nothing, nothing, nothing)
+
+function SparseDiffTools.PrecomputedJacobianColorvec(M::ColoredMatrix)
+    return PrecomputedJacobianColorvec(; jac_prototype = M.M, M.row_colorvec,
+        M.col_colorvec)
+end
+
+# For MIRK Methods
+"""
+    __generate_sparse_jacobian_prototype(::MIRKCache, y, M, N)
+    __generate_sparse_jacobian_prototype(::MIRKCache, _, y, M, N)
+    __generate_sparse_jacobian_prototype(::MIRKCache, ::TwoPointBVProblem, y, M, N)
+
+Generate a prototype of the sparse Jacobian matrix for the BVP problem with row and column
+coloring.
+
+If the problem is a TwoPointBVProblem, then this is the complete Jacobian, else it only
+computes the sparse part excluding the contributions from the boundary conditions.
+"""
+function __generate_sparse_jacobian_prototype(cache::MIRKCache, y, M, N)
+    return __generate_sparse_jacobian_prototype(cache, cache.problem_type, y, M, N)
+end
+
+function __generate_sparse_jacobian_prototype(::MIRKCache, _, y, M, N)
+    l = sum(i -> min(2M + i, M * N) - max(1, i - 1) + 1, 1:(M * (N - 1)))
+    Is = Vector{Int}(undef, l)
+    Js = Vector{Int}(undef, l)
+    idx = 1
+    for i in 1:(M * (N - 1)), j in max(1, i - 1):min(2M + i, M * N)
+        Is[idx] = i
+        Js[idx] = j
+        idx += 1
+    end
+
+    J_c = _sparse_like(Is, Js, y, M * (N - 1), M * N)
+
+    col_colorvec = Vector{Int}(undef, size(J_c, 2))
+    for i in eachindex(col_colorvec)
+        col_colorvec[i] = mod1(i, min(2M + 1, M * N) + 1)
+    end
+    row_colorvec = Vector{Int}(undef, size(J_c, 1))
+    for i in eachindex(row_colorvec)
+        row_colorvec[i] = mod1(i, min(2M + 1, M * N) + 1)
+    end
+
+    return ColoredMatrix(J_c, row_colorvec, col_colorvec)
+end
+
+function __generate_sparse_jacobian_prototype(::MIRKCache, ::TwoPointBVProblem,
+    y::ArrayPartition, M, N)
+    resida, residb = y.x
+
+    l = sum(i -> min(2M + i, M * N) - max(1, i - 1) + 1, 1:(M * (N - 1)))
+    l_top = M * length(resida)
+    l_bot = M * length(residb)
+
+    Is = Vector{Int}(undef, l + l_top + l_bot)
+    Js = Vector{Int}(undef, l + l_top + l_bot)
+
+    idx = 1
+    for i in 1:length(resida), j in 1:M
+        Is[idx] = i
+        Js[idx] = j
+        idx += 1
+    end
+    for i in 1:(M * (N - 1)), j in max(1, i - 1):min(2M + i, M * N)
+        Is[idx] = i + length(resida)
+        Js[idx] = j
+        idx += 1
+    end
+    for i in 1:length(residb), j in 1:M
+        Is[idx] = i + length(resida) + M * (N - 1)
+        Js[idx] = j + M * (N - 1)
+        idx += 1
+    end
+
+    J = _sparse_like(Is, Js, y, M * N, M * N)
+
+    col_colorvec = Vector{Int}(undef, size(J, 2))
+    for i in eachindex(col_colorvec)
+        col_colorvec[i] = mod1(i, min(2M + 1, M * N) + 1)
+    end
+    row_colorvec = Vector{Int}(undef, size(J, 1))
+    for i in eachindex(row_colorvec)
+        row_colorvec[i] = mod1(i, min(2M + 1, M * N) + 1)
+    end
+
+    return ColoredMatrix(J, row_colorvec, col_colorvec)
+end
+
+# For Multiple Shooting
+"""
+    __generate_sparse_jacobian_prototype(::MultipleShooting, ::StandardBVProblem,
+        bcresid_prototype, u0, N::Int, nshoots::Int)
+    __generate_sparse_jacobian_prototype(::MultipleShooting, ::TwoPointBVProblem,
+        bcresid_prototype, u0, N::Int, nshoots::Int)
+
+Returns a 3-Tuple:
+
+* Entire Jacobian Prototype (if Two-Point Problem) else `nothing`.
+* Sparse Non-BC Part Jacobian Prototype along with the column and row color vectors.
+* Sparse BC Part Jacobian Prototype along with the column and row color vectors (if
+  Two-Point Problem) else `nothing`.
+"""
+function __generate_sparse_jacobian_prototype(::MultipleShooting, ::StandardBVProblem,
+    bcresid_prototype, u0, N::Int, nshoots::Int)
+    Is = Vector{Int}(undef, (N^2 + N) * nshoots)
+    Js = Vector{Int}(undef, (N^2 + N) * nshoots)
+
+    idx = 1
+    for i in 1:nshoots
+        for (i₁, i₂) in Iterators.product(1:N, 1:N)
+            Is[idx] = i₁ + ((i - 1) * N)
+            Js[idx] = i₂ + ((i - 1) * N)
+            idx += 1
+        end
+        Is[idx:(idx + N - 1)] .= (1:N) .+ ((i - 1) * N)
+        Js[idx:(idx + N - 1)] .= (1:N) .+ (i * N)
+        idx += N
+    end
+
+    J_c = _sparse_like(Is, Js, u0)
+
+    col_colorvec = Vector{Int}(undef, size(J_c, 2))
+    for i in eachindex(col_colorvec)
+        col_colorvec[i] = mod1(i, 2N)
+    end
+    row_colorvec = Vector{Int}(undef, size(J_c, 1))
+    for i in eachindex(row_colorvec)
+        row_colorvec[i] = mod1(i, 2N)
+    end
+
+    return nothing, ColoredMatrix(J_c, row_colorvec, col_colorvec), nothing
+end
+
+function __generate_sparse_jacobian_prototype(alg::MultipleShooting, ::TwoPointBVProblem,
+    bcresid_prototype::ArrayPartition, u0, N::Int, nshoots::Int)
+    resida, residb = bcresid_prototype.x
+    L₁, L₂ = length(resida), length(residb)
+
+    _, J_c, _ = __generate_sparse_jacobian_prototype(alg, StandardBVProblem(),
+        bcresid_prototype, u0, N, nshoots)
+
+    Is_bc = Vector{Int}(undef, (L₁ + L₂) * N)
+    Js_bc = Vector{Int}(undef, (L₁ + L₂) * N)
+    idx = 1
+    for i in 1:L₁, j in 1:N
+        Is_bc[idx] = i
+        Js_bc[idx] = j
+        idx += 1
+    end
+    for i in 1:L₂, j in 1:N
+        Is_bc[idx] = i + L₁
+        Js_bc[idx] = j + N
+        idx += 1
+    end
+
+    col_colorvec_bc = Vector{Int}(undef, 2N)
+    row_colorvec_bc = Vector{Int}(undef, L₁ + L₂)
+    col_colorvec_bc[1:N] .= 1:N
+    col_colorvec_bc[(N + 1):end] .= 1:N
+    for i in 1:max(L₁, L₂)
+        i ≤ L₁ && (row_colorvec_bc[i] = i)
+        i ≤ L₂ && (row_colorvec_bc[i + L₁] = i)
+    end
+
+    J_bc = ColoredMatrix(_sparse_like(Is_bc, Js_bc, bcresid_prototype), row_colorvec_bc,
+        col_colorvec_bc)
+
+    J_full = _sparse_like(Int[], Int[], u0, size(J_bc, 1) + size(J_c, 1),
+        size(J_c, 2))
+
+    J_full[(L₁ + L₂ + 1):end, :] .= J_c.M
+    J_full[1:L₁, 1:N] .= J_bc.M[1:L₁, 1:N]
+    J_full[(L₁ + 1):(L₁ + L₂), (end - 2N + 1):(end - N)] .= J_bc.M[(L₁ + 1):(L₁ + L₂),
+        (N + 1):(2N)]
+
+    return J_full, J_c, J_bc
+end
diff --git a/src/types.jl b/src/types.jl
index ecbe1ae8..f38898af 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -33,40 +33,82 @@ end
 @truncate_stacktrace MIRKInterpTableau 1
 
 # Sparsity Detection
-@concrete struct MIRKJacobianComputationAlgorithm
+@concrete struct BVPJacobianAlgorithm
     bc_diffmode
-    collocation_diffmode
+    nonbc_diffmode
     diffmode
 end
 
-function MIRKJacobianComputationAlgorithm(diffmode = missing;
-    collocation_diffmode = missing, bc_diffmode = missing)
+function BVPJacobianAlgorithm(diffmode = missing; nonbc_diffmode = missing,
+    bc_diffmode = missing)
     if diffmode !== missing
-        @assert collocation_diffmode === missing && bc_diffmode === missing
-        return MIRKJacobianComputationAlgorithm(diffmode, diffmode, diffmode)
+        bc_diffmode = bc_diffmode === missing ? diffmode : bc_diffmode
+        nonbc_diffmode = nonbc_diffmode === missing ? diffmode : nonbc_diffmode
+        return BVPJacobianAlgorithm(diffmode, diffmode, diffmode)
     else
-        @static if VERSION < v"1.9"
-            diffmode = AutoForwardDiff()
-            bc_diffmode = bc_diffmode === missing ? AutoForwardDiff() : bc_diffmode
-            collocation_diffmode = collocation_diffmode === missing ?
-                                   AutoForwardDiff() : collocation_diffmode
-        else
-            diffmode = AutoSparseForwardDiff()
-            bc_diffmode = bc_diffmode === missing ? AutoForwardDiff() : bc_diffmode
-            collocation_diffmode = collocation_diffmode === missing ?
-                                   AutoSparseForwardDiff() : collocation_diffmode
-        end
-        return MIRKJacobianComputationAlgorithm(bc_diffmode, collocation_diffmode,
-            collocation_diffmode)
+        diffmode = nothing
+        bc_diffmode = bc_diffmode === missing ? nothing : bc_diffmode
+        nonbc_diffmode = nonbc_diffmode === missing ? nothing : nonbc_diffmode
+        return BVPJacobianAlgorithm(bc_diffmode, nonbc_diffmode, diffmode)
     end
 end
 
+"""
+    concrete_jacobian_algorithm(jac_alg, prob, alg)
+    concrete_jacobian_algorithm(jac_alg, problem_type, prob, alg)
+
+If user provided all the required fields, then return the user provided algorithm.
+Otherwise, based on the problem type and the algorithm, decide the missing fields.
+
+For example, for `TwoPointBVProblem`, the `bc_diffmode` is set to
+`AutoSparseForwardDiff` while for `StandardBVProblem`, the `bc_diffmode` is set to
+`AutoForwardDiff`.
+"""
+function concrete_jacobian_algorithm(jac_alg::BVPJacobianAlgorithm, prob::BVProblem, alg)
+    return concrete_jacobian_algorithm(jac_alg, prob.problem_type, prob, alg)
+end
+
+function concrete_jacobian_algorithm(jac_alg::BVPJacobianAlgorithm, ::StandardBVProblem,
+    prob::BVProblem, alg)
+    diffmode = jac_alg.diffmode === nothing ? AutoSparseForwardDiff() : jac_alg.diffmode
+    bc_diffmode = jac_alg.bc_diffmode === nothing ? AutoForwardDiff() : jac_alg.bc_diffmode
+    nonbc_diffmode = jac_alg.nonbc_diffmode === nothing ? AutoSparseForwardDiff() :
+                     jac_alg.nonbc_diffmode
+
+    return BVPJacobianAlgorithm(bc_diffmode, nonbc_diffmode, diffmode)
+end
+
+function concrete_jacobian_algorithm(jac_alg::BVPJacobianAlgorithm, ::TwoPointBVProblem,
+    prob::BVProblem, alg)
+    diffmode = jac_alg.diffmode === nothing ? AutoSparseForwardDiff() : jac_alg.diffmode
+    bc_diffmode = jac_alg.bc_diffmode === nothing ? AutoSparseForwardDiff() :
+                  jac_alg.bc_diffmode
+    nonbc_diffmode = jac_alg.nonbc_diffmode === nothing ? AutoSparseForwardDiff() :
+                     jac_alg.nonbc_diffmode
+
+    return BVPJacobianAlgorithm(bc_diffmode, nonbc_diffmode, diffmode)
+end
+
+# This can cause Type Instability
+function concretize_jacobian_algorithm(alg, prob)
+    @set! alg.jac_alg = concrete_jacobian_algorithm(alg.jac_alg, prob, alg)
+    return alg
+end
+
+function MIRKJacobianComputationAlgorithm(diffmode = missing;
+    collocation_diffmode = missing, bc_diffmode = missing)
+    Base.depwarn("`MIRKJacobianComputationAlgorithm` has been deprecated in favor of \
+        `BVPJacobianAlgorithm`. Replace `collocation_diffmode` with `nonbc_diffmode",
+        :MIRKJacobianComputationAlgorithm)
+    return BVPJacobianAlgorithm(diffmode; nonbc_diffmode = collocation_diffmode,
+        bc_diffmode)
+end
+
 __needs_diffcache(::Union{AutoForwardDiff, AutoSparseForwardDiff}) = true
 __needs_diffcache(_) = false
-function __needs_diffcache(jac_alg::MIRKJacobianComputationAlgorithm)
-    return __needs_diffcache(jac_alg.diffmode) ||
-           __needs_diffcache(jac_alg.bc_diffmode) ||
-           __needs_diffcache(jac_alg.collocation_diffmode)
+function __needs_diffcache(jac_alg::BVPJacobianAlgorithm)
+    return __needs_diffcache(jac_alg.diffmode) || __needs_diffcache(jac_alg.bc_diffmode) ||
+           __needs_diffcache(jac_alg.nonbc_diffmode)
 end
 
 # We don't need to always allocate a DiffCache. This works around that.
@@ -74,15 +116,11 @@ end
     du
 end
 
-function maybe_allocate_diffcache(x, chunksize, jac_alg)
-    if __needs_diffcache(jac_alg)
-        return DiffCache(x, chunksize)
-    else
-        return FakeDiffCache(x)
-    end
+function __maybe_allocate_diffcache(x, chunksize, jac_alg)
+    return __needs_diffcache(jac_alg) ? DiffCache(x, chunksize) : FakeDiffCache(x)
 end
-maybe_allocate_diffcache(x::DiffCache, chunksize) = DiffCache(similar(x.du), chunksize)
-maybe_allocate_diffcache(x::FakeDiffCache, _) = FakeDiffCache(similar(x.du))
+__maybe_allocate_diffcache(x::DiffCache, chunksize) = DiffCache(similar(x.du), chunksize)
+__maybe_allocate_diffcache(x::FakeDiffCache, _) = FakeDiffCache(similar(x.du))
 
 PreallocationTools.get_tmp(dc::FakeDiffCache, _) = dc.du
 
diff --git a/src/utils.jl b/src/utils.jl
index 3082566d..d68958b2 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -90,3 +90,76 @@ eval_bc_residual!(resid, _, bc!, sol, p, t) = bc!(resid, sol, p, t)
     bcb!(resid.x[2], ub, p)
     return resid
 end
+
+__append_similar!(::Nothing, n, _) = nothing
+
+function __append_similar!(x::AbstractVector{<:AbstractArray}, n, _)
+    N = n - length(x)
+    N == 0 && return x
+    N < 0 && throw(ArgumentError("Cannot append a negative number of elements"))
+    append!(x, [similar(first(x)) for _ in 1:N])
+    return x
+end
+
+function __append_similar!(x::AbstractVector{<:MaybeDiffCache}, n, M)
+    N = n - length(x)
+    N == 0 && return x
+    N < 0 && throw(ArgumentError("Cannot append a negative number of elements"))
+    chunksize = pickchunksize(M * (N + length(x)))
+    append!(x, [__maybe_allocate_diffcache(first(x), chunksize) for _ in 1:N])
+    return x
+end
+
+## Problem with Initial Guess
+function __extract_problem_details(prob; kwargs...)
+    return __extract_problem_details(prob, prob.u0; kwargs...)
+end
+function __extract_problem_details(prob, u0::AbstractVector{<:AbstractArray}; kwargs...)
+    # Problem has Initial Guess
+    _u0 = first(u0)
+    return True(), eltype(_u0), length(_u0), (length(u0) - 1), _u0
+end
+function __extract_problem_details(prob, u0; dt = 0.0, check_positive_dt::Bool = false)
+    # Problem does not have Initial Guess
+    check_positive_dt && dt ≤ 0 && throw(ArgumentError("dt must be positive"))
+    t₀, t₁ = prob.tspan
+    return False(), eltype(u0), length(u0), Int(cld(t₁ - t₀, dt)), prob.u0
+end
+
+__initial_state_from_prob(prob::BVProblem, mesh) = __initial_state_from_prob(prob.u0, mesh)
+__initial_state_from_prob(u0::AbstractArray, mesh) = [copy(vec(u0)) for _ in mesh]
+function __initial_state_from_prob(u0::AbstractVector{<:AbstractVector}, _)
+    return [copy(vec(u)) for u in u0]
+end
+
+function __get_bcresid_prototype(prob::BVProblem, u)
+    return __get_bcresid_prototype(prob.problem_type, prob, u)
+end
+function __get_bcresid_prototype(::TwoPointBVProblem, prob::BVProblem, u)
+    prototype = if isinplace(prob)
+        prob.f.bcresid_prototype
+    elseif prob.f.bcresid_prototype !== nothing
+        prob.f.bcresid_prototype
+    else
+        ArrayPartition(first(prob.f.bc)(u, prob.p), last(prob.f.bc)(u, prob.p))
+    end
+    return prototype, size.(prototype.x)
+end
+function __get_bcresid_prototype(::StandardBVProblem, prob::BVProblem, u)
+    prototype = prob.f.bcresid_prototype !== nothing ? prob.f.bcresid_prototype :
+                __zeros_like(u)
+    return prototype, size(prototype)
+end
+
+function __fill_like(v, x, args...)
+    y = similar(x, args...)
+    fill!(y, v)
+    return y
+end
+__zeros_like(args...) = __fill_like(0, args...)
+__ones_like(args...) = __fill_like(1, args...)
+
+__safe_reshape(x, args...) = reshape(x, args...)
+function __safe_reshape(x::ArrayPartition, sizes::NTuple)
+    return ArrayPartition(__safe_reshape.(x.x, sizes))
+end
diff --git a/test/ensemble.jl b/test/mirk/ensemble.jl
similarity index 79%
rename from test/ensemble.jl
rename to test/mirk/ensemble.jl
index e0771993..d03c2d94 100644
--- a/test/ensemble.jl
+++ b/test/mirk/ensemble.jl
@@ -19,9 +19,9 @@ bvp = BVProblem(ode!, bc!, initial_guess, tspan, p)
 ensemble_prob = EnsembleProblem(bvp; prob_func)
 
 @testset "$(solver)" for solver in (MIRK2, MIRK3, MIRK4, MIRK5, MIRK6)
-    jac_algs = [MIRKJacobianComputationAlgorithm(),
-        MIRKJacobianComputationAlgorithm(; bc_diffmode = AutoFiniteDiff(),
-            collocation_diffmode = AutoSparseFiniteDiff())]
+    jac_algs = [BVPJacobianAlgorithm(),
+        BVPJacobianAlgorithm(; bc_diffmode = AutoFiniteDiff(),
+            nonbc_diffmode = AutoSparseFiniteDiff())]
     for jac_alg in jac_algs
         # Not sure why it is throwing so many warnings
         sol = solve(ensemble_prob, solver(; jac_alg); trajectories = 10, dt = 0.1)
diff --git a/test/mirk_convergence_tests.jl b/test/mirk/mirk_convergence_tests.jl
similarity index 95%
rename from test/mirk_convergence_tests.jl
rename to test/mirk/mirk_convergence_tests.jl
index 363c85a9..cdfd879f 100644
--- a/test/mirk_convergence_tests.jl
+++ b/test/mirk/mirk_convergence_tests.jl
@@ -75,8 +75,6 @@ testTol = 0.2
 affineTol = 1e-2
 dts = 1 .// 2 .^ (3:-1:1)
 
-@info "Collocation method (MIRK)"
-
 @testset "Affineness" begin
     @testset "Problem: $i" for i in (1, 2, 5, 6)
         prob = probArr[i]
@@ -117,8 +115,8 @@ end
 u0 = MVector{2}([pi / 2, pi / 2])
 bvp1 = BVProblem(simplependulum!, bc_pendulum!, u0, tspan)
 
-jac_alg = MIRKJacobianComputationAlgorithm(; bc_diffmode = AutoFiniteDiff(),
-    collocation_diffmode = AutoSparseFiniteDiff())
+jac_alg = BVPJacobianAlgorithm(; bc_diffmode = AutoFiniteDiff(),
+    nonbc_diffmode = AutoSparseFiniteDiff())
 
 # Using ForwardDiff might lead to Cache expansion warnings
 @test_nowarn solve(bvp1, MIRK2(; jac_alg); dt = 0.005)
diff --git a/test/vectorofvector_initials.jl b/test/mirk/vectorofvector_initials.jl
similarity index 100%
rename from test/vectorofvector_initials.jl
rename to test/mirk/vectorofvector_initials.jl
diff --git a/test/non_vector_inputs.jl b/test/misc/non_vector_inputs.jl
similarity index 82%
rename from test/non_vector_inputs.jl
rename to test/misc/non_vector_inputs.jl
index 52f6c81c..170d48f9 100644
--- a/test/non_vector_inputs.jl
+++ b/test/misc/non_vector_inputs.jl
@@ -50,4 +50,13 @@ probs = [
             @test norm(diff(first.(sol.u)) .+ 0.2, Inf) + abs(sol[1][1] - 5) < 0.01
         end
     end
+
+    @testset "Single Shooting" begin
+        for prob in probs
+            @time sol = solve(prob, Shooting(Tsit5()))
+            @test norm(boundary(sol, prob.p, nothing)) < 0.01
+        end
+    end
+
+    # FIXME: Add Multiple Shooting here once it supports non-vector inputs
 end
diff --git a/test/odeinterface_ex7.jl b/test/misc/odeinterface_ex7.jl
similarity index 100%
rename from test/odeinterface_ex7.jl
rename to test/misc/odeinterface_ex7.jl
diff --git a/test/misc/type_stability.jl b/test/misc/type_stability.jl
new file mode 100644
index 00000000..ef20e632
--- /dev/null
+++ b/test/misc/type_stability.jl
@@ -0,0 +1,62 @@
+using BoundaryValueDiffEq, OrdinaryDiffEq, LinearAlgebra, Test
+
+f(u, p, t) = [p[1] * u[1] - p[2] * u[1] * u[2], p[3] * u[1] * u[2] - p[4] * u[2]]
+function f!(du, u, p, t)
+    du[1] = p[1] * u[1] - p[2] * u[1] * u[2]
+    du[2] = p[3] * u[1] * u[2] - p[4] * u[2]
+end
+
+bc(sol, p, t) = [sol[1][1] - 1, sol[end][2] - 2]
+function bc!(res, sol, p, t)
+    res[1] = sol[1][1] - 1
+    res[2] = sol[end][2] - 2
+end
+twobc_a(ua, p) = [ua[1] - 1]
+twobc_b(ub, p) = [ub[2] - 2]
+twobc_a!(resa, ua, p) = (resa[1] = ua[1] - 1)
+twobc_b!(resb, ub, p) = (resb[1] = ub[2] - 2)
+
+u0 = Float64[0, 0]
+tspan = (0.0, 1.0)
+p = [1.0, 1.0, 1.0, 1.0]
+bcresid_prototype = (zeros(1), zeros(1))
+
+# Multi-Point BVP
+@testset "Multi-Point BVP" begin
+    mpbvp_iip = BVProblem(f!, bc!, u0, tspan, p)
+    mpbvp_oop = BVProblem(f, bc, u0, tspan, p)
+
+    @testset "Shooting Methods" begin
+        @inferred solve(mpbvp_iip, Shooting(Tsit5()))
+        @inferred solve(mpbvp_oop, Shooting(Tsit5()))
+        @inferred solve(mpbvp_iip, MultipleShooting(5, Tsit5()))
+        @inferred solve(mpbvp_oop, MultipleShooting(5, Tsit5()))
+    end
+
+    @testset "MIRK Methods" begin
+        for solver in (MIRK2(), MIRK3(), MIRK4(), MIRK5(), MIRK6())
+            @inferred solve(mpbvp_iip, solver; dt = 0.2)
+            @inferred solve(mpbvp_oop, solver; dt = 0.2)
+        end
+    end
+end
+
+# Two-Point BVP
+@testset "Two-Point BVP" begin
+    tpbvp_iip = TwoPointBVProblem(f!, (twobc_a!, twobc_b!), u0, tspan, p; bcresid_prototype)
+    tpbvp_oop = TwoPointBVProblem(f, (twobc_a, twobc_b), u0, tspan, p)
+
+    @testset "Shooting Methods" begin
+        @inferred solve(tpbvp_iip, Shooting(Tsit5()))
+        @inferred solve(tpbvp_oop, Shooting(Tsit5()))
+        @inferred solve(tpbvp_iip, MultipleShooting(5, Tsit5()))
+        @inferred solve(tpbvp_oop, MultipleShooting(5, Tsit5()))
+    end
+
+    @testset "MIRK Methods" begin
+        for solver in (MIRK2(), MIRK3(), MIRK4(), MIRK5(), MIRK6())
+            @inferred solve(tpbvp_iip, solver; dt = 0.2)
+            @inferred solve(tpbvp_oop, solver; dt = 0.2)
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index fbf6fd64..4d1c7c3e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,36 +1,49 @@
 using Test, SafeTestsets
 
+const GROUP = uppercase(get(ENV, "GROUP", "ALL"))
+
 @testset "Boundary Value Problem Tests" begin
-    @time @testset "Shooting Method Tests" begin
-        @time @safetestset "Shooting Tests" begin
-            include("shooting_tests.jl")
-        end
-        @time @safetestset "Orbital" begin
-            include("orbital.jl")
+    if GROUP == "ALL" || GROUP == "SHOOTING"
+        @time @testset "Shooting Method Tests" begin
+            @time @safetestset "Shooting Tests" begin
+                include("shooting/shooting_tests.jl")
+            end
+            @time @safetestset "Ray Tracing BVP" begin
+                include("shooting/ray_tracing.jl")
+            end
+            @time @safetestset "Orbital" begin
+                include("shooting/orbital.jl")
+            end
         end
     end
 
-    @time @testset "Collocation Method (MIRK) Tests" begin
-        @time @safetestset "Ensemble" begin
-            include("ensemble.jl")
-        end
-        @time @safetestset "MIRK Convergence Tests" begin
-            include("mirk_convergence_tests.jl")
-        end
-        @time @safetestset "Vector of Vector" begin
-            include("vectorofvector_initials.jl")
+    if GROUP == "ALL" || GROUP == "MIRK"
+        @time @testset "Collocation Method (MIRK) Tests" begin
+            @time @safetestset "Ensemble" begin
+                include("mirk/ensemble.jl")
+            end
+            @time @safetestset "MIRK Convergence Tests" begin
+                include("mirk/mirk_convergence_tests.jl")
+            end
+            @time @safetestset "Vector of Vector" begin
+                include("mirk/vectorofvector_initials.jl")
+            end
         end
     end
 
-    @time @testset "ODE Interface Solvers" begin
-        @time @safetestset "ODE Interface Tests" begin
-            include("odeinterface_ex7.jl")
-        end
-    end
+    if GROUP == "ALL" || GROUP == "OTHERS"
+        @time @testset "Miscelleneous" begin
+            @time @safetestset "Non Vector Inputs" begin
+                include("misc/non_vector_inputs.jl")
+            end
+
+            @time @safetestset "Type Stability" begin
+                include("misc/type_stability.jl")
+            end
 
-    @time @testset "Non Vector Inputs Tests" begin
-        @time @safetestset "Non Vector Inputs" begin
-            include("non_vector_inputs.jl")
+            @time @safetestset "ODE Interface Tests" begin
+                include("misc/odeinterface_ex7.jl")
+            end
         end
     end
     
diff --git a/test/orbital.jl b/test/shooting/orbital.jl
similarity index 69%
rename from test/orbital.jl
rename to test/shooting/orbital.jl
index cd3ce853..d28c47d4 100644
--- a/test/orbital.jl
+++ b/test/shooting/orbital.jl
@@ -1,8 +1,6 @@
 # Lambert's Problem
 using BoundaryValueDiffEq, OrdinaryDiffEq, LinearAlgebra, Test
 
-@info "Testing Lambert's Problem"
-
 y0 = [
     -4.7763169762853989E+06,
     -3.8386398704441520E+05,
@@ -66,18 +64,24 @@ cur_bc_2point_b! = (resid, sol, p) -> bc!_generator_2p_b(resid, sol, init_val)
 resid_f = Array{Float64}(undef, 6)
 resid_f_2p = (Array{Float64, 1}(undef, 3), Array{Float64, 1}(undef, 3))
 
-TestTol = 0.05
-
 ### Now use the BVP solver to get closer
 bvp = BVProblem(orbital!, cur_bc!, y0, tspan)
 for autodiff in (AutoForwardDiff(), AutoFiniteDiff(; fdtype = Val(:central)),
     AutoSparseForwardDiff(), AutoFiniteDiff(; fdtype = Val(:forward)),
     AutoSparseFiniteDiff())
-    nlsolve = NewtonRaphson(; autodiff)
+    nlsolve = TrustRegion(; autodiff)
     @time sol = solve(bvp, Shooting(DP5(); nlsolve); force_dtmin = true, abstol = 1e-13,
-        reltol = 1e-13)
+        reltol = 1e-13, verbose = false)
+    cur_bc!(resid_f, sol, nothing, sol.t)
+    @info "Single Shooting Lambert's Problem: $(norm(resid_f, Inf))"
+    @test norm(resid_f, Inf) < 0.005
+
+    jac_alg = BVPJacobianAlgorithm(; nonbc_diffmode = autodiff)
+    @time sol = solve(bvp, MultipleShooting(10, AutoVern7(Rodas5P()); nlsolve, jac_alg);
+        abstol = 1e-6, reltol = 1e-6, verbose = false)
     cur_bc!(resid_f, sol, nothing, sol.t)
-    @test norm(resid_f, Inf) < TestTol
+    @info "Multiple Shooting Lambert's Problem: $(norm(resid_f, Inf))"
+    @test norm(resid_f, Inf) < 0.005
 end
 
 ### Using the TwoPoint BVP Structure
@@ -86,10 +90,19 @@ bvp = TwoPointBVProblem(orbital!, (cur_bc_2point_a!, cur_bc_2point_b!), y0, tspa
 for autodiff in (AutoForwardDiff(), AutoFiniteDiff(; fdtype = Val(:central)),
     AutoSparseForwardDiff(), AutoFiniteDiff(; fdtype = Val(:forward)),
     AutoSparseFiniteDiff())
-    nlsolve = NewtonRaphson(; autodiff)
+    nlsolve = TrustRegion(; autodiff)
     @time sol = solve(bvp, Shooting(DP5(); nlsolve); force_dtmin = true, abstol = 1e-13,
-        reltol = 1e-13)
+        reltol = 1e-13, verbose = false)
+    cur_bc_2point_a!(resid_f_2p[1], sol(t0), nothing)
+    cur_bc_2point_b!(resid_f_2p[2], sol(t1), nothing)
+    @info "Single Shooting Lambert's Problem: $(norm(reduce(vcat, resid_f_2p), Inf))"
+    @test norm(reduce(vcat, resid_f_2p), Inf) < 0.005
+
+    jac_alg = BVPJacobianAlgorithm(; nonbc_diffmode = autodiff, bc_diffmode = autodiff)
+    @time sol = solve(bvp, MultipleShooting(10, AutoVern7(Rodas5P()); nlsolve, jac_alg);
+        abstol = 1e-6, reltol = 1e-6, verbose = false)
     cur_bc_2point_a!(resid_f_2p[1], sol(t0), nothing)
     cur_bc_2point_b!(resid_f_2p[2], sol(t1), nothing)
-    @test norm(reduce(vcat, resid_f_2p), Inf) < TestTol
+    @info "Multiple Shooting Lambert's Problem: $(norm(reduce(vcat, resid_f_2p), Inf))"
+    @test norm(reduce(vcat, resid_f_2p), Inf) < 0.005
 end
diff --git a/test/shooting/ray_tracing.jl b/test/shooting/ray_tracing.jl
new file mode 100644
index 00000000..e4d55781
--- /dev/null
+++ b/test/shooting/ray_tracing.jl
@@ -0,0 +1,137 @@
+using BoundaryValueDiffEq, LinearAlgebra, OrdinaryDiffEq, Test
+
+@inline v(x, y, z, p) = 1 / (4 + cos(p[1] * x) + sin(p[2] * y) - cos(p[3] * z))
+@inline ux(x, y, z, p) = -p[1] * sin(p[1] * x)
+@inline uy(x, y, z, p) = p[2] * cos(p[2] * y)
+@inline uz(x, y, z, p) = p[3] * sin(p[3] * z)
+
+function ray_tracing(u, p, t)
+    du = similar(u)
+    ray_tracing!(du, u, p, t)
+    return du
+end
+
+function ray_tracing!(du, u, p, t)
+    x, y, z, ξ, η, ζ, T, S = u
+
+    nu = v(x, y, z, p) # Velocity of a sound wave, function of space;
+    μx = ux(x, y, z, p) # ∂(slowness)/∂x, function of space
+    μy = uy(x, y, z, p) # ∂(slowness)/∂y, function of space
+    μz = uz(x, y, z, p) # ∂(slowness)/∂z, function of space
+
+    du[1] = S * nu * ξ
+    du[2] = S * nu * η
+    du[3] = S * nu * ζ
+
+    du[4] = S * μx
+    du[5] = S * μy
+    du[6] = S * μz
+
+    du[7] = S / nu
+    du[8] = 0
+
+    return nothing
+end
+
+function ray_tracing_bc(sol, p, t)
+    res = similar(first(sol))
+    ray_tracing_bc!(res, sol, p, t)
+    return res
+end
+
+function ray_tracing_bc!(res, sol, p, t)
+    ua = sol(0.0)
+    ub = sol(1.0)
+    nu = v(ua[1], ua[2], ua[3], p) # Velocity of a sound wave, function of space;
+
+    res[1] = ua[1] - p[4]
+    res[2] = ua[2] - p[5]
+    res[3] = ua[3] - p[6]
+    res[4] = ua[7]      # T(0) = 0
+    res[5] = ua[4]^2 + ua[5]^2 + ua[6]^2 - 1 / nu^2
+    res[6] = ub[1] - p[7]
+    res[7] = ub[2] - p[8]
+    res[8] = ub[3] - p[9]
+    return nothing
+end
+
+function ray_tracing_bc_a(ua, p)
+    resa = similar(ua, 5)
+    ray_tracing_bc_a!(resa, ua, p)
+    return resa
+end
+
+function ray_tracing_bc_a!(resa, ua, p)
+    nu = v(ua[1], ua[2], ua[3], p) # Velocity of a sound wave, function of space;
+
+    resa[1] = ua[1] - p[4]
+    resa[2] = ua[2] - p[5]
+    resa[3] = ua[3] - p[5]
+    resa[4] = ua[7]
+    resa[5] = ua[4]^2 + ua[5]^2 + ua[6]^2 - 1 / nu^2
+
+    return nothing
+end
+
+function ray_tracing_bc_b(ub, p)
+    resb = similar(ub, 3)
+    ray_tracing_bc_b!(resb, ub, p)
+    return resb
+end
+
+function ray_tracing_bc_b!(resb, ub, p)
+    resb[1] = ub[1] - p[7]
+    resb[2] = ub[2] - p[8]
+    resb[3] = ub[3] - p[9]
+    return nothing
+end
+
+p = [0, 1, 2, 0, 0, 0, 4, 3, 2.0]
+
+dx = p[7] - p[4]
+dy = p[8] - p[5]
+dz = p[9] - p[6]
+
+u0 = zeros(8)
+u0[1:3] .= 0 # position
+u0[4] = dx / v(p[4], p[5], p[6], p)
+u0[5] = dy / v(p[4], p[5], p[6], p)
+u0[6] = dz / v(p[4], p[5], p[6], p)
+u0[8] = 1
+
+tspan = (0.0, 1.0)
+
+prob_oop = BVProblem{false}(ray_tracing, ray_tracing_bc, u0, tspan, p)
+prob_iip = BVProblem{true}(ray_tracing!, ray_tracing_bc!, u0, tspan, p)
+prob_tp_oop = TwoPointBVProblem{false}(ray_tracing, (ray_tracing_bc_a, ray_tracing_bc_b),
+    u0, tspan, p)
+prob_tp_iip = TwoPointBVProblem{true}(ray_tracing!, (ray_tracing_bc_a!, ray_tracing_bc_b!),
+    u0, tspan, p; bcresid_prototype = (zeros(5), zeros(3)))
+
+alg_sp = MultipleShooting(10, AutoVern7(Rodas4P()); nlsolve = NewtonRaphson(),
+    grid_coarsening = Base.Fix2(div, 3),
+    jac_alg = BVPJacobianAlgorithm(; bc_diffmode = AutoForwardDiff(),
+        nonbc_diffmode = AutoSparseForwardDiff()))
+alg_dense = MultipleShooting(10, AutoVern7(Rodas4P()); nlsolve = NewtonRaphson(),
+    grid_coarsening = Base.Fix2(div, 3),
+    jac_alg = BVPJacobianAlgorithm(; bc_diffmode = AutoForwardDiff(),
+        nonbc_diffmode = AutoForwardDiff()))
+alg_default = MultipleShooting(10, AutoVern7(Rodas4P()); nlsolve = NewtonRaphson(),
+    grid_coarsening = Base.Fix2(div, 3))
+
+for (prob, alg) in Iterators.product((prob_oop, prob_iip, prob_tp_oop, prob_tp_iip),
+    (alg_sp, alg_dense, alg_default))
+    sol = solve(prob, alg; abstol = 1e-9, reltol = 1e-9, maxiters = 1000)
+    @test SciMLBase.successful_retcode(sol.retcode)
+
+    if prob.problem_type isa TwoPointBVProblem
+        resida, residb = zeros(5), zeros(3)
+        ray_tracing_bc_a!(resida, sol.u[1], p)
+        ray_tracing_bc_b!(residb, sol.u[end], p)
+        @test norm(vcat(resida, residb), 2) < 5e-5
+    else
+        resid = zeros(8)
+        ray_tracing_bc!(resid, sol, p, sol.t)
+        @test norm(resid, 2) < 5e-5
+    end
+end
diff --git a/test/shooting/shooting_tests.jl b/test/shooting/shooting_tests.jl
new file mode 100644
index 00000000..b2d96bb7
--- /dev/null
+++ b/test/shooting/shooting_tests.jl
@@ -0,0 +1,166 @@
+using BoundaryValueDiffEq, LinearAlgebra, OrdinaryDiffEq, Test
+
+@testset "Basic Shooting Tests" begin
+    SOLVERS = [Shooting(Tsit5()), MultipleShooting(10, Tsit5())]
+
+    tspan = (0.0, 100.0)
+    u0 = [0.0, 1.0]
+
+    # Inplace
+    function f1!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = -u[1]
+        return nothing
+    end
+
+    function bc1!(resid, sol, p, t)
+        t₀, t₁ = first(t), last(t)
+        resid[1] = sol(t₀)[1]
+        resid[2] = sol(t₁)[1] - 1
+        return nothing
+    end
+
+    bvp1 = BVProblem(f1!, bc1!, u0, tspan)
+    @test SciMLBase.isinplace(bvp1)
+    for solver in SOLVERS
+        resid_f = Array{Float64}(undef, 2)
+        sol = solve(bvp1, solver; abstol = 1e-13, reltol = 1e-13)
+        @test SciMLBase.successful_retcode(sol)
+        bc1!(resid_f, sol, nothing, sol.t)
+        @test norm(resid_f) < 1e-12
+    end
+
+    # Out of Place
+    f1(u, p, t) = [u[2], -u[1]]
+
+    function bc1(sol, p, t)
+        t₀, t₁ = first(t), last(t)
+        return [sol(t₀)[1], sol(t₁)[1] - 1]
+    end
+
+    @test_throws SciMLBase.NonconformingFunctionsError BVProblem(f1!, bc1, u0, tspan)
+    @test_throws SciMLBase.NonconformingFunctionsError BVProblem(f1, bc1!, u0, tspan)
+
+    bvp2 = BVProblem(f1, bc1, u0, tspan)
+    @test !SciMLBase.isinplace(bvp2)
+    for solver in SOLVERS
+        sol = solve(bvp2, solver; abstol = 1e-13, reltol = 1e-13)
+        @test SciMLBase.successful_retcode(sol)
+        resid_f = bc1(sol, nothing, sol.t)
+        @test norm(resid_f) < 1e-12
+    end
+
+    # Inplace
+    bc2a!(resid, ua, p) = (resid[1] = ua[1])
+    bc2b!(resid, ub, p) = (resid[1] = ub[1] - 1)
+
+    bvp3 = TwoPointBVProblem(f1!, (bc2a!, bc2b!), u0, tspan;
+        bcresid_prototype = (Array{Float64}(undef, 1), Array{Float64}(undef, 1)))
+    @test SciMLBase.isinplace(bvp3)
+    for solver in SOLVERS
+        sol = solve(bvp3, solver; abstol = 1e-13, reltol = 1e-13)
+        @test SciMLBase.successful_retcode(sol)
+        resid_f = (Array{Float64, 1}(undef, 1), Array{Float64, 1}(undef, 1))
+        bc2a!(resid_f[1], sol(tspan[1]), nothing)
+        bc2b!(resid_f[2], sol(tspan[2]), nothing)
+        @test norm(reduce(vcat, resid_f)) < 1e-11
+    end
+
+    # Out of Place
+    bc2a(ua, p) = [ua[1]]
+    bc2b(ub, p) = [ub[1] - 1]
+
+    bvp4 = TwoPointBVProblem(f1, (bc2a, bc2b), u0, tspan)
+    @test !SciMLBase.isinplace(bvp4)
+    for solver in SOLVERS
+        sol = solve(bvp4, solver; abstol = 1e-13, reltol = 1e-13)
+        @test SciMLBase.successful_retcode(sol)
+        resid_f = reduce(vcat, (bc2a(sol(tspan[1]), nothing), bc2b(sol(tspan[2]), nothing)))
+        @test norm(resid_f) < 1e-11
+    end
+end
+
+@testset "Shooting with Complex Values" begin
+    # Test for complex values
+    function f1!(du, u, p, t)
+        du[1] = u[2]
+        du[2] = -u[1]
+        return nothing
+    end
+
+    function bc1!(resid, sol, p, t)
+        t₀, t₁ = first(t), last(t)
+        resid[1] = sol(t₀)[1]
+        resid[2] = sol(t₁)[1] - 1
+        return nothing
+    end
+
+    tspan = (0.0, 100.0)
+    u0 = [0.0, 1.0] .+ 1im
+    bvp = BVProblem(f1!, bc1!, u0, tspan)
+    resid_f = Array{ComplexF64}(undef, 2)
+
+    nlsolve = NewtonRaphson(; autodiff = AutoFiniteDiff())
+    jac_alg = BVPJacobianAlgorithm(; bc_diffmode = AutoFiniteDiff(),
+        nonbc_diffmode = AutoSparseFiniteDiff())
+    for solver in [Shooting(Tsit5(); nlsolve),
+        MultipleShooting(10, Tsit5(); nlsolve, jac_alg)]
+        sol = solve(bvp, solver; abstol = 1e-13, reltol = 1e-13)
+        @test SciMLBase.successful_retcode(sol)
+        bc1!(resid_f, sol, nothing, sol.t)
+        @test norm(resid_f) < 1e-12
+    end
+end
+
+@testset "Flow In a Channel" begin
+    function flow_in_a_channel!(du, u, p, t)
+        R, P = p
+        A, f′′, f′, f, h′, h, θ′, θ = u
+        du[1] = 0
+        du[2] = R * (f′^2 - f * f′′) - R * A
+        du[3] = f′′
+        du[4] = f′
+        du[5] = -R * f * h′ - 1
+        du[6] = h′
+        du[7] = -P * f * θ′
+        du[8] = θ′
+    end
+
+    function bc_flow!(resid, sol, p, tspan)
+        t₁, t₂ = extrema(tspan)
+        solₜ₁ = sol(t₁)
+        solₜ₂ = sol(t₂)
+        resid[1] = solₜ₁[4]
+        resid[2] = solₜ₁[3]
+        resid[3] = solₜ₂[4] - 1
+        resid[4] = solₜ₂[3]
+        resid[5] = solₜ₁[6]
+        resid[6] = solₜ₂[6]
+        resid[7] = solₜ₁[8]
+        resid[8] = solₜ₂[8] - 1
+    end
+
+    tspan = (0.0, 1.0)
+    p = [10.0, 7.0]
+    u0 = zeros(8)
+
+    flow_bvp = BVProblem{true}(flow_in_a_channel!, bc_flow!, u0, tspan, p)
+
+    sol_shooting = solve(flow_bvp,
+        Shooting(AutoTsit5(Rosenbrock23()), NewtonRaphson());
+        maxiters = 100)
+    @test SciMLBase.successful_retcode(sol_shooting)
+
+    resid = zeros(8)
+    bc_flow!(resid, sol_shooting, p, sol_shooting.t)
+    @test norm(resid, Inf) < 1e-6
+
+    sol_msshooting = solve(flow_bvp,
+        MultipleShooting(10, AutoTsit5(Rosenbrock23()); nlsolve = NewtonRaphson());
+        maxiters = 100)
+    @test SciMLBase.successful_retcode(sol_msshooting)
+
+    resid = zeros(8)
+    bc_flow!(resid, sol_msshooting, p, sol_msshooting.t)
+    @test norm(resid, Inf) < 1e-6
+end
diff --git a/test/shooting_tests.jl b/test/shooting_tests.jl
deleted file mode 100644
index 4a4443ad..00000000
--- a/test/shooting_tests.jl
+++ /dev/null
@@ -1,92 +0,0 @@
-using BoundaryValueDiffEq, LinearAlgebra, OrdinaryDiffEq, Test
-
-@info "Shooting method"
-
-@info "Multi-Point BVProblem" # Not really but using that API
-
-tspan = (0.0, 100.0)
-u0 = [0.0, 1.0]
-
-# Inplace
-function f1!(du, u, p, t)
-    du[1] = u[2]
-    du[2] = -u[1]
-    return nothing
-end
-
-function bc1!(resid, sol, p, t)
-    t₀, t₁ = first(t), last(t)
-    resid[1] = sol(t₀)[1]
-    resid[2] = sol(t₁)[1] - 1
-    return nothing
-end
-
-bvp1 = BVProblem(f1!, bc1!, u0, tspan)
-@test SciMLBase.isinplace(bvp1)
-resid_f = Array{Float64}(undef, 2)
-sol = solve(bvp1, Shooting(Tsit5()); abstol = 1e-13, reltol = 1e-13)
-@test SciMLBase.successful_retcode(sol)
-bc1!(resid_f, sol, nothing, sol.t)
-@test norm(resid_f) < 1e-12
-
-# Out of Place
-f1(u, p, t) = [u[2], -u[1]]
-
-function bc1(sol, p, t)
-    t₀, t₁ = first(t), last(t)
-    return [sol(t₀)[1], sol(t₁)[1] - 1]
-end
-
-@test_throws SciMLBase.NonconformingFunctionsError BVProblem(f1!, bc1, u0, tspan)
-@test_throws SciMLBase.NonconformingFunctionsError BVProblem(f1, bc1!, u0, tspan)
-
-bvp2 = BVProblem(f1, bc1, u0, tspan)
-@test !SciMLBase.isinplace(bvp2)
-sol = solve(bvp2, Shooting(Tsit5()); abstol = 1e-13, reltol = 1e-13)
-@test SciMLBase.successful_retcode(sol)
-resid_f = bc1(sol, nothing, sol.t)
-@test norm(resid_f) < 1e-12
-
-@info "Two Point BVProblem" # Not really but using that API
-
-# Inplace
-function bc2a!(resid, ua, p)
-    resid[1] = ua[1]
-    return nothing
-end
-
-function bc2b!(resid, ub, p)
-    resid[1] = ub[1] - 1
-    return nothing
-end
-
-bvp3 = TwoPointBVProblem(f1!, (bc2a!, bc2b!), u0, tspan;
-    bcresid_prototype = (Array{Float64}(undef, 1), Array{Float64}(undef, 1)))
-@test SciMLBase.isinplace(bvp3)
-sol = solve(bvp3, Shooting(Tsit5()); abstol = 1e-13, reltol = 1e-13)
-@test SciMLBase.successful_retcode(sol)
-resid_f = (Array{Float64, 1}(undef, 1), Array{Float64, 1}(undef, 1))
-bc2a!(resid_f[1], sol(tspan[1]), nothing)
-bc2b!(resid_f[2], sol(tspan[2]), nothing)
-@test norm(reduce(vcat, resid_f)) < 1e-11
-
-# Out of Place
-bc2a(ua, p) = [ua[1]]
-bc2b(ub, p) = [ub[1] - 1]
-
-bvp4 = TwoPointBVProblem(f1, (bc2a, bc2b), u0, tspan)
-@test !SciMLBase.isinplace(bvp4)
-sol = solve(bvp4, Shooting(Tsit5()); abstol = 1e-13, reltol = 1e-13)
-@test SciMLBase.successful_retcode(sol)
-resid_f = reduce(vcat, (bc2a(sol(tspan[1]), nothing), bc2b(sol(tspan[2]), nothing)))
-@test norm(resid_f) < 1e-12
-
-#Test for complex values
-u0 = [0.0, 1.0] .+ 1im
-bvp = BVProblem(f1!, bc1!, u0, tspan)
-resid_f = Array{ComplexF64}(undef, 2)
-sol = solve(bvp, Shooting(Tsit5(); nlsolve = NewtonRaphson(; autodiff = AutoFiniteDiff()));
-    abstol = 1e-13, reltol = 1e-13)
-resid_f = Array{ComplexF64}(undef, 2)
-bc1!(resid_f, sol, nothing, sol.t)
-@test norm(resid_f) < 1e-12