diff --git a/Project.toml b/Project.toml
index 73470e5..cb70bde 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,13 +10,32 @@ PositiveFactorizations = "85a6dd25-e78a-55b7-8502-1745935b8125"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
+[weakdeps]
+AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
+SparseDiffTools = "47a9eef4-7e08-11e9-0b38-333d64bd3804"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
+
+[extensions]
+AbstractDifferentiationExt = "AbstractDifferentiation"
+ForwardDiffExt = "ForwardDiff"
+SparseForwardDiffExt = ["ForwardDiff", "SparseDiffTools", "Symbolics"]
+StaticArraysExt = "StaticArrays"
+
 [compat]
+AbstractDifferentiation = "0.6.0"
+ForwardDiff = "0.10.36"
 IterativeSolvers = "0.9"
 PositiveFactorizations = "0.2"
+ReverseDiff = "1.15.1"
 SparseDiffTools = "1.13.2"
+StaticArrays = "1.6.5"
 julia = "1"
 
 [extras]
+AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d"
 ArbNumerics = "7e558dbc-694d-5a72-987c-6f4ebed21442"
 DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -26,6 +45,7 @@ IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SparseDiffTools = "47a9eef4-7e08-11e9-0b38-333d64bd3804"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index 82cfd54..8662394 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -27,6 +27,9 @@ nav:
   - Tutorials:
     - Minimizing a function: 'optimization.md'
    - Solving non-linear equations: 'nonlineareq.md'
+    - Solving non-linear least squares problems: 'leastsquares.md'
+  - Extensions:
+    - Using extensions: 'extensions.md'
 # - Gradients and Hessians: 'user/gradientsandhessians.md'
 # - Configurable Options: 'user/config.md'
 # - Linesearch: 'algo/linesearch.md'
diff --git a/docs/src/extensions.md b/docs/src/extensions.md
new file mode 100644
index 0000000..e98a250
--- /dev/null
+++ b/docs/src/extensions.md
@@ -0,0 +1,275 @@
+# Extensions to NLSolvers
+Extensions are a feature in Julia that allows package owners to provide functionality (through added methods to library functions) based on other packages. These packages can be useful for some users, but they would increase load times for all users if they were regular dependencies. Using extensions, we've provided a few convenience functions that do not have to be loaded by all users, but only by those who need them.
+
+## ForwardDiff.jl
+ForwardDiff.jl derivatives are available to users through the `ScalarObjective(Experimental.ForwardDiffAutoDiff(), x_seed; f=my_f)` constructor.
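+To make the constructor available, ForwardDiff has to be loaded alongside NLSolvers; on Julia versions with package extensions (1.9 and later) the `ForwardDiffExt` extension is then compiled and loaded automatically. A minimal sketch of how this might look in a session (the `Base.get_extension` call is optional and only included as a check):
+
+```
+using NLSolvers     # loads the core package only
+using ForwardDiff   # triggers loading of the ForwardDiffExt extension
+Base.get_extension(NLSolvers, :ForwardDiffExt)  # returns the extension module, or `nothing` if it is not loaded
+```
+
+Below is an example that shows that the hand-written and ForwardDiff derivatives agree.
+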
+```
+using NLSolvers, ForwardDiff, Test
+function himmelblau!(x)
+    fx = (x[1]^2 + x[2] - 11)^2 + (x[1] + x[2]^2 - 7)^2
+    return fx
+end
+function himmelblau_g!(∇f, x)
+    ∇f[1] =
+        4.0 * x[1]^3 + 4.0 * x[1] * x[2] - 44.0 * x[1] + 2.0 * x[1] + 2.0 * x[2]^2 - 14.0
+    ∇f[2] =
+        2.0 * x[1]^2 + 2.0 * x[2] - 22.0 + 4.0 * x[1] * x[2] + 4.0 * x[2]^3 - 28.0 * x[2]
+    ∇f
+end
+function himmelblau_fg!(∇f, x)
+    himmelblau!(x), himmelblau_g!(∇f, x)
+end
+function himmelblau_h!(∇²f, x)
+    ∇²f[1, 1] = 12.0 * x[1]^2 + 4.0 * x[2] - 44.0 + 2.0
+    ∇²f[1, 2] = 4.0 * x[1] + 4.0 * x[2]
+    ∇²f[2, 1] = ∇²f[1, 2]
+    ∇²f[2, 2] = 2.0 + 4.0 * x[1] + 12.0 * x[2]^2 - 28.0
+    return ∇²f
+end
+function himmelblau_fgh!(∇f, H, x)
+    himmelblau!(x), himmelblau_g!(∇f, x), himmelblau_h!(H, x)
+end
+
+objective = ScalarObjective(
+    f=himmelblau!,
+    g=himmelblau_g!,
+    fg=himmelblau_fg!,
+    h=himmelblau_h!,
+    fgh=himmelblau_fgh!,
+    )
+
+forward_objective = ScalarObjective(Experimental.ForwardDiffAutoDiff(), [3.0,3.0]; f=himmelblau!)
+
+# Test gradient
+G_forward = zeros(2)
+forward_objective.g(G_forward, [0.1,0.2])
+G = zeros(2)
+objective.g(G, [0.1,0.2])
+using Test
+@test G ≈ G_forward
+
+# Test joint f and g evaluation
+G_forward = zeros(2)
+fG_forward, _ = forward_objective.fg(G_forward, [0.1,0.2])
+
+G = zeros(2)
+f, _ = objective.fg(G, [0.1,0.2])
+
+@test G ≈ G_forward
+@test f ≈ fG_forward
+
+# Test Hessian evaluation
+H_forward = zeros(2,2)
+forward_objective.h(H_forward, [0.1,0.2])
+
+H = zeros(2,2)
+objective.h(H, [0.1,0.2])
+
+@test H ≈ H_forward
+
+# Test joint f, G, H evaluation
+H_forward = zeros(2,2)
+G_forward = zeros(2)
+f_forward, _, _ = forward_objective.fgh(G_forward, H_forward, [0.1,0.2])
+
+G = zeros(2)
+H = zeros(2,2)
+f, _, _= objective.fgh(G, H, [0.1,0.2])
+
+@test f ≈ f_forward
+@test G ≈ G_forward
+@test H ≈ H_forward
+
+# Test hessian-vector calculations
+# test hv
+x0 = [0.1,0.2]
+hv = x0*0
+forward_objective.hv(hv, x0, 2*ones(2))
+
+objective.h(H, x0)
+@test H*(2.0*ones(2)) ≈ hv
+```
+
+## AbstractDifferentiation
+```
+using NLSolvers, ForwardDiff, Test
+import AbstractDifferentiation as AD
+function himmelblau!(x)
+    fx = (x[1]^2 + x[2] - 11)^2 + (x[1] + x[2]^2 - 7)^2
+    return fx
+end
+function himmelblau_g!(∇f, x)
+    ∇f[1] =
+        4.0 * x[1]^3 + 4.0 * x[1] * x[2] - 44.0 * x[1] + 2.0 * x[1] + 2.0 * x[2]^2 - 14.0
+    ∇f[2] =
+        2.0 * x[1]^2 + 2.0 * x[2] - 22.0 + 4.0 * x[1] * x[2] + 4.0 * x[2]^3 - 28.0 * x[2]
+    ∇f
+end
+function himmelblau_fg!(∇f, x)
+    himmelblau!(x), himmelblau_g!(∇f, x)
+end
+function himmelblau_h!(∇²f, x)
+    ∇²f[1, 1] = 12.0 * x[1]^2 + 4.0 * x[2] - 44.0 + 2.0
+    ∇²f[1, 2] = 4.0 * x[1] + 4.0 * x[2]
+    ∇²f[2, 1] = ∇²f[1, 2]
+    ∇²f[2, 2] = 2.0 + 4.0 * x[1] + 12.0 * x[2]^2 - 28.0
+    return ∇²f
+end
+function himmelblau_fgh!(∇f, H, x)
+    himmelblau!(x), himmelblau_g!(∇f, x), himmelblau_h!(H, x)
+end
+
+objective = ScalarObjective(
+    f=himmelblau!,
+    g=himmelblau_g!,
+    fg=himmelblau_fg!,
+    h=himmelblau_h!,
+    fgh=himmelblau_fgh!,
+    )
+
+forward_objective = ScalarObjective(AD.ForwardDiffBackend(), [3.0,3.0]; f=himmelblau!)
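+# Note (added commentary): the AbstractDifferentiation extension builds g, fg, h, fgh and hv
+# from the function f alone, so other AD backends (for example AD.ReverseDiffBackend(),
+# used further below) can be passed in the same way.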
+
+# Test gradient
+G_forward = zeros(2)
+forward_objective.g(G_forward, [0.1,0.2])
+G = zeros(2)
+objective.g(G, [0.1,0.2])
+using Test
+@test G ≈ G_forward
+
+# Test joint f and g evaluation
+G_forward = zeros(2)
+fG_forward, _ = forward_objective.fg(G_forward, [0.1,0.2])
+
+G = zeros(2)
+f, _ = objective.fg(G, [0.1,0.2])
+
+@test G ≈ G_forward
+@test f ≈ fG_forward
+
+# Test Hessian evaluation
+H_forward = zeros(2,2)
+forward_objective.h(H_forward, [0.1,0.2])
+
+H = zeros(2,2)
+objective.h(H, [0.1,0.2])
+
+@test H ≈ H_forward
+
+# Test joint f, G, H evaluation
+H_forward = zeros(2,2)
+G_forward = zeros(2)
+f_forward, _, _ = forward_objective.fgh(G_forward, H_forward, [0.1,0.2])
+
+G = zeros(2)
+H = zeros(2,2)
+f, _, _= objective.fgh(G, H, [0.1,0.2])
+
+@test f ≈ f_forward
+@test G ≈ G_forward
+@test H ≈ H_forward
+
+# Test hessian-vector calculations
+# test hv
+x0 = [0.1,0.2]
+hv = x0*0
+forward_objective.hv(hv, x0, 2*ones(2))
+
+objective.h(H, x0)
+@test H*(2.0*ones(2)) ≈ hv
+
+
+
+using NLSolvers, Test
+import AbstractDifferentiation as AD
+
+using ReverseDiff
+
+function himmelblau!(x)
+    fx = (x[1]^2 + x[2] - 11)^2 + (x[1] + x[2]^2 - 7)^2
+    return fx
+end
+function himmelblau_g!(∇f, x)
+    ∇f[1] =
+        4.0 * x[1]^3 + 4.0 * x[1] * x[2] - 44.0 * x[1] + 2.0 * x[1] + 2.0 * x[2]^2 - 14.0
+    ∇f[2] =
+        2.0 * x[1]^2 + 2.0 * x[2] - 22.0 + 4.0 * x[1] * x[2] + 4.0 * x[2]^3 - 28.0 * x[2]
+    ∇f
+end
+function himmelblau_fg!(∇f, x)
+    himmelblau!(x), himmelblau_g!(∇f, x)
+end
+function himmelblau_h!(∇²f, x)
+    ∇²f[1, 1] = 12.0 * x[1]^2 + 4.0 * x[2] - 44.0 + 2.0
+    ∇²f[1, 2] = 4.0 * x[1] + 4.0 * x[2]
+    ∇²f[2, 1] = ∇²f[1, 2]
+    ∇²f[2, 2] = 2.0 + 4.0 * x[1] + 12.0 * x[2]^2 - 28.0
+    return ∇²f
+end
+function himmelblau_fgh!(∇f, H, x)
+    himmelblau!(x), himmelblau_g!(∇f, x), himmelblau_h!(H, x)
+end
+
+objective = ScalarObjective(
+    f=himmelblau!,
+    g=himmelblau_g!,
+    fg=himmelblau_fg!,
+    h=himmelblau_h!,
+    fgh=himmelblau_fgh!,
+    )
+
+reverse_objective = ScalarObjective(AD.ReverseDiffBackend(), [3.0,3.0]; f=himmelblau!)
+
+# Test gradient
+G_reverse = zeros(2)
+reverse_objective.g(G_reverse, [0.1,0.2])
+G = zeros(2)
+objective.g(G, [0.1,0.2])
+using Test
+@test G ≈ G_reverse
+
+# Test joint f and g evaluation
+G_reverse = zeros(2)
+fG_reverse, _ = reverse_objective.fg(G_reverse, [0.1,0.2])
+
+G = zeros(2)
+f, _ = objective.fg(G, [0.1,0.2])
+
+@test G ≈ G_reverse
+@test f ≈ fG_reverse
+
+# Test Hessian evaluation
+H_forward = zeros(2,2)
+reverse_objective.h(H_forward, [0.1,0.2])
+
+H = zeros(2,2)
+objective.h(H, [0.1,0.2])
+
+@test H ≈ H_forward
+
+# Test joint f, G, H evaluation
+H_forward = zeros(2,2)
+G_reverse = zeros(2)
+f_forward, _, _ = reverse_objective.fgh(G_reverse, H_forward, [0.1,0.2])
+
+G = zeros(2)
+H = zeros(2,2)
+f, _, _= objective.fgh(G, H, [0.1,0.2])
+
+@test f ≈ f_forward
+@test G ≈ G_reverse
+@test H ≈ H_forward
+
+# Test hessian-vector calculations
+# test hv
+x0 = [0.1,0.2]
+hv = x0*0
+reverse_objective.hv(hv, x0, 2*ones(2))
+
+objective.h(H, x0)
+@test H*(2.0*ones(2)) ≈ hv
+
+```
+
+## Future
+I plan to support SparseDiffTools.jl and more.
\ No newline at end of file
diff --git a/docs/src/leastsquares.md b/docs/src/leastsquares.md
index eef29f6..844ebfc 100644
--- a/docs/src/leastsquares.md
+++ b/docs/src/leastsquares.md
@@ -1 +1,131 @@
-# Non-linear Least Squares
\ No newline at end of file
+# Non-linear Least Squares
+
+Non-linear Least Squares problems arise in many statistical problems across applications in economics, physics, pharmacometrics, and more.
This package does not provide any statistical analysis tools, but it does provide the numerical procedures necessary to obtain the estimates. + +## Scalar + +#ScalarLsqObjective +#VectorLsqObjective + +@. model(x, p) = p[1] * exp(-x * p[2]) +xdata = range(0, stop = 10, length = 20) +ydata = model(xdata, [1.0 2.0]) + 0.01 * randn(length(xdata)) + +function f_lsq(x) + mod = model(xdata, x) + return sum(abs2, mod .- ydata) / 2 +end +function g!(G, x) + ForwardDiff.gradient!(G, f_lsq, x) + return G +end +function h!(H, x) + ForwardDiff.hessian!(H, f_lsq, x) + return H +end +function fg(G, x) + fx = f_lsq(x) + g!(G, x) + return fx, G +end +function fgh!(G, H, x) + fx = f_lsq(x) + g!(G, x) + h!(H, x) + return fx, G, H +end +obj = ScalarObjective(f_lsq, g!, fg, fgh!, h!, nothing, nothing, nothing) +prob = OptimizationProblem(obj, ([1.5, 0.0], [3.0, 3.0])) + +p0 = [1.5, 0.5] + +res = [solve(prob, copy(p0), LineSearch(BFGS()), OptimizationOptions()), +solve(prob, copy(p0), NelderMead(), OptimizationOptions()), +solve(prob, copy(p0), SimulatedAnnealing(), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(BFGS()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(SR1()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(DFP()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(DBFGS()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(Newton(), Dogleg()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(Newton(), NTR()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(Newton(), NWI()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(SR1(), Dogleg()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(SR1(), NTR()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(SR1(), NWI()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(DFP(inverse=false), Dogleg()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(DFP(inverse=false), NTR()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(DFP(inverse=false), NWI()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(DBFGS(inverse=false), Dogleg()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(BFGS(inverse=false), NTR()), OptimizationOptions()), +solve(prob, copy(p0), TrustRegion(BFGS(inverse=false), NWI()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(ConjugateGradient(update=HS()), HZAW()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(ConjugateGradient(update=CD()), HZAW()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(ConjugateGradient(update=HZ()), HZAW()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(ConjugateGradient(update=FR()), HZAW()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(ConjugateGradient(update=PRP()), HZAW()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(ConjugateGradient(update=VPRP()), HZAW()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(ConjugateGradient(update=LS()), HZAW()), OptimizationOptions()), +solve(prob, copy(p0), LineSearch(ConjugateGradient(update=DY()), HZAW()), OptimizationOptions())] + + +res_elapsed = [(@elapsed solve(prob, copy(p0), LineSearch(BFGS()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), NelderMead(), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), SimulatedAnnealing(), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(BFGS()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(SR1()), OptimizationOptions())), +(@elapsed 
solve(prob, copy(p0), LineSearch(DFP()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(DBFGS()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(Newton(), Dogleg()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(Newton(), NTR()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(Newton(), NWI()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(SR1(), Dogleg()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(SR1(), NTR()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(SR1(), NWI()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(DFP(inverse=false), Dogleg()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(DFP(inverse=false), NTR()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(DFP(inverse=false), NWI()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(DBFGS(inverse=false), Dogleg()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(BFGS(inverse=false), NTR()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), TrustRegion(BFGS(inverse=false), NWI()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(ConjugateGradient(update=HS()), HZAW()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(ConjugateGradient(update=CD()), HZAW()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(ConjugateGradient(update=HZ()), HZAW()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(ConjugateGradient(update=FR()), HZAW()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(ConjugateGradient(update=PRP()), HZAW()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(ConjugateGradient(update=VPRP()), HZAW()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(ConjugateGradient(update=LS()), HZAW()), OptimizationOptions())), +(@elapsed solve(prob, copy(p0), LineSearch(ConjugateGradient(update=DY()), HZAW()), OptimizationOptions()))] +res_allocated = [(@allocated solve(prob, copy(p0), LineSearch(BFGS()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), NelderMead(), OptimizationOptions())), +(@allocated solve(prob, copy(p0), SimulatedAnnealing(), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(BFGS()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(SR1()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(DFP()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(DBFGS()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(Newton(), Dogleg()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(Newton(), NTR()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(Newton(), NWI()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(SR1(), Dogleg()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(SR1(), NTR()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(SR1(), NWI()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(DFP(), Dogleg()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(DFP(), NTR()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(DFP(), NWI()), OptimizationOptions())), +(@allocated solve(prob, 
copy(p0), TrustRegion(DBFGS(), Dogleg()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(BFGS(), NTR()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), TrustRegion(BFGS(), NWI()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(ConjugateGradient(update=HS()), HZAW()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(ConjugateGradient(update=CD()), HZAW()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(ConjugateGradient(update=HZ()), HZAW()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(ConjugateGradient(update=FR()), HZAW()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(ConjugateGradient(update=PRP()), HZAW()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(ConjugateGradient(update=VPRP()), HZAW()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(ConjugateGradient(update=LS()), HZAW()), OptimizationOptions())), +(@allocated solve(prob, copy(p0), LineSearch(ConjugateGradient(update=DY()), HZAW()), OptimizationOptions()))] + + +res_bounds = [solve(prob, copy(p0), ParticleSwarm(), OptimizationOptions()), +solve(prob, copy(p0), ActiveBox(), OptimizationOptions()),] + +res_minimum = map(x->x.info.minimum, res) +res_bounds_minimum = map(x->x.info.minimum, res_bounds) \ No newline at end of file diff --git a/ext/AbstractDifferentiationExt.jl b/ext/AbstractDifferentiationExt.jl new file mode 100644 index 0000000..2f3ac16 --- /dev/null +++ b/ext/AbstractDifferentiationExt.jl @@ -0,0 +1,43 @@ +module AbstractDifferentiationExt + +using NLSolvers +import AbstractDifferentiation as AD + +import NLSolvers: ScalarObjective, mutation_style +function NLSolvers.ScalarObjective(autodiff::AD.AbstractBackend, x, style=NLSolvers.mutation_style(x, nothing); f = nothing, g = nothing, fg = nothing, fgh = nothing, h = nothing, hv = nothing, batched_f = nothing, param = nothing) + sco = ScalarObjective(f, g, fg, fgh, h, hv, batched_f, param) + _autodiff(autodiff, x, sco, style) +end +function _autodiff(fdad::AD.AbstractBackend, x, so::ScalarObjective, style) + g_f = (g,x) -> let f = so.f + g .= AD.gradient(fdad, f, x)[1] + g + end + + fg_f = (g, x) -> let f = so.f + v, G = AD.value_and_gradient(fdad, f, x) + g .= G[1] # why is G a tuple... 
+ v, g + end + + h_f = (H, x) -> let f = so.f + h = AD.hessian(fdad, f, x) + H .= h[1] + H + end + + fgh_f = (g, h, x) -> let f = so.f + v, G, H = AD.value_gradient_and_hessian(fdad, f, x) + g .= G[1] + h .= H[1] + v, g, h + end + + hv_f = (w, x, v) -> let f = so.f, g = g_f + h = AD.hessian(fdad, f, x) + w .= h[1]*v + end + + ScalarObjective(f = so.f, g = g_f, fg = fg_f, h = h_f, fgh = fgh_f, hv = hv_f, param=so.param) +end +end \ No newline at end of file diff --git a/ext/ForwardDiffExt.jl b/ext/ForwardDiffExt.jl new file mode 100644 index 0000000..9b62497 --- /dev/null +++ b/ext/ForwardDiffExt.jl @@ -0,0 +1,59 @@ +module ForwardDiffExt + +using NLSolvers, ForwardDiff + +import NLSolvers: ScalarObjective, mutation_style +function NLSolvers.ScalarObjective(autodiff::Experimental.ForwardDiffAutoDiff, x, style=NLSolvers.mutation_style(x, nothing); f = nothing, g = nothing, fg = nothing, fgh = nothing, h = nothing, hv = nothing, batched_f = nothing, param = nothing) + sco = ScalarObjective(f, g, fg, fgh, h, hv, batched_f, param) + _autodiff(autodiff, x, sco, style) +end +function _autodiff(fdad::Experimental.ForwardDiffAutoDiff, x, so::ScalarObjective, style) + g_f = (g,x) -> let f = so.f + chunk = something(fdad.chunk, ForwardDiff.Chunk(x)) + gcfg = ForwardDiff.GradientConfig(f, x, chunk) + gr_res = DiffResults.DiffResult(zero(eltype(x)), g) + + ForwardDiff.gradient!(gr_res, f, x, gcfg) + g + end + + fg_f = (g, x) -> let f = so.f + chunk = something(fdad.chunk, ForwardDiff.Chunk(x)) + gcfg = ForwardDiff.GradientConfig(f, x, chunk) + gr_res = DiffResults.DiffResult(zero(eltype(x)), g) + + ForwardDiff.gradient!(gr_res, f, x, gcfg) + DiffResults.value(gr_res), g + end + + h_f = (H, x) -> let f = so.f + chunk = something(fdad.chunk, ForwardDiff.Chunk(x)) + + H_res = DiffResults.DiffResult(zero(eltype(x)), x*0, H) + hcfg = ForwardDiff.HessianConfig(f, H_res, x, chunk) + ForwardDiff.hessian!(H_res, f, x, hcfg) + H + end + + fgh_f = (G, H, x) -> let f = so.f + chunk = something(fdad.chunk, ForwardDiff.Chunk(x)) + + H_res = DiffResults.DiffResult(zero(eltype(x)), G, H) + hcfg = ForwardDiff.HessianConfig(f, H_res, x, chunk) + ForwardDiff.hessian!(H_res, f, x, hcfg) + + DiffResults.value(H_res), G, H + end + + hv_f = (w, x, v) -> let f = so.f, g = g_f + #=chunk = something(fdad.chunk, ForwardDiff.Chunk(x)) + gcfg = ForwardDiff.GradientConfig(x->sum(g(x*0, x)'*v), x, chunk) + gr_res = DiffResults.DiffResult(zero(eltype(x)), x*0) + + w .= ForwardDiff.gradient!(gr_res, f, x, gcfg)=# + w .= ForwardDiff.gradient(x->sum(g_f(x*0, x)'*v), x) + end + + ScalarObjective(f = so.f, g = g_f, fg = fg_f, h = h_f, fgh = fgh_f, hv = hv_f) +end +end \ No newline at end of file diff --git a/ext/SparseForwardDiffExt.jl b/ext/SparseForwardDiffExt.jl new file mode 100644 index 0000000..673abeb --- /dev/null +++ b/ext/SparseForwardDiffExt.jl @@ -0,0 +1,55 @@ +module SparseForwardDiffExt + +using NLSolvers, ForwardDiff, SparseDiffTools, Symbolics + +import NLSolvers: ScalarObjective, mutation_style +function NLSolvers.ScalarObjective(autodiff::Experimental.SparseForwardDiff, x, style=NLSolvers.mutation_style(x, nothing); f = nothing, g = nothing, fg = nothing, fgh = nothing, h = nothing, hv = nothing, batched_f = nothing, param = nothing) + sco = ScalarObjective(f, g, fg, fgh, h, hv, batched_f, param) + _autodiff(autodiff, x, sco, style) +end +function _autodiff(fdad::Experimental.SparseForwardDiff, x, so::ScalarObjective, style) + g_f = (g,x) -> let f = so.f + chunk = something(fdad.chunk, ForwardDiff.Chunk(x)) + g .= 
ForwardDiff.gradient(f, x)
+    end
+
+    fg_f = (g, x) -> let f = so.f
+        # use DiffResults here
+        chunk = something(fdad.chunk, ForwardDiff.Chunk(x))
+        gcfg = ForwardDiff.GradientConfig(f, x, chunk)
+        gr_res = DiffResults.DiffResult(zero(eltype(x)), g)
+
+        ForwardDiff.gradient!(gr_res, f, x, gcfg)
+        DiffResults.value(gr_res), g
+    end
+
+    sparsity_pattern = Symbolics.hessian_sparsity(so.f, x)
+    hes = eltype(x).(sparsity_pattern)
+    colorvec = matrix_colors(hes)
+    h_f = (H, x) -> let f = so.f, sparsity_pattern = sparsity_pattern, hes = hes, colorvec = colorvec
+        hescache = ForwardColorHesCache(f, x, colorvec, sparsity_pattern)
+        numauto_color_hessian!(H, f, x, hescache)
+        H
+    end
+
+    fgh_f = (G, H, x) -> let f = so.f
+        # use DiffResults here
+        chunk = something(fdad.chunk, ForwardDiff.Chunk(x))
+
+        H_res = DiffResults.DiffResult(zero(eltype(x)), G, H)
+        hcfg = ForwardDiff.HessianConfig(f, H_res, x, chunk)
+        ForwardDiff.hessian!(H_res, f, x, hcfg)
+
+        DiffResults.value(H_res), G, H
+    end
+
+    hv_f = (w, x, v) -> let f = so.f, g = g_f
+        numauto_hesvec!(w,f,x,v,
+            ForwardDiff.GradientConfig(f,v),
+            similar(v),
+            similar(v))
+    end
+
+    ScalarObjective(f = so.f, g = g_f, fg = fg_f, h = h_f, fgh = fgh_f, hv = hv_f)
+end
+end
\ No newline at end of file
diff --git a/ext/StaticArraysExt.jl b/ext/StaticArraysExt.jl
new file mode 100644
index 0000000..9d72d76
--- /dev/null
+++ b/ext/StaticArraysExt.jl
@@ -0,0 +1,8 @@
+module StaticArraysExt
+
+using NLSolvers, StaticArrays
+
+import NLSolvers: mutation_style
+NLSolvers.mutation_style(x::MArray, ::Nothing) = InPlace()
+NLSolvers.mutation_style(x::SArray, ::Nothing) = OutOfPlace()
+end
\ No newline at end of file
diff --git a/src/NLSolvers.jl b/src/NLSolvers.jl
index 72617c0..cadccc9 100644
--- a/src/NLSolvers.jl
+++ b/src/NLSolvers.jl
@@ -1,4 +1,19 @@
 module NLSolvers
+module Experimental
+struct ForwardDiffAutoDiff{C,CO}
+    chunk::C
+    coloring::CO
+end
+ForwardDiffAutoDiff(; chunk = nothing, coloring = nothing) =
+    ForwardDiffAutoDiff(chunk, coloring)
+struct SparseForwardDiff{C,CO}
+    chunk::C
+    coloring::CO
+end
+SparseForwardDiff(; chunk = nothing, coloring = nothing) =
+    SparseForwardDiff(chunk, coloring)
+end
+export Experimental
 import Base: show, summary
 
 using Statistics # for var in statistics... probably not worth it
@@ -79,6 +94,7 @@ export objective_return
 
 isallfinite(x) = mapreduce(isfinite, *, x)
 
 
+
 """
     MutationStyle
@@ -101,6 +117,11 @@ A [`MutationStyle`](@ref) for out-of-place (non-mutating) operations.
 """
 struct OutOfPlace <: MutateStyle end
 
+mutation_style(x, style::Nothing) = OutOfPlace()
+mutation_style(x, style::OutOfPlace) = style
+mutation_style(x, style::InPlace) = style
+mutation_style(x::Array, style::Nothing) = InPlace()
+
 abstract type AbstractProblem end
 
 abstract type AbstractOptions end
diff --git a/src/globalization/linesearches/backtracking.jl b/src/globalization/linesearches/backtracking.jl
index 5de80e3..d5a5f09 100644
--- a/src/globalization/linesearches/backtracking.jl
+++ b/src/globalization/linesearches/backtracking.jl
@@ -174,7 +174,6 @@ function find_steplength(mstyle, ls::Backtracking, φ::T, λ) where {T}
         β, α, f_α = interpolate(ls.interp, φ, φ0, dφ0, α, f_α.ϕ, ratio)
         is_solved = isfinite(f_α.ϕ) && f_α.ϕ <= φ0 + α * t
     end
-    ls_success = iter >= maxiter ?
false : true if verbose diff --git a/src/nlsolve/problem_types.jl b/src/nlsolve/problem_types.jl index 748f06a..3aaa4b5 100644 --- a/src/nlsolve/problem_types.jl +++ b/src/nlsolve/problem_types.jl @@ -36,7 +36,7 @@ end function jacobian(nleq::NEqProblem{<:ScalarObjective}, J, x) gradient(nleq.R, J, x) end -function value_jacobian(nleq::NEqProblem{<:ScalarObjective, <:Any, <:Any, OutOfPlace}, F, J, x) +function value_jacobian(nleq::NEqProblem{<:ScalarObjective,<:Any,<:Any,OutOfPlace}, F, J, x) nleq.R.fg(J, x) end diff --git a/src/nlsolve/spectral/dfsane.jl b/src/nlsolve/spectral/dfsane.jl index 3118657..f8dae8d 100644 --- a/src/nlsolve/spectral/dfsane.jl +++ b/src/nlsolve/spectral/dfsane.jl @@ -59,7 +59,7 @@ To re-implement the setup in the numerical section of [PAPER] use DFSANE(nexp=2,memory=10, sigma_limit=(1e-10, 1e10), decrease=1e-4, sigma_0=1.0)) """ -struct DFSANE{T,Σ,TAU,N, G, A} +struct DFSANE{T,Σ,TAU,N,G,A} memory::T sigma_limits::Σ taus::TAU @@ -67,7 +67,14 @@ struct DFSANE{T,Σ,TAU,N, G, A} γ::G σ0::A end -DFSANE(; memory = 4, sigma_limits = (1e-5, 1e5), taus=(1 / 10, 1 / 2), nexp=2,decrease = 1 / 10000, sigma_0 = 1.0) = DFSANE(memory, sigma_limits, taus,nexp, decrease, sigma_0) +DFSANE(; + memory = 4, + sigma_limits = (1e-5, 1e5), + taus = (1 / 10, 1 / 2), + nexp = 2, + decrease = 1 / 10000, + sigma_0 = 1.0, +) = DFSANE(memory, sigma_limits, taus, nexp, decrease, sigma_0) init(::NEqProblem, ::DFSANE; x, Fx = copy(x), y = copy(x), d = copy(x), z = copy(x)) = (; x, Fx, y, d, z) @@ -122,7 +129,7 @@ function solve( ηk = ρ2F0 / (1 + iter)^2 φ(α) = norm(F(Fx, (z .= x .+ α .* d)))^nexp φ0 = fx - α, φα = find_steplength(RNMS(method.γ,method.σ0), φ, φ0, fbar, ηk, τmin, τmax) + α, φα = find_steplength(RNMS(method.γ, method.σ0), φ, φ0, fbar, ηk, τmin, τmax) if isnan(α) || isnan(φα) status = :linesearch_failed break diff --git a/src/objectives.jl b/src/objectives.jl index 8390e64..f9a4248 100644 --- a/src/objectives.jl +++ b/src/objectives.jl @@ -29,6 +29,7 @@ ScalarObjective(; batched_f = nothing, param = nothing, ) = ScalarObjective(f, g, fg, fgh, h, hv, batched_f, param) + has_param(so::ScalarObjective) = so.param === nothing ? false : true function value(so::ScalarObjective, x) if has_param(so) @@ -118,7 +119,8 @@ struct VectorObjective{TF,TJ,TFJ,TJv} FJ::TFJ Jv::TJv end -VectorObjective(; F=nothing, J=nothing, FJ=nothing, Jv=nothing) = VectorObjective(F, J, FJ, Jv) +VectorObjective(; F = nothing, J = nothing, FJ = nothing, Jv = nothing) = + VectorObjective(F, J, FJ, Jv) ## If prob is a NEqProblem, then we can just dispatch to least squares MeritObjective # if fast JacVec exists then maybe even line searches that updates the gradient can be used??? 
diff --git a/src/optimize/linesearch/conjugategradient.jl b/src/optimize/linesearch/conjugategradient.jl index 7a22fa9..695194e 100644 --- a/src/optimize/linesearch/conjugategradient.jl +++ b/src/optimize/linesearch/conjugategradient.jl @@ -313,6 +313,9 @@ function iterate( y .= .-∇fz end β = update_parameter(mstyle, scheme.update, d, ∇fz, ∇fx, y, P, P∇fz) + if !isfinite(β) + β = zero(β) + end return (x = x, fx = fx, ∇fx = ∇fx, z = z, fz = fz, ∇fz = ∇fz, B = nothing, Pg = Pg), P, diff --git a/src/optimize/problem_types.jl b/src/optimize/problem_types.jl index ec2afb4..67c59ce 100644 --- a/src/optimize/problem_types.jl +++ b/src/optimize/problem_types.jl @@ -215,15 +215,15 @@ struct OptimizationOptions{T1,T2,T3,T4,Txn,Tgn} end OptimizationOptions(; - x_abstol = 0.0, - x_reltol = 0.0, + x_abstol = -Inf, + x_reltol = -Inf, x_norm = x -> norm(x, Inf), g_abstol = 1e-8, g_reltol = 0.0, g_norm = x -> norm(x, Inf), f_limit = -Inf, - f_abstol = 0.0, - f_reltol = 0.0, + f_abstol = -Inf, + f_reltol = -Inf, nm_tol = 1e-8, maxiter = 10000, show_trace = false, diff --git a/src/optimize/trustregions/optimize/inplace_loop.jl b/src/optimize/trustregions/optimize/inplace_loop.jl index f1b7eb8..79d8677 100644 --- a/src/optimize/trustregions/optimize/inplace_loop.jl +++ b/src/optimize/trustregions/optimize/inplace_loop.jl @@ -1,3 +1,4 @@ + function solve( problem::OptimizationProblem, s0::Tuple, @@ -59,7 +60,6 @@ function solve( iter += 1 objvars, Δkp1, reject, qnvars = iterate!(p, objvars, Δkp1, approach, problem, options, qnvars, false) - # Check for convergence is_converged = converged(approach, objvars, ∇f0, options, reject, Δkp1) print_trace(approach, options, iter, t0, objvars, Δkp1) @@ -109,15 +109,14 @@ function iterate!( scale = nothing, ) x, fx, ∇fx, z, fz, ∇fz, B, Pg = objvars - T = eltype(x) + scheme, subproblemsolver = modelscheme(approach), algorithm(approach) - y, d, s = qnvars.y, qnvars.d, qnvars.s - fx = fz x = _copyto(mstyle(problem), x, z) ∇fx = _copyto(mstyle(problem), ∇fx, ∇fz) spr = subproblemsolver(∇fx, B, Δk, p, scheme, problem.mstyle; abstol = 1e-10) + Δm = -spr.mz # Grab the model value, m. If m is zero, the solution, z, does not improve @@ -159,12 +158,22 @@ function iterate!( # and will cause quasinewton updates to not update # this seems incorrect as it's already updated, should hold off here end + return (x = x, fx = fx, ∇fx = ∇fx, z = z, fz = fz, ∇fz = ∇fz, B = B, Pg = nothing), Δkp1, reject_step, QNVars(d, s, y) end - +#= +struct TrustRegionUpdateModel + postpone_gradient + postpone_Hessian + store_double + always_update + ratio_for_update + α +end +=# function update_trust_region(spr, R, p) T = eltype(p) # Choosing a parameter > 0 might be preferable here. See p. 
4 of Yuans survey diff --git a/test/lsqfit/interface.jl b/test/lsqfit/interface.jl index 7d8ce5f..18825f0 100644 --- a/test/lsqfit/interface.jl +++ b/test/lsqfit/interface.jl @@ -34,6 +34,9 @@ function fgh!(G, H, x) return fx, G, H end obj = ScalarObjective(f, g!, fg, fgh!, h!, nothing, nothing, nothing) +# obj = ScalarObjective(NLSolvers.Experimental.ForwardDiffAutoDiff(), p0; f) + + prob = OptimizationProblem(obj, ([0.0, 0.0], [3.0, 3.0])) res = solve(prob, copy(p0), LineSearch(BFGS()), OptimizationOptions()) res = solve(prob, copy(p0), NelderMead(), OptimizationOptions()) diff --git a/test/nlsolve/interface.jl b/test/nlsolve/interface.jl index 55b9a2c..2f907a3 100644 --- a/test/nlsolve/interface.jl +++ b/test/nlsolve/interface.jl @@ -447,17 +447,14 @@ end @testset "scalar nlsolves" begin function ff(x) x^2 - end - - function fgg(Jx, x) - x^2, 2x - end - - prob_obj = NLSolvers.ScalarObjective( - f=ff, - fg=fgg, - ) - + end + + function fgg(Jx, x) + x^2, 2x + end + + prob_obj = NLSolvers.ScalarObjective(f = ff, fg = fgg) + prob = NEqProblem(prob_obj; inplace = false) x0 = 0.3 @@ -496,4 +493,4 @@ end solve_static() alloced = @allocated solve_static() -@test alloced == 0 \ No newline at end of file +@test alloced == 0 diff --git a/test/optimize/interface.jl b/test/optimize/interface.jl index 1a2db1e..3be0b40 100644 --- a/test/optimize/interface.jl +++ b/test/optimize/interface.jl @@ -408,7 +408,76 @@ end +@testset "Quadratic optimizer jumps to -x and objective is the same" begin + N_test = 9 + w = rand(N_test) + obj_val = rand() + obj(x) = sum(x .^ 2) + obj_val + function g!(G, x) + G .= 2x + G + end + g(x) = 2x + function h!(H, x) + H .= x * x' + 2 * I + H + end + function hv!(Hv, x) + Hv .= (x * x' + 2 * I) * x + Hv + end + #res = Optim.optimize(f, w) + #@test res.method isa NelderMead + + sc = ScalarObjective(; f = obj, g = g!, h = h!, hv = hv!) + cges = [ConjugateGradient(), ConjugateGradient(update = VPRP())] + lses = [ + LineSearch(NLSolvers.SR1()), + LineSearch(NLSolvers.DBFGS()), + LineSearch(NLSolvers.DFP()), + LineSearch(NLSolvers.BB()), + LineSearch(NLSolvers.LBFGS()), + LineSearch(NLSolvers.BFGS()), + LineSearch(NLSolvers.GradientDescent()), + ] + for ls in lses + res = solve(OptimizationProblem(sc), copy(w), ls, OptimizationOptions()) + @test res.info.minimum ≈ obj_val + @test res.info.solution ≈ zeros(N_test) + end + cges = [ + ConjugateGradient(), + ConjugateGradient(update = HS()), + ConjugateGradient(update = CD()), + ConjugateGradient(update = HZ()), + ConjugateGradient(update = FR()), + ConjugateGradient(update = PRP()), + ConjugateGradient(update = VPRP()), + ConjugateGradient(update = LS()), + ConjugateGradient(update = DY()), + ] + lses = [ + LineSearch(NLSolvers.SR1()), + LineSearch(NLSolvers.DBFGS()), + LineSearch(NLSolvers.DFP()), + LineSearch(NLSolvers.BB()), + LineSearch(NLSolvers.LBFGS()), + LineSearch(NLSolvers.BFGS()), + LineSearch(NLSolvers.GradientDescent()), + ] + for ls in lses + res = solve(OptimizationProblem(sc), copy(w), ls, OptimizationOptions()) + @test res.info.minimum ≈ obj_val + @test res.info.solution ≈ zeros(N_test) + end + # ConjugateGradient(update = HS()) fails here if I don't check if beta is finite and sets it to 0 if it isn't. Keeps cycling 0,-0.5,0,-0.5 what's up... 
+ for ls in cges + res = solve(OptimizationProblem(sc), copy(w), ls, OptimizationOptions()) + @test res.info.minimum ≈ obj_val + @test sum(abs, res.info.solution) < 1e-7 + end +end diff --git a/test/optimize/param.jl b/test/optimize/param.jl new file mode 100644 index 0000000..3946ce3 --- /dev/null +++ b/test/optimize/param.jl @@ -0,0 +1,17 @@ +N_test = 9 +w = rand(N_test) +obji(x, data) = sum((x .- data) .^ 2) +function g!(G, x, data) + G .= 2 * (x .- data) + G +end +g(x) = 2x +function h!(H, x, data) + H .= x * x' + 2 * I + H +end +param = [1, 2, 3] + +sc = ScalarObjective(; f = obji, g = g!, h = h!, param = param) +op = OptimizationProblem(sc) +solve(op, [3.0, 4.0, 4.0], BFGS(), OptimizationOptions()) diff --git a/test/optimize/preconditioning.jl b/test/optimize/preconditioning.jl index a68e608..e08ee15 100644 --- a/test/optimize/preconditioning.jl +++ b/test/optimize/preconditioning.jl @@ -44,7 +44,10 @@ using Test if !( N == 10 && optimizer(nothing).linesearcher isa HZAW && - optimizer(nothing).scheme isa LBFGS + optimizer(nothing).scheme isa LBFGS || + N ∈ (50, 100) && + optimizer(nothing).linesearcher isa Backtracking && + optimizer(nothing).scheme isa ConjugateGradient ) @test iter[end-1] >= iter[end] end diff --git a/test/runtests.jl b/test/runtests.jl index 2eeb21d..15104b0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,3 +16,4 @@ include("optimize/preconditioning.jl") include("optimize/complex.jl") include("lsqfit/interface.jl") include("globalization/runtests.jl") +include("optimize/param.jl")