From 456139a6a5e81199ce0e764d39a0877c86b15e51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Thu, 24 Oct 2024 20:22:49 +0200 Subject: [PATCH 1/4] inverse to linsolve for AS quadratic (#510) --- src/active_set_quadratic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index b200a8912..8f26b632e 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -36,7 +36,7 @@ function detect_quadratic_function(grad!, x0; test=true) X[:, i] .-= x0 G[:, i] .= storage .- g0 end - A = G * inv(X) + A = G \ X b = g0 - A * x0 if test x_test = randn(T, n) From ffc360caf3027de2f334a407890943f664a89696 Mon Sep 17 00:00:00 2001 From: "Jannis H." <44263801+JannisHal@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:19:50 +0200 Subject: [PATCH 2/4] Alternating projection update (#513) * Update and generalize AP * Reuse active step and dual_gap information * Example comparing active vanilla FW, bpcg and active set reuse * Bug fix * dist2 - reduce allocations * Format document * Remove obsolete dependence --- examples/alternating_projections.jl | 37 +++++++ src/alternating_methods.jl | 163 ++++++++++++++++------------ test/alternating_methods_tests.jl | 35 +++--- 3 files changed, 149 insertions(+), 86 deletions(-) create mode 100644 examples/alternating_projections.jl diff --git a/examples/alternating_projections.jl b/examples/alternating_projections.jl new file mode 100644 index 000000000..159d204ff --- /dev/null +++ b/examples/alternating_projections.jl @@ -0,0 +1,37 @@ +import FrankWolfe +using LinearAlgebra +using Random + + +include("../examples/plot_utils.jl") + +Random.seed!(100) + +n = 500 +lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n]) +lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n]) + + +trajectories = [] + +methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient] + +for (i,m) in enumerate(methods) + if i == 1 + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m) + elseif i== 2 + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true) + else + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true) + end + push!(trajectories, traj_data) +end + + + +labels = ["FW", "BPCG" ,"BPCG (reuse)"] + +plot_trajectories(trajectories, labels, xscalelog=true) + + + diff --git a/src/alternating_methods.jl b/src/alternating_methods.jl index ec1c5b2f5..d07a73649 100644 --- a/src/alternating_methods.jl +++ b/src/alternating_methods.jl @@ -35,12 +35,12 @@ end Alternating Linear Minimization minimizes the objective `f` over the intersections of the feasible domains specified by `lmos`. The tuple `x0` defines the initial points for each domain. 
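+The objective actually minimized is the penalized function `λ f(x) + ½ Σ_{i<j} ‖x_i - x_j‖²`,
+whose block-coupling term is the `dist2` quantity below, so a larger `lambda` weighs the
+original objective more heavily against agreement between the block iterates.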
-Returns a tuple `(x, v, primal, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, primal, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `primal` primal value `f(x)` - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` is 1/2 of the sum of squared, pairwise distances between iterates - `traj_data` vector of trajectory information. """ function alternating_linear_minimization( @@ -49,17 +49,17 @@ function alternating_linear_minimization( grad!, lmos::NTuple{N,LinearMinimizationOracle}, x0::Tuple{Vararg{Any,N}}; - lambda::Union{Float64, Function}=1.0, + lambda::Union{Float64,Function}=1.0, verbose=false, trajectory=false, callback=nothing, max_iteration=10000, - print_iter = max_iteration / 10, + print_iter=max_iteration / 10, memory_mode=InplaceEmphasis(), line_search::LS=Adaptive(), epsilon=1e-7, kwargs..., -) where {N, LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} +) where {N,LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} x0_bc = BlockVector([x0[i] for i in 1:N], [size(x0[i]) for i in 1:N], sum(length, x0)) gradf = similar(x0_bc) @@ -74,21 +74,14 @@ function alternating_linear_minimization( for i in 1:N grad!(gradf.blocks[i], x.blocks[i]) end - t = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] + t = [N * b - sum(x.blocks) for b in x.blocks] return storage.blocks = λ[] * gradf.blocks + t end end - function dist2(x::BlockVector) - s = 0 - for i=1:N - for j=1:i-1 - diff = x.blocks[i] - x.blocks[j] - s += fast_dot(diff, diff) - end - end - return s - end + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function build_objective() λ = Ref(λ0) @@ -122,8 +115,11 @@ function alternating_linear_minimization( num_type = eltype(x0[1]) grad_type = eltype(gradf.blocks[1]) - line_search_type = line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) - println("MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration") + line_search_type = + line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) + println( + "MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration", + ) println("TYPE: $num_type GRADIENTTYPE: $grad_type") println("LAMBDA: $lambda") @@ -177,7 +173,7 @@ function alternating_linear_minimization( end if lambda isa Function - callback = function (state,args...) + callback = function (state, args...) state.f.λ[] = lambda(state) state.grad!.λ[] = state.f.λ[] @@ -211,33 +207,15 @@ function alternating_linear_minimization( end -function ProjectionFW(y, lmo; max_iter=10000, eps=1e-3) - f(x) = sum(abs2, x - y) - grad!(storage, x) = storage .= 2 * (x - y) - - x0 = FrankWolfe.compute_extreme_point(lmo, y) - x_opt, _ = FrankWolfe.frank_wolfe( - f, - grad!, - lmo, - x0, - epsilon=eps, - max_iteration=max_iter, - trajectory=true, - line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=false), - ) - return x_opt -end - """ alternating_projections(lmos::NTuple{N,LinearMinimizationOracle}, x0; ...) where {N} Computes a point in the intersection of feasible domains specified by `lmos`. 
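+Each outer iteration projects onto every region in turn: the projection onto region `i` is the
+quadratic subproblem `min_y ‖y - x_{i-1}‖²` over the previous region's iterate, solved
+approximately by an inner Frank-Wolfe run (`proj_method`) with tolerance `inner_epsilon(t)`
+at outer iteration `t`.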
-Returns a tuple `(x, v, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` is 1/2 * sum of squared, pairwise distances between iterates - `traj_data` vector of trajectory information. """ function alternating_projections( @@ -252,6 +230,10 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} return alternating_projections( ProductLMO(lmos), @@ -265,6 +247,10 @@ function alternating_projections( callback, traj_data, timeout, + proj_method, + inner_epsilon, + reuse_active_set, + kwargs..., ) end @@ -281,17 +267,21 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} # header and format string for output of the algorithm - headers = ["Type", "Iteration", "Dual Gap", "Infeas", "Time", "It/sec"] + headers = ["Type", "Iteration", "Dual Gap", "dist2", "Time", "It/sec"] format_string = "%6s %13s %14e %14e %14e %14e\n" - function format_state(state, infeas) + function format_state(state, primal) rep = ( steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.dual_gap), - Float64(infeas), + Float64(primal), state.time, state.t / state.time, ) @@ -300,27 +290,61 @@ function alternating_projections( t = 0 dual_gap = Inf - x = fill(x0, N) - v = similar(x) + dual_gaps = fill(Inf, N) + x = BlockVector(compute_extreme_point.(lmo.lmos, fill(x0, N))) step_type = ST_REGULAR gradient = similar(x) - ndim = ndims(x) - infeasibility(x) = sum( - fast_dot( - selectdim(x, ndim, i) - selectdim(x, ndim, j), - selectdim(x, ndim, i) - selectdim(x, ndim, j), - ) for i in 1:N for j in 1:i-1 - ) + if reuse_active_set + if proj_method ∉ + [away_frank_wolfe, blended_pairwise_conditional_gradient, pairwise_frank_wolfe] + error("The selected FW method does not support active sets reuse.") + end + active_sets = [ActiveSet([(1.0, x.blocks[i])]) for i in 1:N] + end - partial_infeasibility(x) = - sum(fast_dot(x[mod(i - 2, N)+1] - x[i], x[mod(i - 2, N)+1] - x[i]) for i in 1:N) + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function grad!(storage, x) - @. 
storage = [2 * (x[i] - x[mod(i - 2, N)+1]) for i in 1:N] + return storage.blocks = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] end - projection_step(x, i, t) = ProjectionFW(x, lmo.lmos[i]; eps=1 / (t^2 + 1)) + function projection_step(i, t) + xii = x.blocks[mod(i - 2, N)+1] # iterate in previous block + f(y) = sum(abs2, y - xii) + function grad_proj!(storage, y) + return storage .= 2 * (y - xii) + end + + if reuse_active_set + + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + active_sets[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + active_sets[i] = results[:active_set] + else + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + x.blocks[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + end + return results[:x], results[:dual_gap] + end if trajectory @@ -372,19 +396,18 @@ function alternating_projections( # Projection step: for i in 1:N # project the previous iterate on the i-th feasible region - x[i] = projection_step(x[mod(i - 2, N)+1], i, t) + x.blocks[i], dual_gaps[i] = projection_step(i, t) end + dual_gap = sum(dual_gaps) + # Update gradients grad!(gradient, x) - # Update dual gaps - v = compute_extreme_point.(lmo.lmos, gradient) - dual_gap = fast_dot(x - v, gradient) # go easy on the memory - only compute if really needed if ((mod(t, print_iter) == 0 && verbose) || callback !== nothing) - infeas = infeasibility(x) + primal = dist2(x) end first_iter = false @@ -393,12 +416,12 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, - v, + nothing, nothing, nothing, nothing, @@ -408,7 +431,7 @@ function alternating_projections( step_type, ) # @show state - if callback(state, infeas) === false + if callback(state, primal) === false break end end @@ -419,9 +442,9 @@ function alternating_projections( # this is important as some variants do not recompute f(x) and the dual_gap regularly but only when reporting # hence the final computation. 
step_type = ST_LAST - infeas = infeasibility(x) + primal = dist2(x) grad!(gradient, x) - v = compute_extreme_point.(lmo.lmos, gradient) + v = compute_extreme_point(lmo, gradient) dual_gap = fast_dot(x - v, gradient) tot_time = (time_ns() - time_start) / 1.0e9 @@ -429,8 +452,8 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, @@ -443,9 +466,9 @@ function alternating_projections( gradient, step_type, ) - callback(state, infeas) + callback(state, primal) end - return x, v, dual_gap, infeas, traj_data + return x, v, dual_gap, primal, traj_data end diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 904de49d8..21deecbe4 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -27,7 +27,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1.0, + lambda=0.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -40,7 +40,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/3, + lambda=1/6, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -53,7 +53,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/9, + lambda=1/18, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -66,7 +66,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, + lambda=1.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -134,6 +134,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) ones(n), line_search=FrankWolfe.Agnostic(), momentum=0.9, + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-3 @@ -160,6 +161,7 @@ end ones(n), line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), update_order=order, + lambda=0.5, ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-5 @test abs(x.blocks[2][1] - 1 / n) < 1e-5 @@ -181,6 +183,7 @@ end grad!, (lmo2, lmo_prob), ones(n), + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-6 @@ -244,8 +247,8 @@ end grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, - line_search=(FrankWolfe.Shortstep(8.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 2+2*λ + lambda=1.5, + line_search=(FrankWolfe.Shortstep(4.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 1+2*λ update_step=(FrankWolfe.BPCGStep(), FrankWolfe.FrankWolfeStep()), ) @@ -257,31 +260,31 @@ end x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=true) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, _, _ = FrankWolfe.alternating_projections((lmo3, lmo_prob), rand(n)) - @test abs(x[1][1] - 1) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1] - 1) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo1, lmo2), rand(n)) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1]) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1]) < 1e-6 @test infeas < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo2, lmo3), rand(n)) - @test abs(x[1][1] - 1) < 1e-4 - @test abs(x[2][1] - 1) < 1e-4 + @test abs(x.blocks[1][1] - 1) < 1e-4 + @test abs(x.blocks[2][1] - 1) < 1e-4 @test infeas < 1e-6 x, _, _, 
infeas, traj_data =
        FrankWolfe.alternating_projections((lmo1, lmo3), rand(n), trajectory=true)

+    @test abs(x.blocks[1][1]) < 1e-6
+    @test abs(x.blocks[2][1] - 1) < 1e-6
     @test traj_data != []
     @test length(traj_data[1]) == 5
     @test length(traj_data) >= 2

From 73585af8bdb92e84d1c5afc554f31da3168ec302 Mon Sep 17 00:00:00 2001
From: "Jannis H." <44263801+JannisHal@users.noreply.github.com>
Date: Thu, 24 Oct 2024 23:20:20 +0200
Subject: [PATCH 3/4] Update Block-Coordinate FW (#512)

* BPCG step fixes

* Update Dual(Progress/Gap)Order

* Clean up

* Remove bug in test

* Immutable DualGapOrder
---
 src/block_coordinate_algorithms.jl | 77 +++++++++++++++++++++---------
 test/alternating_methods_tests.jl  |  2 +-
 2 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/src/block_coordinate_algorithms.jl b/src/block_coordinate_algorithms.jl
index 4a1fc7e6a..0d034a7d3 100644
--- a/src/block_coordinate_algorithms.jl
+++ b/src/block_coordinate_algorithms.jl
@@ -41,20 +41,29 @@ end
 
 StochasticUpdate() = StochasticUpdate(-1)
 
-mutable struct DualGapOrder <: BlockCoordinateUpdateOrder
+"""
+The dual gap order initiates one round of `limit` many updates.
+The corresponding blocks are sampled with probabilities proportional to their respective dual gaps.
+"""
+struct DualGapOrder <: BlockCoordinateUpdateOrder
     limit::Int
 end
 
 DualGapOrder() = DualGapOrder(-1)
 
+"""
+The dual progress order initiates one round of `limit` many updates.
+The corresponding blocks are sampled with probabilities proportional to their respective dual progress.
+"""
 mutable struct DualProgressOrder <: BlockCoordinateUpdateOrder
     limit::Int
     previous_dual_gaps::Vector{Float64}
     dual_progress::Vector{Float64}
+    last_indices::Vector{Int}
 end
 
-DualProgressOrder() = DualProgressOrder(-1, [], [])
-DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [])
+DualProgressOrder() = DualProgressOrder(-1, [], [], [])
+DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [], [])
 
 """
 The Lazy update order is discussed in "Flexible block-iterative
@@ -122,6 +131,19 @@ function select_update_indices(u::StochasticUpdate, s::CallbackState, _)
     return [[rand(1:l)] for i in 1:u.limit]
 end
 
+function sample_without_replacement(n::Int, weights::Vector{Float64})
+    weights = weights ./ sum(weights)
+    cumsum_weights = cumsum(weights)
+    indices = zeros(Int, n)
+    for i in 1:n
+        indices[i] = findfirst(cumsum_weights .> rand())
+        weights[indices[i]] = 0
+        weights = weights ./ sum(weights)
+        cumsum_weights = cumsum(weights)
+    end
+    return indices
+end
+
 function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps)
     l = length(s.lmo.lmos)
 
@@ -129,22 +151,32 @@ function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps
     @assert u.limit <= l
     @assert u.limit > 0 || u.limit == -1
 
-    # In the first iteration update every block so we get finite dual progress
-    if s.t < 2
-        u.previous_dual_gaps = copy(dual_gaps)
-        return [[i] for i=1:l]
-    end
-
-    if s.t == 1
-        u.dual_progress = dual_gaps - u.previous_dual_gaps
+    # In the first two iterations update every block so we get finite dual progress
+    if s.t < 2
+        u.dual_progress = ones(l)
+        indices = [[i for i=1:l]]
     else
-        diff = dual_gaps - u.previous_dual_gaps
-        u.dual_progress = [d == 0 ?
u.dual_progress[i] : d for (i, d) in enumerate(diff)] + # Update dual progress only on updated blocks + for i in u.last_indices + d = u.previous_dual_gaps[i] - dual_gaps[i] + if d < 0 + u.dual_progress[i] = 0 + else + u.dual_progress[i] = d + end + end + n = u.limit == -1 ? l : u.limit + + # If less than n blocks have non-zero dual progress, update all of them + if sum(u.dual_progress .!= 0) < n + indices = [[i for i=1:l]] + else + indices = [sample_without_replacement(n, u.dual_progress)] + end end u.previous_dual_gaps = copy(dual_gaps) - - n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(u.dual_progress/sum(u.dual_progress)) .> rand())] for _=1:n] + u.last_indices = vcat(indices...) + return indices end function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) @@ -156,11 +188,11 @@ function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) # In the first iteration update every block so we get finite dual gaps if s.t < 1 - return [[i] for i=1:l] + return [[i for i=1:l]] end n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(dual_gaps/sum(dual_gaps)) .> rand())] for _=1:n] + return [sample_without_replacement(n, dual_gaps)] end function select_update_indices(update::LazyUpdate, s::CallbackState, dual_gaps) @@ -237,19 +269,18 @@ mutable struct BPCGStep <: UpdateStep phi::Float64 end -function Base.copy(::FrankWolfeStep) - return FrankWolfeStep() -end +Base.copy(::FrankWolfeStep) = FrankWolfeStep() function Base.copy(obj::BPCGStep) if obj.active_set === nothing - return BPCGStep(false, nothing, obj.renorm_interval, obj.lazy_tolerance, Inf) + return BPCGStep(obj.lazy, nothing, obj.renorm_interval, obj.lazy_tolerance, obj.phi) else return BPCGStep(obj.lazy, copy(obj.active_set), obj.renorm_interval, obj.lazy_tolerance, obj.phi) end end BPCGStep() = BPCGStep(false, nothing, 1000, 2.0, Inf) +BPCGStep(lazy::Bool) = BPCGStep(lazy, nothing, 1000, 2.0, Inf) function update_iterate( ::FrankWolfeStep, @@ -324,7 +355,7 @@ function update_iterate( # minor modification from original paper for improved sparsity # (proof follows with minor modification when estimating the step) - if local_gap > s.phi / s.lazy_tolerance + if local_gap > max(s.phi / s.lazy_tolerance, 0) # Robust condition to not drop the zero-vector if the dual_gap is negative by inaccuracy d = muladd_memory_mode(memory_mode, d, a, v_local) vertex_taken = v_local gamma_max = a_lambda diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 21deecbe4..7bf9487a6 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -223,7 +223,7 @@ end grad!, (lmo1, lmo2), ones(n), - update_step=FrankWolfe.BPCGStep(true, nothing, 1000, false, Inf), + update_step=FrankWolfe.BPCGStep(true), ) @test abs(x.blocks[1][1]) < 1e-6 From 1ac1797d59ac9f5df5d882ff811be5f3f81bbf90 Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:12:54 +0200 Subject: [PATCH 4/4] BPCG with direct solve extension (#507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added direct solve feature BPCG via LP solver * adjusted for arbitrary LP solver * fixed deps * cleanup and comment * minor * added reporting of direct solve step * chose highs as standard solver * added sparsification * added sparsification code * cleanup * minor cleanup * minor * added generalized direct_solve * clean up, docu, additional direct_solve * docstrings fixed? 
* sparsifier active set (#508) * sparsifier active set * fix typo * added sparsifying tests * generic tolerane * remove sparsification * format * HiGHS dep * Quadratic solve structure (#511) * sparsifier active set * start working on LP AS * first working quadratic * remove quadratic LP from current * cleanup * HiGHS in test deps * working reworked LP quadratic * working version generic quadratic * slow version generic quadratic * faster term manipulation * copy sufficient * remove comment * added test for quadratic * minor * simplify example * clean up code, verify error with ASQuad * Add update_weights! to fix direct solve with active_set_quadratic * remove direct solve from BPCG * rng changed --------- Co-authored-by: Sébastien Designolle * update example * format * clean up example * fix callback --------- Co-authored-by: Mathieu Besançon Co-authored-by: Sébastien Designolle --- Project.toml | 3 +- examples/blended_pairwise_with_direct.jl | 174 ++++++++ ...wise_with_direct_non-standard-quadratic.jl | 146 +++++++ examples/blended_pairwise_with_sparsify.jl | 100 +++++ examples/simple_stateless.jl | 2 +- src/FrankWolfe.jl | 2 + src/active_set.jl | 4 + src/active_set_quadratic.jl | 18 +- src/active_set_quadratic_direct_solve.jl | 383 ++++++++++++++++++ src/active_set_sparsifier.jl | 96 +++++ src/blended_pairwise.jl | 2 - src/moi_oracle.jl | 2 +- src/utils.jl | 71 ++++ test/quadratic_lp_active_set.jl | 113 ++++++ test/runtests.jl | 14 + test/sparsifying_activeset.jl | 85 ++++ 16 files changed, 1206 insertions(+), 9 deletions(-) create mode 100644 examples/blended_pairwise_with_direct.jl create mode 100644 examples/blended_pairwise_with_direct_non-standard-quadratic.jl create mode 100644 examples/blended_pairwise_with_sparsify.jl create mode 100644 src/active_set_quadratic_direct_solve.jl create mode 100644 src/active_set_sparsifier.jl create mode 100644 test/quadratic_lp_active_set.jl create mode 100644 test/sparsifying_activeset.jl diff --git a/Project.toml b/Project.toml index e56223008..ca298fa1a 100644 --- a/Project.toml +++ b/Project.toml @@ -43,6 +43,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" @@ -58,4 +59,4 @@ Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] +test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl new file mode 100644 index 000000000..9a30bde76 --- /dev/null +++ b/examples/blended_pairwise_with_direct.jl @@ -0,0 +1,174 @@ +#= +This example demonstrates the use of the Blended Pairwise 
Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +Note the special structure of f(x) = norm(x - x0)^2 that we assume here + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10_000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_standard), +); + +# Just projection quadratic +trajectoryBPCG_quadratic = [] +as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic), +); + +as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) + +# with quadratic active set +trajectoryBPCG_quadratic_as = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_as), +); + +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct), +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct_generic = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), +); + +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, 
+ -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_noqas = [] + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_noqas), +); + + +# Update the data and labels for plotting +data_trajectories = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic, + trajectoryBPCG_quadratic_as, + trajectoryBPCG_quadratic_direct, + trajectoryBPCG_quadratic_direct_generic, + trajectoryBPCG_quadratic_noqas, +] +labels_trajectories = [ + "BPCG (Standard)", + "BPCG (Specific Direct)", + "AS_Quad", + "Reloaded", + "Reloaded_generic", + "Reloaded_noqas", +] + +# Plot trajectories +plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false) diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl new file mode 100644 index 000000000..ab934edd2 --- /dev/null +++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl @@ -0,0 +1,146 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope which is not standard quadratic. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +@info "Seed $s" +Random.seed!(s) + +A = let + A = randn(n, n) + A' * A +end +@assert isposdef(A) == true + +const y = Random.rand(Bool, n) * 0.6 .+ 0.3 + +function f(x) + d = x - y + return dot(d, A, d) +end + +function grad!(storage, x) + mul!(storage, A, x) + return mul!(storage, A, y, -2, 2) +end + + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
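+        # push the standard callback state extended with the current active-set size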
+ return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +callback = build_callback(trajectoryBPCG_standard) + +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Adaptive(), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic2 = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic2, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic2), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + + +dataSparsity = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_automatic_standard, +] +labelSparsity = ["BPCG (Standard)", "AS_Quad", "AS_Standard"] + + +# Plot trajectories +plot_trajectories(dataSparsity, labelSparsity, xscalelog=false) diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl new file mode 100644 index 000000000..6d42b7de4 --- /dev/null +++ b/examples/blended_pairwise_with_sparsify.jl @@ -0,0 +1,100 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. 
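+
+Concretely, `FrankWolfe.ActiveSetSparsifier` wraps a standard active set and, at a fixed
+frequency of updates (once the set holds more than a minimum number of atoms, both 50 by
+default), solves a small feasibility LP with HiGHS to re-express the current iterate as a
+convex combination of fewer vertices, dropping atoms whose weight is (near) zero.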
+=#

+using FrankWolfe
+using LinearAlgebra
+using Random
+
+import HiGHS
+
+include("../examples/plot_utils.jl")
+
+n = Int(1e4)
+k = 10000
+
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+# here the optimal solution lies in the interior; if you want an optimal solution on a face and not the interior, use:
+# const xp = xpi;
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+# lmo = FrankWolfe.KSparseLMO(2, 1.0)
+
+## other LMOs to try
+# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0")
+lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0)
+# lmo = FrankWolfe.ProbabilitySimplexOracle(1.0);
+# lmo = FrankWolfe.UnitSimplexOracle(1.0);
+
+x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100)
+
+trajectoryBPCG_standard = []
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+trajectoryBPCG_as_sparse = []
+active_set_sparse = FrankWolfe.ActiveSetSparsifier(
+    FrankWolfe.ActiveSet([1.0], [x00], similar(x00)),
+    HiGHS.Optimizer(),
+)
+
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_sparse,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=build_callback(trajectoryBPCG_as_sparse),
+);
+
+# Reduction primal/dual error vs.
sparsity of solution

+dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_as_sparse]
+labelSparsity = ["BPCG (Standard)", "BPCG (Sparsify)"]
+
+# Plot sparsity
+plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
diff --git a/examples/simple_stateless.jl b/examples/simple_stateless.jl
index 6a550a963..6bf230e26 100644
--- a/examples/simple_stateless.jl
+++ b/examples/simple_stateless.jl
@@ -71,7 +71,7 @@ plot_trajectories([trajectory_simple[1:end], trajectory_restart[1:end]], ["simpl
 
 # simple step iterations about 33% faster
 
-@test line_search.factor <= 44
+@test line_search.factor <= 51
 
 x, v, primal, dual_gap, trajectory_restart_highpres = FrankWolfe.frank_wolfe(
     f,
diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl
index e6d29c3cc..cc2fef145 100644
--- a/src/FrankWolfe.jl
+++ b/src/FrankWolfe.jl
@@ -34,6 +34,8 @@ include("moi_oracle.jl")
 include("function_gradient.jl")
 include("active_set.jl")
 include("active_set_quadratic.jl")
+include("active_set_quadratic_direct_solve.jl")
+include("active_set_sparsifier.jl")
 
 include("blended_cg.jl")
 include("afw.jl")
diff --git a/src/active_set.jl b/src/active_set.jl
index 62393b47f..c02145666 100644
--- a/src/active_set.jl
+++ b/src/active_set.jl
@@ -326,3 +326,7 @@ function compute_active_set_iterate!(active_set::AbstractActiveSet{<:ScaledHotVe
     end
     return active_set.x
 end
+
+function update_weights!(as::AbstractActiveSet, new_weights)
+    as.weights .= new_weights
+end
diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index 8f26b632e..a616dd1fb 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -49,7 +49,8 @@ function detect_quadratic_function(grad!, x0; test=true)
 end
 
 function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R}
-    return ActiveSetQuadratic(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...)
+    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    return ActiveSetQuadratic(tuple_values, A, b)
 end
 
 function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H}
@@ -71,13 +72,15 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b)
         dots_b[idx] = fast_dot(b, atoms[idx])
     end
     x = similar(b)
-    as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified)
+    as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified)
+
     compute_active_set_iterate!(as)
     return as
 end
 
-function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!) where {AT,R}
-    return ActiveSetQuadratic{AT,R}(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...)
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadratic{AT,R}(tuple_values, A, b) end function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} @@ -116,6 +119,7 @@ function Base.:*(a::Identity, b) return a.λ * b end end + function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) end @@ -147,6 +151,7 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} return as end +# TODO multi-indices version function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) @inbounds for i in 1:idx-1 as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i] @@ -311,3 +316,8 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) return warm_as end +function update_weights!(as::ActiveSetQuadratic, new_weights) + as.weights_prev .= as.weights + as.weights .= new_weights + as.modified .= true +end diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl new file mode 100644 index 000000000..fef53a077 --- /dev/null +++ b/src/active_set_quadratic_direct_solve.jl @@ -0,0 +1,383 @@ + +""" + ActiveSetQuadraticLinearSolve{AT, R, IT} + +Represents an active set of extreme vertices collected in a FW algorithm, +along with their coefficients `(λ_i, a_i)`. +`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. +The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. +The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` +so that the gradient is `∇f(x)=Ax+b`. + +This active set stores an inner `active_set` that keeps track of the current set of vertices and convex decomposition. +It therefore delegates all update, deletion, and addition operations to this inner active set. +The `weight`, `atoms`, and `x` fields should only be accessed to read and are effectively the same objects as those in the inner active set. + +The structure also contains a scheduler struct which is called with the `should_solve_lp` function. +To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method. +""" +struct ActiveSetQuadraticLinearSolve{ + AT, + R<:Real, + IT, + H, + BT, + OT<:MOI.AbstractOptimizer, + AS<:AbstractActiveSet, + SF, +} <: AbstractActiveSet{AT,R,IT} + weights::Vector{R} + atoms::Vector{AT} + x::IT + A::H # Hessian matrix + b::BT # linear term + active_set::AS + lp_optimizer::OT + scheduler::SF + counter::Base.RefValue{Int} +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer) + +Creates an ActiveSetQuadraticLinearSolve by computing the Hessian and linear term from `grad!`. +""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler) +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer) + +Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadratic` active set. 
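+
+A minimal usage sketch mirroring the examples in this PR (`v0` stands for any feasible
+starting vertex; here the objective is `f(x) = ‖x - xp‖²`, i.e. `A = 2I` and `b = -2xp`,
+and HiGHS solves the periodic LPs):
+
+    as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+        [(1.0, v0)],
+        2 * LinearAlgebra.I,
+        -2xp,
+        MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+    )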
+""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadratic(tuple_values, A, b) + return ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + inner_as.A, + inner_as.b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A::LinearAlgebra.UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) + A, b = detect_quadratic_function(grad!, inner_as.atoms[1]) + return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + A, + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b) + as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + return ActiveSetQuadraticLinearSolve( + tuple_values, + Identity(A.λ), + b, + lp_optimizer; + scheduler=scheduler, + ) +end +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + Identity(A.λ), + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +# all mutating functions are delegated to the inner active set + +Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple) + +Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetQuadraticLinearSolve{AT,R}, + lambda, + atom, + renorm=true, + idx=nothing; + weight_purge_threshold=weight_purge_threshold_default(R), + add_dropped_vertices=false, + vertex_storage=nothing, +) where {AT,R} + if idx === nothing + idx = find_atom(as, atom) + end + active_set_update!( + as.active_set, + lambda, + atom, + renorm, + idx; + 
weight_purge_threshold=weight_purge_threshold, + add_dropped_vertices=add_dropped_vertices, + vertex_storage=vertex_storage, + ) + # new atom introduced, we can solve the auxiliary LP + if idx < 0 + as.counter[] += 1 + if should_solve_lp(as, as.scheduler) + solve_quadratic_activeset_lp!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetQuadraticLinearSolve) = active_set_renormalize!(as.active_set) + +function active_set_argmin(as::ActiveSetQuadraticLinearSolve, direction) + return active_set_argmin(as.active_set, direction) +end + +function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0.5) + return active_set_argminmax(as.active_set, direction; Φ=Φ) +end + +# generic quadratic with quadratic information provided + +""" + solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H})) + +Solves the auxiliary LP over the current active set. +The method is specialized by type `H` of the Hessian matrix `A`. +""" +function solve_quadratic_activeset_lp!( + as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:AbstractMatrix}, +) where {AT,R,IT} + nv = length(as) + o = as.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + # λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) + # Vᵗ A V λ == -Vᵗ b + for atom in as.atoms + lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) + Base.sizehint!(lhs.terms, nv) + # replaces direct sum because of MOI and MutableArithmetic slow sums + for j in 1:nv + push!(lhs.terms, MOI.ScalarAffineTerm(fast_dot(atom, as.A, as.atoms[j]), λ[j])) + end + rhs = -dot(atom, as.b) + MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) + end + MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) + MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 1e-5 + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set, indices_to_remove) + @assert length(as) == length(new_weights) + update_weights!(as.active_set, new_weights) + active_set_cleanup!(as) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + return as +end + +# special case of scaled identity Hessian + +function solve_quadratic_activeset_lp!( + as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:Union{Identity,LinearAlgebra.UniformScaling}}, +) where {AT,R,IT} + hessian_scaling = as.A.λ + nv = length(as) + o = as.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + # λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) + # a Aᵗ A λ == -Aᵗ b + for atom in as.atoms + lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) + Base.sizehint!(lhs.terms, nv) + # replaces direct sum because of MOI and MutableArithmetic slow sums + for j in 1:nv + push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, as.atoms[j]), λ[j])) + end + rhs = -dot(atom, as.b) + MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) + end + MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) + MOI.set(o, 
MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set, indices_to_remove) + @assert length(as) == length(new_weights) + update_weights!(as.active_set, new_weights) + active_set_cleanup!(as) + @assert all(>=(0), new_weights) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + return as +end + +struct LogScheduler{T} + start_time::Int + scaling_factor::T + max_interval::Int + current_interval::Base.RefValue{Int} + last_solve_counter::Base.RefValue{Int} +end + +LogScheduler(; start_time=20, scaling_factor=1.5, max_interval=1000) = + LogScheduler(start_time, scaling_factor, max_interval, Ref(start_time), Ref(0)) + +function should_solve_lp(as::ActiveSetQuadraticLinearSolve, scheduler::LogScheduler) + if as.counter[] - scheduler.last_solve_counter[] >= scheduler.current_interval[] + scheduler.last_solve_counter[] = as.counter[] + scheduler.current_interval[] = min( + round(Int, scheduler.scaling_factor * scheduler.current_interval[]), + scheduler.max_interval, + ) + return true + end + return false +end diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl new file mode 100644 index 000000000..6bb523217 --- /dev/null +++ b/src/active_set_sparsifier.jl @@ -0,0 +1,96 @@ +struct ActiveSetSparsifier{AT,R,IT,AS<:AbstractActiveSet{AT,R,IT},OT<:MOI.AbstractOptimizer} <: + AbstractActiveSet{AT,R,IT} + active_set::AS + weights::Vector{R} + atoms::Vector{AT} + x::IT + optimizer::OT + minimum_vertices::Int + solve_frequency::Int + counter::Base.RefValue{Int} +end + +function ActiveSetSparsifier( + active_set::AbstractActiveSet, + optimizer::MOI.AbstractOptimizer; + minimum_vertices=50, + solve_frequency=50, +) + return ActiveSetSparsifier( + active_set, + active_set.weights, + active_set.atoms, + active_set.x, + optimizer, + minimum_vertices, + solve_frequency, + Ref(0), + ) +end + +function Base.push!(as::ActiveSetSparsifier, (λ, a)) + return push!(as.active_set, (λ, a)) +end + +Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetSparsifier{AS,OT,AT,R,IT}, + lambda, + atom, + renorm=true, + idx=nothing; + kwargs..., +) where {AS,OT,AT,R,IT} + active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...) 
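+    # the update itself is delegated to the inner active set above; the remainder of this
+    # function only decides whether to re-solve the sparsifying LP over the current atoms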
+ n = length(as) + as.counter[] += 1 + if n > as.minimum_vertices && mod(as.counter[], as.solve_frequency) == 0 + # sparsifying active set + MOI.empty!(as.optimizer) + x0 = as.active_set.x + λ = MOI.add_variables(as.optimizer, n) + # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(as.optimizer, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(as.optimizer, sum(λ; init=0.0), MOI.EqualTo(1.0)) + x_sum = 0 * as.active_set.atoms[1] + for (idx, atom) in enumerate(as.active_set.atoms) + x_sum += λ[idx] * atom + end + for idx in eachindex(x_sum) + MOI.add_constraint(as.optimizer, x_sum[idx], MOI.EqualTo(x0[idx])) + end + # Set a dummy objective (minimize ∑λ) + dummy_objective = sum(λ; init=0.0) + MOI.set(as.optimizer, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(as.optimizer, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(as.optimizer) + if MOI.get(as.optimizer, MOI.TerminationStatus()) in + (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(as.optimizer, MOI.VariablePrimal(), λ[idx]) + if weight_value <= sqrt(weight_purge_threshold_default(typeof(weight_value))) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set.atoms, indices_to_remove) + deleteat!(as.active_set.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.active_set.weights .= new_weights + active_set_renormalize!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetSparsifier) = active_set_renormalize!(as.active_set) + +active_set_argmin(as::ActiveSetSparsifier, direction) = active_set_argmin(as.active_set, direction) +active_set_argminmax(as::ActiveSetSparsifier, direction; Φ=0.5) = + active_set_argminmax(as.active_set, direction; Φ=Φ) diff --git a/src/blended_pairwise.jl b/src/blended_pairwise.jl index e2efec4d2..a42862063 100644 --- a/src/blended_pairwise.jl +++ b/src/blended_pairwise.jl @@ -1,4 +1,3 @@ - """ blended_pairwise_conditional_gradient(f, grad!, lmo, x0; kwargs...) @@ -325,7 +324,6 @@ function blended_pairwise_conditional_gradient( linesearch_workspace, memory_mode, ) - if callback !== nothing state = CallbackState( t, diff --git a/src/moi_oracle.jl b/src/moi_oracle.jl index 875b8001d..1471bd6e7 100644 --- a/src/moi_oracle.jl +++ b/src/moi_oracle.jl @@ -1,6 +1,6 @@ """ - MathOptLMO{OT <: MOI.Optimizer} <: LinearMinimizationOracle + MathOptLMO{OT <: MOI.AbstractOptimizer} <: LinearMinimizationOracle Linear minimization oracle with feasible space defined through a MathOptInterface.Optimizer. The oracle call sets the direction and reruns the optimizer. 
diff --git a/src/utils.jl b/src/utils.jl index 629f55e15..c6e01fd6f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -163,6 +163,77 @@ function fast_dot(A::Matrix{T1}, B::SparseArrays.SparseMatrixCSC{T2}) where {T1, return s end +fast_dot(a, Q, b) = dot(a, Q, b) + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::AbstractVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + d = Q.diag + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + return sum(eachindex(nzvals); init=zero(eltype(a))) do nzidx + nzvals[nzidx] * d[nzinds[nzidx]] * b[nzinds[nzidx]] + end +end + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::SparseArrays.AbstractSparseVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + n = length(a) + if length(b) != n + throw( + DimensionMismatch("Vector a has a length $n but b has a length $(length(b))") + ) + end + anzind = SparseArrays.nonzeroinds(a) + bnzind = SparseArrays.nonzeroinds(b) + anzval = SparseArrays.nonzeros(a) + bnzval = SparseArrays.nonzeros(b) + s = zero(Base.promote_eltype(a, Q, b)) + + if isempty(anzind) || isempty(bnzind) + return s + end + + a_idx = 1 + b_idx = 1 + a_idx_last = length(anzind) + b_idx_last = length(bnzind) + + # go through the nonzero indices of a and b simultaneously + @inbounds while a_idx <= a_idx_last && b_idx <= b_idx_last + ia = anzind[a_idx] + ib = bnzind[b_idx] + if ia == ib + s += dot(anzval[a_idx], Q.diag[ia], bnzval[b_idx]) + a_idx += 1 + b_idx += 1 + elseif ia < ib + a_idx += 1 + else + b_idx += 1 + end + end + return s +end + + +function _fast_quadratic_form_symmetric(a, Q) + d = Q.diag + if length(d) != length(a) + throw(DimensionMismatch()) + end + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + s = zero(Base.promote_eltype(a, Q)) + @inbounds for nzidx in eachindex(nzvals) + s += nzvals[nzidx]^2 * d[nzinds[nzidx]] + end + return s +end + """ trajectory_callback(storage) diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl new file mode 100644 index 000000000..a87b37cea --- /dev/null +++ b/test/quadratic_lp_active_set.jl @@ -0,0 +1,113 @@ +using FrankWolfe +using LinearAlgebra +using Random +using Test +import HiGHS +import MathOptInterface as MOI + +n = Int(1e4) +k = 5000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_specialized = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_specialized),
+);
+
+as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * Diagonal(ones(length(xp))),
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_generic = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_generic,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_generic),
+);
+
+as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)),
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_noqas = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_basic_as,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_noqas),
+);
+
+
+dual_gaps_quadratic_specialized = getindex.(trajectoryBPCG_quadratic_direct_specialized, 4)
+dual_gaps_quadratic_generic = getindex.(trajectoryBPCG_quadratic_direct_generic, 4)
+dual_gaps_quadratic_noqas = getindex.(trajectoryBPCG_quadratic_noqas, 4)
+dual_gaps_bpcg = getindex.(trajectoryBPCG_standard, 4)
+
+
+@test dual_gaps_quadratic_specialized[end] < dual_gaps_bpcg[end]
+@test dual_gaps_quadratic_noqas[end] < dual_gaps_bpcg[end]
+@test norm(dual_gaps_quadratic_specialized - dual_gaps_quadratic_noqas) ≤ k * 1e-5
diff --git a/test/runtests.jl b/test/runtests.jl
index d984043dc..40f5ac366 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -798,6 +798,20 @@ using Test
     end
 end
 
+module LpDirectSolveTest
+using Test
+@testset "LP solving for quadratic functions and active set" begin
+    include("quadratic_lp_active_set.jl")
+end
+end
+
+module SparsifyingActiveSetTest
+using Test
+@testset "Sparsifying active set" begin
+    include("sparsifying_activeset.jl")
+end
+end
+
 include("generic-arrays.jl")
 
 include("blocks.jl")
diff --git a/test/sparsifying_activeset.jl b/test/sparsifying_activeset.jl
new file mode 100644
index 000000000..1fde16d53
--- /dev/null
+++ b/test/sparsifying_activeset.jl
@@ -0,0 +1,85 @@
+#=
+This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm
+with direct solve steps for a quadratic optimization problem over a sparse polytope.
+
+The example showcases how the algorithm balances between:
+- Pairwise steps for efficient optimization
+- Periodic direct solves for handling the quadratic objective
+- Lazy (approximate) linear minimization steps for improved iteration complexity
+
+It also demonstrates how to set up custom callbacks for tracking algorithm progress.
+=#
+
+using LinearAlgebra
+using Test
+using Random
+using FrankWolfe
+import HiGHS
+
+n = Int(1e4)
+k = 10000
+
+# s = rand(1:100)
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+# here the optimal solution lies in the interior; if you want an optimal solution on a face and not the interior, use:
+# const xp = xpi;
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0)
+
+const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    trajectory=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+active_set_sparse = FrankWolfe.ActiveSetSparsifier(
+    FrankWolfe.ActiveSet([1.0], [x00], similar(x00)),
+    HiGHS.Optimizer(),
+)
+trajectoryBPCG_as_sparse = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_sparse,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_as_sparse),
+);
+
+as_cardinality_bpcg = getindex.(trajectoryBPCG_standard, 6)
+as_cardinality_sparse = getindex.(trajectoryBPCG_as_sparse, 6)
+@test maximum(as_cardinality_sparse - as_cardinality_bpcg) <= 0
+
+dual_gap_bpcg = getindex.(trajectoryBPCG_standard, 4)
+dual_gap_sparse = getindex.(trajectoryBPCG_as_sparse, 4)
+@test maximum(dual_gap_sparse - dual_gap_bpcg) <= 1e-7