From 935a5f332f6b0422d1496e9d4814c680d9f464bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Mon, 7 Oct 2024 18:19:54 +0200 Subject: [PATCH 01/19] sparsifier active set --- examples/blended_pairwise_with_sparsify.jl | 30 +++++++-- src/FrankWolfe.jl | 1 + src/active_set_quadratic.jl | 1 - src/active_set_sparsifier.jl | 76 ++++++++++++++++++++++ src/moi_oracle.jl | 2 +- 5 files changed, 101 insertions(+), 9 deletions(-) create mode 100644 src/active_set_sparsifier.jl diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl index 6927b8e4e..30792a8ec 100644 --- a/examples/blended_pairwise_with_sparsify.jl +++ b/examples/blended_pairwise_with_sparsify.jl @@ -15,9 +15,6 @@ using LinearAlgebra using Random import Pkg -Pkg.add("GLPK") -Pkg.add("HiGHS") -import GLPK import HiGHS # lp_solver = GLPK.Optimizer @@ -80,12 +77,11 @@ FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100) trajectoryBPCG_standard = [] callback = build_callback(trajectoryBPCG_standard) -x0 = deepcopy(x00) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - x0, + copy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, @@ -98,12 +94,11 @@ x0 = deepcopy(x00) trajectoryBPCG_quadratic = [] callback = build_callback(trajectoryBPCG_quadratic) -x0 = deepcopy(x00) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - x0, + copy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, @@ -116,6 +111,27 @@ x0 = deepcopy(x00) lp_solver=lp_solver, ); +trajectoryBPCG_as_sparse = [] +callback = build_callback(trajectoryBPCG_as_sparse) + +active_set_sparse = FrankWolfe.ActiveSetSparsifier(FrankWolfe.ActiveSet([1.0], [x00], similar(x00)), HiGHS.Optimizer()) + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, + sparsify=false, +# squadratic=true, # activate to see the effect of the numerical precision of the LP solver +); + # Reduction primal/dual error vs. sparsity of solution dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic] diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index e6d29c3cc..e17e40038 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -34,6 +34,7 @@ include("moi_oracle.jl") include("function_gradient.jl") include("active_set.jl") include("active_set_quadratic.jl") +include("active_set_sparsifier.jl") include("blended_cg.jl") include("afw.jl") diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index b200a8912..d252d186d 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -310,4 +310,3 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) compute_active_set_iterate!(warm_as) return warm_as end - diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl new file mode 100644 index 000000000..bc54c10bf --- /dev/null +++ b/src/active_set_sparsifier.jl @@ -0,0 +1,76 @@ +struct ActiveSetSparsifier{AT, R, IT, AS <: AbstractActiveSet{AT, R, IT}, OT <: MOI.AbstractOptimizer} <: AbstractActiveSet{AT,R,IT} + active_set::AS + weights::Vector{R} + atoms::Vector{AT} + x::IT + optimizer::OT + minimum_vertices::Int + solve_frequency::Int + counter::Base.RefValue{Int} +end + +function ActiveSetSparsifier(active_set::AbstractActiveSet, optimizer::MOI.AbstractOptimizer; minimum_vertices=50, solve_frequency=50) + return ActiveSetSparsifier(active_set, active_set.weights, active_set.atoms, active_set.x, optimizer, minimum_vertices, solve_frequency, Ref(0)) +end + +function Base.push!(as::ActiveSetSparsifier, (λ, a)) + push!(as.active_set, (λ, a)) +end + +Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetSparsifier{AS, OT, AT, R, IT}, + lambda, atom, renorm=true, idx=nothing; + kwargs... +) where {AS, OT, AT, R, IT} + active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...) + n = length(as) + as.counter[] += 1 + if n > as.minimum_vertices && mod(as.counter[], as.solve_frequency) == 0 + # sparsifying active set + MOI.empty!(as.optimizer) + x0 = as.active_set.x + λ = MOI.add_variables(as.optimizer, n) + # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(as.optimizer, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(as.optimizer, sum(λ; init=0.0), MOI.EqualTo(1.0)) + x_sum = 0 * as.active_set.atoms[1] + for (idx, atom) in enumerate(as.active_set.atoms) + x_sum += λ[idx] * as.active_set.atoms + end + for idx in eachindex(x_sum) + MOI.add_constraint(as.optimizer, x_sum[idx], MOI.EqualTo(x0[idx])) + end + # Set a dummy objective (minimize ∑λ) + dummy_objective = sum(λ; init=0.0) + MOI.set(as.optimizer, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(as.optimizer, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(as.optimizer) + if MOI.get(as.optimizer, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(as.optimizer, MOI.VariablePrimal(), λ[idx]) + if weight_value <= min(1e-3 / n, 1e-8) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set.atoms, indices_to_remove) + deleteat!(as.active_set.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.active_set.weights .= new_weights + active_set_renormalize!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetSparsifier) = active_set_renormalize!(as.active_set) + +active_set_argmin(as::ActiveSetSparsifier, direction) = active_set_argmin(as.active_set, direction) +active_set_argminmax(as::ActiveSetSparsifier, direction; Φ=0.5) = active_set_argminmax(as.active_set, direction; Φ=Φ) diff --git a/src/moi_oracle.jl b/src/moi_oracle.jl index 875b8001d..1471bd6e7 100644 --- a/src/moi_oracle.jl +++ b/src/moi_oracle.jl @@ -1,6 +1,6 @@ """ - MathOptLMO{OT <: MOI.Optimizer} <: LinearMinimizationOracle + MathOptLMO{OT <: MOI.AbstractOptimizer} <: LinearMinimizationOracle Linear minimization oracle with feasible space defined through a MathOptInterface.Optimizer. The oracle call sets the direction and reruns the optimizer. From d84dadb29f13a28fea36ebd00b0cb275494323a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Tue, 8 Oct 2024 16:17:09 +0200 Subject: [PATCH 02/19] start working on LP AS --- examples/blended_pairwise_with_direct.jl | 53 ++++++++++++-- src/active_set_quadratic.jl | 91 ++++++++++++++++++++---- 2 files changed, 126 insertions(+), 18 deletions(-) diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index 576361db1..f37a593d1 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -21,6 +21,7 @@ Pkg.add("GLPK") Pkg.add("HiGHS") import GLPK import HiGHS +import MathOptInterface as MOI # lp_solver = GLPK.Optimizer lp_solver = HiGHS.Optimizer @@ -30,7 +31,7 @@ lp_solver = HiGHS.Optimizer include("../examples/plot_utils.jl") n = Int(1e4) -k = 10000 +k = 1000 # s = rand(1:100) s = 10 @@ -112,12 +113,11 @@ x0 = deepcopy(x00) trajectoryBPCG_quadratic_nosquad = [] callback = build_callback(trajectoryBPCG_quadratic_nosquad) -x0 = deepcopy(x00) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - x0, + copy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, @@ -130,9 +130,52 @@ x0 = deepcopy(x00) lp_solver=lp_solver, ); +# Just projection quadratic +trajectoryBPCG_quadratic_as = [] +callback = build_callback(trajectoryBPCG_quadratic_as) + +as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + +# with LP acceleration +trajectoryBPCG_quadratic_as_lp = [] +callback = build_callback(trajectoryBPCG_quadratic_as_lp) + +as_quad_lp = FrankWolfe.ActiveSetQuadratic( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_lp, + max_iteration=1000, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + + # Update the data and labels for plotting -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad] -labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)"] +dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_as_lp] +labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "AS_Quad_LP"] # Plot sparsity # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index d252d186d..c7849663d 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -9,7 +9,7 @@ The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` so that the gradient is simply `∇f(x)=Ax+b`. """ -struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} +struct ActiveSetQuadratic{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.AbstractOptimizer}} <: AbstractActiveSet{AT,R,IT} weights::Vector{R} atoms::Vector{AT} x::IT @@ -20,6 +20,9 @@ struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} dots_b::Vector{R} # stores ⟨b, atoms[i]⟩ weights_prev::Vector{R} modified::BitVector + lp_optimizer::OT + lp_frequency::Int + counter::Base.RefValue{Int} end function detect_quadratic_function(grad!, x0; test=true) @@ -48,11 +51,12 @@ function detect_quadratic_function(grad!, x0; test=true) return A, b end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} - return ActiveSetQuadratic(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) +function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadratic(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency) end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} +function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R,H} n = length(tuple_values) weights = Vector{R}(undef, n) atoms = Vector{AT}(undef, n) @@ -71,16 +75,18 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified, lp_optimizer, lp_frequency, Ref(0)) + compute_active_set_iterate!(as) return as end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) +function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadratic{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} +function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer=nothing; lp_frequency=lp_frequency) where {AT,R,H} n = length(tuple_values) weights = Vector{R}(undef, n) atoms = Vector{AT}(undef, n) @@ -99,7 +105,7 @@ function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number, dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified, lp_optimizer, lp_frequency, Ref(0)) compute_active_set_iterate!(as) return as end @@ -116,11 +122,11 @@ function Base.:*(a::Identity, b) return a.λ * b end end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) +function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R} + return ActiveSetQuadratic(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b) +function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R} + return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) end # these three functions do not update the active set iterate @@ -193,10 +199,21 @@ function active_set_update!( idx = find_atom(active_set, atom) end if idx > 0 + @info "old atom" @inbounds active_set.weights[idx] += lambda @inbounds active_set.modified[idx] = true else push!(active_set, (lambda, atom)) + if active_set.lp_optimizer !== nothing + active_set.counter[] += 1 + @show active_set.counter[] + @show mod(active_set.counter[], active_set.lp_frequency) + if mod(active_set.counter[], active_set.lp_frequency) == 0 + @show "solving quadratic" + solve_quadratic_activeset_lp!(active_set) + return active_set + end + end end if renorm add_dropped_vertices = add_dropped_vertices ? vertex_storage !== nothing : add_dropped_vertices @@ -207,6 +224,54 @@ function active_set_update!( return active_set end +# generic quadratic +function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT, H}) where {AT, R, IT, H} + # TODO +end + +# special case of scaled identity Hessian +function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT, <: Identity}) where {AT, R, IT} + hessian_scaling = active_set.A.λ + # number of vertices and ambient dimension + nv = length(active_set) + o = active_set.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0)) + # 2 * a Aᵗ A λ == -Aᵗ b + lhs = 0.0 * λ + rhs = 0 * active_set.weights + for (idx, atom) in enumerate(active_set.atoms) + rhs[idx] = -dot(atom, active_set.b) + lhs[idx] = 2 * hessian_scaling * sum(λ[j] * dot(atom, active_set.atoms[j]) for j in 1:nv) + end + MOI.add_constraint.(o, lhs, MOI.EqualTo.(rhs)) + dummy_objective = sum(λ, init=0.0) + MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= min(1e-3 / nv, 1e-8) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(active_set.atoms, indices_to_remove) + deleteat!(active_set.weights, indices_to_remove) + @assert length(active_set) == length(new_weights) + active_set.weights .= new_weights + active_set_renormalize!(active_set) + end + return active_set +end + function active_set_renormalize!(active_set::ActiveSetQuadratic) renorm = sum(active_set.weights) active_set.weights ./= renorm From bd2c6c022788777e3cdae704d898dd4e884a2a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Wed, 9 Oct 2024 14:11:16 +0200 Subject: [PATCH 03/19] first working quadratic --- examples/blended_pairwise_with_direct.jl | 33 +++- src/FrankWolfe.jl | 1 + src/active_set_quadratic.jl | 4 +- src/active_set_quadratic_reloaded.jl | 183 +++++++++++++++++++++++ 4 files changed, 215 insertions(+), 6 deletions(-) create mode 100644 src/active_set_quadratic_reloaded.jl diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index f37a593d1..138b07147 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -149,20 +149,45 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, callback=callback, ); +as_quad_lp = FrankWolfe.ActiveSetQuadratic( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + # with LP acceleration trajectoryBPCG_quadratic_as_lp = [] callback = build_callback(trajectoryBPCG_quadratic_as_lp) -as_quad_lp = FrankWolfe.ActiveSetQuadratic( +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_lp, + max_iteration=1000, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + +as_quad_reloaded = FrankWolfe.ActiveSetQuadraticReloaded( [(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp, MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), ) + +# with LP acceleration +trajectoryBPCG_quadratic_reloaded = [] +callback = build_callback(trajectoryBPCG_quadratic_reloaded) + @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - as_quad_lp, + as_quad_reloaded, max_iteration=1000, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, @@ -174,8 +199,8 @@ as_quad_lp = FrankWolfe.ActiveSetQuadratic( # Update the data and labels for plotting -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_as_lp] -labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "AS_Quad_LP"] +dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_as_lp, trajectoryBPCG_quadratic_reloaded] +labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "AS_Quad_LP", "Reloaded"] # Plot sparsity # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index e17e40038..5aa0cbee1 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -34,6 +34,7 @@ include("moi_oracle.jl") include("function_gradient.jl") include("active_set.jl") include("active_set_quadratic.jl") +include("active_set_quadratic_reloaded.jl") include("active_set_sparsifier.jl") include("blended_cg.jl") diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index c7849663d..3332958e1 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -263,10 +263,10 @@ function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT, push!(new_weights, weight_value) end end - deleteat!(active_set.atoms, indices_to_remove) - deleteat!(active_set.weights, indices_to_remove) + deleteat!(active_set, indices_to_remove) @assert length(active_set) == length(new_weights) active_set.weights .= new_weights + active_set.weights_prev .= new_weights active_set_renormalize!(active_set) end return active_set diff --git a/src/active_set_quadratic_reloaded.jl b/src/active_set_quadratic_reloaded.jl new file mode 100644 index 000000000..6207c307d --- /dev/null +++ b/src/active_set_quadratic_reloaded.jl @@ -0,0 +1,183 @@ + +""" + ActiveSetQuadraticReloaded{AT, R, IT} + +Represents an active set of extreme vertices collected in a FW algorithm, +along with their coefficients `(λ_i, a_i)`. +`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. +The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. +The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` +so that the gradient is simply `∇f(x)=Ax+b`. +""" +struct ActiveSetQuadraticReloaded{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.AbstractOptimizer}} <: AbstractActiveSet{AT,R,IT} + weights::Vector{R} + atoms::Vector{AT} + x::IT + A::H # Hessian matrix + b::IT # linear term + lp_optimizer::OT + lp_frequency::Int + counter::Base.RefValue{Int} +end + +function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticReloaded(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency) +end + +function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R,H} + n = length(tuple_values) + weights = Vector{R}(undef, n) + atoms = Vector{AT}(undef, n) + @inbounds for idx in 1:n + weights[idx] = tuple_values[idx][1] + atoms[idx] = tuple_values[idx][2] + end + x = similar(b) + as = ActiveSetQuadraticReloaded(weights, atoms, x, A, b, lp_optimizer, lp_frequency, Ref(0)) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticReloaded{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency) +end + +function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer=nothing; lp_frequency=lp_frequency) where {AT,R,H} + n = length(tuple_values) + weights = Vector{R}(undef, n) + atoms = Vector{AT}(undef, n) + @inbounds for idx in 1:n + weights[idx] = tuple_values[idx][1] + atoms[idx] = tuple_values[idx][2] + end + x = similar(b) + as = ActiveSetQuadraticReloaded{AT,R,typeof(x),H}(weights, atoms, x, A, b, lp_optimizer, lp_frequency, Ref(0)) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R} + return ActiveSetQuadraticReloaded(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) +end +function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R} + return ActiveSetQuadraticReloaded{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) +end + +# these three functions do not update the active set iterate + +function Base.push!(as::ActiveSetQuadraticReloaded{AT,R}, (λ, a)) where {AT,R} + push!(as.weights, λ) + push!(as.atoms, a) + return as +end + +function Base.deleteat!(as::ActiveSetQuadraticReloaded, idx::Int) + deleteat!(as.weights, idx) + deleteat!(as.atoms, idx) + return as +end + +function Base.empty!(as::ActiveSetQuadraticReloaded) + empty!(as.atoms) + empty!(as.weights) + as.x .= 0 + return as +end + +function active_set_update!( + active_set::ActiveSetQuadraticReloaded{AT,R}, + lambda, atom, renorm=true, idx=nothing; + weight_purge_threshold=weight_purge_threshold_default(R), + add_dropped_vertices=false, + vertex_storage=nothing, +) where {AT,R} + # rescale active set + active_set.weights .*= (1 - lambda) + # add value for new atom + if idx === nothing + idx = find_atom(active_set, atom) + end + if idx > 0 + @info "old atom" + @inbounds active_set.weights[idx] += lambda + else + push!(active_set, (lambda, atom)) + if active_set.lp_optimizer !== nothing + active_set.counter[] += 1 + @show active_set.counter[] + @show mod(active_set.counter[], active_set.lp_frequency) + if mod(active_set.counter[], active_set.lp_frequency) == 0 + @show "solving quadratic" + solve_quadratic_activeset_lp!(active_set) + return active_set + end + end + end + if renorm + add_dropped_vertices = add_dropped_vertices ? vertex_storage !== nothing : add_dropped_vertices + active_set_cleanup!(active_set; weight_purge_threshold=weight_purge_threshold, update=false, add_dropped_vertices=add_dropped_vertices, vertex_storage=vertex_storage) + active_set_renormalize!(active_set) + end + active_set_update_scale!(active_set.x, lambda, atom) + return active_set +end + +# generic quadratic +function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT, R, IT, H}) where {AT, R, IT, H} + error("TODO") +end + +# special case of scaled identity Hessian +function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT, R, IT, <: Identity}) where {AT, R, IT} + hessian_scaling = active_set.A.λ + # number of vertices and ambient dimension + nv = length(active_set) + o = active_set.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + A = zeros(length(active_set.x), nv) + for (idx, atom) in enumerate(active_set.atoms) + A[:,idx] .= atom + end + # λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0)) + # 2 * a Aᵗ A λ == -Aᵗ b + lhs = 0.0 * λ + rhs = 0 * active_set.weights + for (idx, atom) in enumerate(active_set.atoms) + lhs[idx] = 2 * hessian_scaling * sum(λ[j] * dot(atom, active_set.atoms[j]) for j in 1:nv) + rhs[idx] = -dot(atom, active_set.b) + end + MOI.add_constraint.(o, lhs, MOI.EqualTo.(rhs)) + dummy_objective = sum(λ, init=0.0) + MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= min(1e-3 / nv, 1e-8) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(active_set.atoms, indices_to_remove) + deleteat!(active_set.weights, indices_to_remove) + @assert length(active_set) == length(new_weights) + active_set.weights .= new_weights + active_set_renormalize!(active_set) + end + return active_set +end + +function active_set_renormalize!(active_set::ActiveSetQuadraticReloaded) + renorm = sum(active_set.weights) + active_set.weights ./= renorm + return active_set +end From a708bcee099c0108ab12a9b0dc73b134a6f73672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Wed, 9 Oct 2024 14:14:41 +0200 Subject: [PATCH 04/19] remove quadratic LP from current --- src/active_set_quadratic.jl | 87 ++++++------------------------------- 1 file changed, 13 insertions(+), 74 deletions(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index 3332958e1..fe40c85b4 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -9,7 +9,7 @@ The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` so that the gradient is simply `∇f(x)=Ax+b`. """ -struct ActiveSetQuadratic{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.AbstractOptimizer}} <: AbstractActiveSet{AT,R,IT} +struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} weights::Vector{R} atoms::Vector{AT} x::IT @@ -20,9 +20,6 @@ struct ActiveSetQuadratic{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.Abstrac dots_b::Vector{R} # stores ⟨b, atoms[i]⟩ weights_prev::Vector{R} modified::BitVector - lp_optimizer::OT - lp_frequency::Int - counter::Base.RefValue{Int} end function detect_quadratic_function(grad!, x0; test=true) @@ -51,12 +48,12 @@ function detect_quadratic_function(grad!, x0; test=true) return A, b end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R} +function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadratic(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency) + return ActiveSetQuadratic(tuple_values, A, b) end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R,H} +function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} n = length(tuple_values) weights = Vector{R}(undef, n) atoms = Vector{AT}(undef, n) @@ -75,18 +72,18 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) - as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified, lp_optimizer, lp_frequency, Ref(0)) + as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) compute_active_set_iterate!(as) return as end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R} +function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadratic{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency) + return ActiveSetQuadratic{AT,R}(tuple_values, A, b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer=nothing; lp_frequency=lp_frequency) where {AT,R,H} +function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} n = length(tuple_values) weights = Vector{R}(undef, n) atoms = Vector{AT}(undef, n) @@ -105,7 +102,7 @@ function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number, dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified, lp_optimizer, lp_frequency, Ref(0)) + as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) compute_active_set_iterate!(as) return as end @@ -122,11 +119,11 @@ function Base.:*(a::Identity, b) return a.λ * b end end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R} - return ActiveSetQuadratic(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) +function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} + return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) +function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} + return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b) end # these three functions do not update the active set iterate @@ -204,16 +201,6 @@ function active_set_update!( @inbounds active_set.modified[idx] = true else push!(active_set, (lambda, atom)) - if active_set.lp_optimizer !== nothing - active_set.counter[] += 1 - @show active_set.counter[] - @show mod(active_set.counter[], active_set.lp_frequency) - if mod(active_set.counter[], active_set.lp_frequency) == 0 - @show "solving quadratic" - solve_quadratic_activeset_lp!(active_set) - return active_set - end - end end if renorm add_dropped_vertices = add_dropped_vertices ? vertex_storage !== nothing : add_dropped_vertices @@ -224,54 +211,6 @@ function active_set_update!( return active_set end -# generic quadratic -function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT, H}) where {AT, R, IT, H} - # TODO -end - -# special case of scaled identity Hessian -function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT, <: Identity}) where {AT, R, IT} - hessian_scaling = active_set.A.λ - # number of vertices and ambient dimension - nv = length(active_set) - o = active_set.lp_optimizer - MOI.empty!(o) - λ = MOI.add_variables(o, nv) - # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1 - MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) - MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0)) - # 2 * a Aᵗ A λ == -Aᵗ b - lhs = 0.0 * λ - rhs = 0 * active_set.weights - for (idx, atom) in enumerate(active_set.atoms) - rhs[idx] = -dot(atom, active_set.b) - lhs[idx] = 2 * hessian_scaling * sum(λ[j] * dot(atom, active_set.atoms[j]) for j in 1:nv) - end - MOI.add_constraint.(o, lhs, MOI.EqualTo.(rhs)) - dummy_objective = sum(λ, init=0.0) - MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) - MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) - MOI.optimize!(o) - if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) - indices_to_remove = Int[] - new_weights = R[] - for idx in eachindex(λ) - weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= min(1e-3 / nv, 1e-8) - push!(indices_to_remove, idx) - else - push!(new_weights, weight_value) - end - end - deleteat!(active_set, indices_to_remove) - @assert length(active_set) == length(new_weights) - active_set.weights .= new_weights - active_set.weights_prev .= new_weights - active_set_renormalize!(active_set) - end - return active_set -end - function active_set_renormalize!(active_set::ActiveSetQuadratic) renorm = sum(active_set.weights) active_set.weights ./= renorm From f67d381069a7463051e51ac4f1139e1ca114e7b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Wed, 9 Oct 2024 14:23:53 +0200 Subject: [PATCH 05/19] cleanup --- examples/blended_pairwise_with_direct.jl | 38 ++++-------------------- src/active_set_quadratic_reloaded.jl | 4 --- 2 files changed, 6 insertions(+), 36 deletions(-) diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index 138b07147..50909d8d7 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -31,7 +31,7 @@ lp_solver = HiGHS.Optimizer include("../examples/plot_utils.jl") n = Int(1e4) -k = 1000 +k = 10000 # s = rand(1:100) s = 10 @@ -59,7 +59,7 @@ lmo = FrankWolfe.KSparseLMO(5, 1.0) # lmo = FrankWolfe.ProbabilitySimplexOracle(1.0); # lmo = FrankWolfe.UnitSimplexOracle(1.0); -x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) function build_callback(trajectory_arr) return function callback(state, active_set, args...) @@ -73,12 +73,11 @@ FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100) trajectoryBPCG_standard = [] callback = build_callback(trajectoryBPCG_standard) -x0 = deepcopy(x00) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - x0, + deepcopy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, @@ -92,12 +91,11 @@ x0 = deepcopy(x00) trajectoryBPCG_quadratic = [] callback = build_callback(trajectoryBPCG_quadratic) -x0 = deepcopy(x00) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - x0, + deepcopy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, @@ -149,30 +147,6 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, callback=callback, ); -as_quad_lp = FrankWolfe.ActiveSetQuadratic( - [(1.0, copy(x00))], - 2 * LinearAlgebra.I, -2xp, - MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), -) - -# with LP acceleration -trajectoryBPCG_quadratic_as_lp = [] -callback = build_callback(trajectoryBPCG_quadratic_as_lp) - -@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( - f, - grad!, - lmo, - as_quad_lp, - max_iteration=1000, - line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, - trajectory=true, - callback=callback, -); - as_quad_reloaded = FrankWolfe.ActiveSetQuadraticReloaded( [(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp, @@ -199,8 +173,8 @@ callback = build_callback(trajectoryBPCG_quadratic_reloaded) # Update the data and labels for plotting -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_as_lp, trajectoryBPCG_quadratic_reloaded] -labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "AS_Quad_LP", "Reloaded"] +dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_reloaded] +labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "Reloaded"] # Plot sparsity # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) diff --git a/src/active_set_quadratic_reloaded.jl b/src/active_set_quadratic_reloaded.jl index 6207c307d..ad1a7e75d 100644 --- a/src/active_set_quadratic_reloaded.jl +++ b/src/active_set_quadratic_reloaded.jl @@ -100,16 +100,12 @@ function active_set_update!( idx = find_atom(active_set, atom) end if idx > 0 - @info "old atom" @inbounds active_set.weights[idx] += lambda else push!(active_set, (lambda, atom)) if active_set.lp_optimizer !== nothing active_set.counter[] += 1 - @show active_set.counter[] - @show mod(active_set.counter[], active_set.lp_frequency) if mod(active_set.counter[], active_set.lp_frequency) == 0 - @show "solving quadratic" solve_quadratic_activeset_lp!(active_set) return active_set end From fa4051f664db9ce2e92cf65a1819202195075b0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Wed, 9 Oct 2024 21:28:18 +0200 Subject: [PATCH 06/19] HiGHS in test deps --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e56223008..ca298fa1a 100644 --- a/Project.toml +++ b/Project.toml @@ -43,6 +43,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" @@ -58,4 +59,4 @@ Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] +test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] From 7c1ff01a37e93644218c2c73d59dae93c850eb0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Wed, 9 Oct 2024 21:29:16 +0200 Subject: [PATCH 07/19] working reworked LP quadratic --- examples/blended_pairwise_with_direct.jl | 34 +++++++++++++++++----- src/active_set_quadratic_reloaded.jl | 37 +++++++++++++----------- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index 50909d8d7..77cff99e6 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -16,10 +16,6 @@ using FrankWolfe using LinearAlgebra using Random -import Pkg -Pkg.add("GLPK") -Pkg.add("HiGHS") -import GLPK import HiGHS import MathOptInterface as MOI @@ -31,7 +27,7 @@ lp_solver = HiGHS.Optimizer include("../examples/plot_utils.jl") n = Int(1e4) -k = 10000 +k = 5000 # s = rand(1:100) s = 10 @@ -147,6 +143,28 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, callback=callback, ); +as_quad = FrankWolfe.ActiveSetQuadratic( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, -2xp, +) + +# with quadratic active set +trajectoryBPCG_quadratic_as = [] +callback = build_callback(trajectoryBPCG_quadratic_as) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + as_quad_reloaded = FrankWolfe.ActiveSetQuadraticReloaded( [(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp, @@ -162,7 +180,7 @@ callback = build_callback(trajectoryBPCG_quadratic_reloaded) grad!, lmo, as_quad_reloaded, - max_iteration=1000, + max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), @@ -173,8 +191,8 @@ callback = build_callback(trajectoryBPCG_quadratic_reloaded) # Update the data and labels for plotting -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_reloaded] -labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "Reloaded"] +dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_reloaded] +labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded"] # Plot sparsity # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) diff --git a/src/active_set_quadratic_reloaded.jl b/src/active_set_quadratic_reloaded.jl index ad1a7e75d..0fe816073 100644 --- a/src/active_set_quadratic_reloaded.jl +++ b/src/active_set_quadratic_reloaded.jl @@ -20,7 +20,7 @@ struct ActiveSetQuadraticReloaded{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI counter::Base.RefValue{Int} end -function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R} +function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=100) where {AT,R} A, b = detect_quadratic_function(grad!, tuple_values[1][2]) return ActiveSetQuadraticReloaded(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency) end @@ -39,7 +39,7 @@ function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A return as end -function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R} +function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=100) where {AT,R} A, b = detect_quadratic_function(grad!, tuple_values[1][2]) return ActiveSetQuadraticReloaded{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency) end @@ -58,10 +58,10 @@ function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{< return as end -function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R} +function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=100) where {AT,R} return ActiveSetQuadraticReloaded(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) end -function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R} +function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=100) where {AT,R} return ActiveSetQuadraticReloaded{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) end @@ -133,22 +133,23 @@ function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT o = active_set.lp_optimizer MOI.empty!(o) λ = MOI.add_variables(o, nv) - A = zeros(length(active_set.x), nv) - for (idx, atom) in enumerate(active_set.atoms) - A[:,idx] .= atom - end # λ ≥ 0, ∑ λ == 1 MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0)) - # 2 * a Aᵗ A λ == -Aᵗ b - lhs = 0.0 * λ - rhs = 0 * active_set.weights - for (idx, atom) in enumerate(active_set.atoms) - lhs[idx] = 2 * hessian_scaling * sum(λ[j] * dot(atom, active_set.atoms[j]) for j in 1:nv) - rhs[idx] = -dot(atom, active_set.b) + # a Aᵗ A λ == -Aᵗ b + # lhs = 0.0 * λ + # rhs = 0 * active_set.weights + for atom in active_set.atoms + lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) + Base.sizehint!(lhs.terms, nv) + # replaces direct sum because of MOI and MutableArithmetic slow sums + for j in 1:nv + push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, active_set.atoms[j]), λ[j])) + end + rhs = -dot(atom, active_set.b) + MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) end - MOI.add_constraint.(o, lhs, MOI.EqualTo.(rhs)) - dummy_objective = sum(λ, init=0.0) + dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) MOI.optimize!(o) @@ -157,7 +158,7 @@ function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT new_weights = R[] for idx in eachindex(λ) weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= min(1e-3 / nv, 1e-8) + if weight_value <= 1e-10 push!(indices_to_remove, idx) else push!(new_weights, weight_value) @@ -167,7 +168,9 @@ function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT deleteat!(active_set.weights, indices_to_remove) @assert length(active_set) == length(new_weights) active_set.weights .= new_weights + @assert all(>=(0), new_weights) active_set_renormalize!(active_set) + compute_active_set_iterate!(active_set) end return active_set end From 73873d23e1133d536775cb100cb50eaf4d20d860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Thu, 10 Oct 2024 17:45:24 +0200 Subject: [PATCH 08/19] working version generic quadratic --- examples/blended_pairwise_with_direct.jl | 34 ++++- src/FrankWolfe.jl | 2 +- src/active_set_quadratic.jl | 1 + src/active_set_quadratic_direct_solve.jl | 165 ++++++++++++++++++++ src/active_set_quadratic_reloaded.jl | 182 ----------------------- 5 files changed, 196 insertions(+), 188 deletions(-) create mode 100644 src/active_set_quadratic_direct_solve.jl delete mode 100644 src/active_set_quadratic_reloaded.jl diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index 77cff99e6..066d7ed03 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -165,21 +165,45 @@ callback = build_callback(trajectoryBPCG_quadratic_as) callback=callback, ); -as_quad_reloaded = FrankWolfe.ActiveSetQuadraticReloaded( +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( [(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp, MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), ) # with LP acceleration -trajectoryBPCG_quadratic_reloaded = [] -callback = build_callback(trajectoryBPCG_quadratic_reloaded) +trajectoryBPCG_quadratic_direct = [] +callback = build_callback(trajectoryBPCG_quadratic_direct) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - as_quad_reloaded, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct_generic = [] +callback = build_callback(trajectoryBPCG_quadratic_direct_generic) + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, @@ -191,7 +215,7 @@ callback = build_callback(trajectoryBPCG_quadratic_reloaded) # Update the data and labels for plotting -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_reloaded] +dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct] labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded"] # Plot sparsity diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index 5aa0cbee1..cc2fef145 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -34,7 +34,7 @@ include("moi_oracle.jl") include("function_gradient.jl") include("active_set.jl") include("active_set_quadratic.jl") -include("active_set_quadratic_reloaded.jl") +include("active_set_quadratic_direct_solve.jl") include("active_set_sparsifier.jl") include("blended_cg.jl") diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index fe40c85b4..9736c911b 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -119,6 +119,7 @@ function Base.:*(a::Identity, b) return a.λ * b end end + function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) end diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl new file mode 100644 index 000000000..687c22491 --- /dev/null +++ b/src/active_set_quadratic_direct_solve.jl @@ -0,0 +1,165 @@ + +""" + ActiveSetQuadraticLinearSolve{AT, R, IT} + +Represents an active set of extreme vertices collected in a FW algorithm, +along with their coefficients `(λ_i, a_i)`. +`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. +The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. +The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` +so that the gradient is simply `∇f(x)=Ax+b`. +""" +struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet} <: AbstractActiveSet{AT,R,IT} + weights::Vector{R} + atoms::Vector{AT} + x::IT + A::H # Hessian matrix + b::IT # linear term + active_set::AS + lp_optimizer::OT + lp_frequency::Int + counter::Base.RefValue{Int} +end + +function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer; lp_frequency=100) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency) +end + +function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer; lp_frequency=10) where {AT,R,H} + inner_as = ActiveSetQuadratic(tuple_values, A, b) + as = ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, lp_frequency, Ref(0)) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer; lp_frequency=100) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency) +end + +function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer; lp_frequency=lp_frequency) where {AT,R,H} + inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b) + as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, lp_frequency, Ref(0)) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer; lp_frequency=100) where {AT,R} + return ActiveSetQuadraticLinearSolve(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) +end +function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer; lp_frequency=100) where {AT,R} + return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) +end + +# all mutating functions are delegated to the inner active set + +Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple) + +Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetQuadraticLinearSolve{AT,R}, + lambda, atom, renorm=true, idx=nothing; + weight_purge_threshold=weight_purge_threshold_default(R), + add_dropped_vertices=false, + vertex_storage=nothing, +) where {AT,R} + if idx === nothing + idx = find_atom(as, atom) + end + active_set_update!(as.active_set, lambda, atom, renorm, idx; weight_purge_threshold=weight_purge_threshold, add_dropped_vertices=add_dropped_vertices, vertex_storage=vertex_storage) + # new atom introduced, we can solve the auxiliary LP + if idx < 0 + as.counter[] += 1 + if mod(as.counter[], as.lp_frequency) == 0 + solve_quadratic_activeset_lp!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetQuadraticLinearSolve) = active_set_renormalize!(as.active_set) + +function active_set_argmin(as::ActiveSetQuadraticLinearSolve, direction) + return active_set_argmin(as.active_set, direction) +end + +function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0.5) + return active_set_argminmax(as.active_set, direction; Φ=Φ) +end + +# generic quadratic with quadratic information provided +function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: AbstractMatrix}) where {AT, R, IT} + nv = length(as) + o = as.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + # λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0)) + # Aᵗ Q A λ == -Aᵗ b + for atom in as.atoms + lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) + Base.sizehint!(lhs.terms, nv) + # replaces direct sum because of MOI and MutableArithmetic slow sums + for j in 1:nv + push!(lhs.terms, MOI.ScalarAffineTerm(dot(atom, as.A, as.atoms[j]), λ[j])) + end + rhs = -dot(atom, as.b) + MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) + end + dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) +end + +# special case of scaled identity Hessian +function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: Identity}) where {AT, R, IT} + hessian_scaling = as.A.λ + nv = length(as) + o = as.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + # λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0)) + # a Aᵗ A λ == -Aᵗ b + for atom in as.atoms + lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) + Base.sizehint!(lhs.terms, nv) + # replaces direct sum because of MOI and MutableArithmetic slow sums + for j in 1:nv + push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, as.atoms[j]), λ[j])) + end + rhs = -dot(atom, as.b) + MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) + end + dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 1e-10 + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.atoms, indices_to_remove) + deleteat!(as.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.weights .= new_weights + @assert all(>=(0), new_weights) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + end + return as +end diff --git a/src/active_set_quadratic_reloaded.jl b/src/active_set_quadratic_reloaded.jl deleted file mode 100644 index 0fe816073..000000000 --- a/src/active_set_quadratic_reloaded.jl +++ /dev/null @@ -1,182 +0,0 @@ - -""" - ActiveSetQuadraticReloaded{AT, R, IT} - -Represents an active set of extreme vertices collected in a FW algorithm, -along with their coefficients `(λ_i, a_i)`. -`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. -The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. -The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` -so that the gradient is simply `∇f(x)=Ax+b`. -""" -struct ActiveSetQuadraticReloaded{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.AbstractOptimizer}} <: AbstractActiveSet{AT,R,IT} - weights::Vector{R} - atoms::Vector{AT} - x::IT - A::H # Hessian matrix - b::IT # linear term - lp_optimizer::OT - lp_frequency::Int - counter::Base.RefValue{Int} -end - -function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=100) where {AT,R} - A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadraticReloaded(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency) -end - -function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R,H} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - end - x = similar(b) - as = ActiveSetQuadraticReloaded(weights, atoms, x, A, b, lp_optimizer, lp_frequency, Ref(0)) - compute_active_set_iterate!(as) - return as -end - -function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=100) where {AT,R} - A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadraticReloaded{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency) -end - -function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer=nothing; lp_frequency=lp_frequency) where {AT,R,H} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - end - x = similar(b) - as = ActiveSetQuadraticReloaded{AT,R,typeof(x),H}(weights, atoms, x, A, b, lp_optimizer, lp_frequency, Ref(0)) - compute_active_set_iterate!(as) - return as -end - -function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=100) where {AT,R} - return ActiveSetQuadraticReloaded(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) -end -function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=100) where {AT,R} - return ActiveSetQuadraticReloaded{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) -end - -# these three functions do not update the active set iterate - -function Base.push!(as::ActiveSetQuadraticReloaded{AT,R}, (λ, a)) where {AT,R} - push!(as.weights, λ) - push!(as.atoms, a) - return as -end - -function Base.deleteat!(as::ActiveSetQuadraticReloaded, idx::Int) - deleteat!(as.weights, idx) - deleteat!(as.atoms, idx) - return as -end - -function Base.empty!(as::ActiveSetQuadraticReloaded) - empty!(as.atoms) - empty!(as.weights) - as.x .= 0 - return as -end - -function active_set_update!( - active_set::ActiveSetQuadraticReloaded{AT,R}, - lambda, atom, renorm=true, idx=nothing; - weight_purge_threshold=weight_purge_threshold_default(R), - add_dropped_vertices=false, - vertex_storage=nothing, -) where {AT,R} - # rescale active set - active_set.weights .*= (1 - lambda) - # add value for new atom - if idx === nothing - idx = find_atom(active_set, atom) - end - if idx > 0 - @inbounds active_set.weights[idx] += lambda - else - push!(active_set, (lambda, atom)) - if active_set.lp_optimizer !== nothing - active_set.counter[] += 1 - if mod(active_set.counter[], active_set.lp_frequency) == 0 - solve_quadratic_activeset_lp!(active_set) - return active_set - end - end - end - if renorm - add_dropped_vertices = add_dropped_vertices ? vertex_storage !== nothing : add_dropped_vertices - active_set_cleanup!(active_set; weight_purge_threshold=weight_purge_threshold, update=false, add_dropped_vertices=add_dropped_vertices, vertex_storage=vertex_storage) - active_set_renormalize!(active_set) - end - active_set_update_scale!(active_set.x, lambda, atom) - return active_set -end - -# generic quadratic -function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT, R, IT, H}) where {AT, R, IT, H} - error("TODO") -end - -# special case of scaled identity Hessian -function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT, R, IT, <: Identity}) where {AT, R, IT} - hessian_scaling = active_set.A.λ - # number of vertices and ambient dimension - nv = length(active_set) - o = active_set.lp_optimizer - MOI.empty!(o) - λ = MOI.add_variables(o, nv) - # λ ≥ 0, ∑ λ == 1 - MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) - MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0)) - # a Aᵗ A λ == -Aᵗ b - # lhs = 0.0 * λ - # rhs = 0 * active_set.weights - for atom in active_set.atoms - lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) - Base.sizehint!(lhs.terms, nv) - # replaces direct sum because of MOI and MutableArithmetic slow sums - for j in 1:nv - push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, active_set.atoms[j]), λ[j])) - end - rhs = -dot(atom, active_set.b) - MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) - end - dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) - MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) - MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) - MOI.optimize!(o) - if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) - indices_to_remove = Int[] - new_weights = R[] - for idx in eachindex(λ) - weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= 1e-10 - push!(indices_to_remove, idx) - else - push!(new_weights, weight_value) - end - end - deleteat!(active_set.atoms, indices_to_remove) - deleteat!(active_set.weights, indices_to_remove) - @assert length(active_set) == length(new_weights) - active_set.weights .= new_weights - @assert all(>=(0), new_weights) - active_set_renormalize!(active_set) - compute_active_set_iterate!(active_set) - end - return active_set -end - -function active_set_renormalize!(active_set::ActiveSetQuadraticReloaded) - renorm = sum(active_set.weights) - active_set.weights ./= renorm - return active_set -end From 31dc8be78fdb54011626d00eee15fc52cf02ee7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Fri, 11 Oct 2024 09:01:49 +0200 Subject: [PATCH 09/19] slow version generic quadratic --- examples/blended_pairwise_with_direct.jl | 2 +- src/active_set_quadratic_direct_solve.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index 066d7ed03..3711094c0 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -204,7 +204,7 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic) grad!, lmo, as_quad_direct_generic, - max_iteration=k, + max_iteration=5, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl index 687c22491..f7fef1a97 100644 --- a/src/active_set_quadratic_direct_solve.jl +++ b/src/active_set_quadratic_direct_solve.jl @@ -106,6 +106,7 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, Base.sizehint!(lhs.terms, nv) # replaces direct sum because of MOI and MutableArithmetic slow sums for j in 1:nv + # TODO slow push!(lhs.terms, MOI.ScalarAffineTerm(dot(atom, as.A, as.atoms[j]), λ[j])) end rhs = -dot(atom, as.b) From ed312c5b58112060c880db9b298ed0c5b857ab49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Fri, 11 Oct 2024 17:29:27 +0200 Subject: [PATCH 10/19] faster term manipulation --- examples/blended_pairwise_with_direct.jl | 6 +- src/active_set_quadratic_direct_solve.jl | 73 +++++++++++++++--------- src/utils.jl | 71 +++++++++++++++++++++++ 3 files changed, 121 insertions(+), 29 deletions(-) diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index 3711094c0..5cfe6bf8f 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -204,7 +204,7 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic) grad!, lmo, as_quad_direct_generic, - max_iteration=5, + max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), @@ -215,8 +215,8 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic) # Update the data and labels for plotting -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct] -labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded"] +dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct, trajectoryBPCG_quadratic_direct_generic] +labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded", "Reloaded_generic"] # Plot sparsity # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl index f7fef1a97..ced0d551c 100644 --- a/src/active_set_quadratic_direct_solve.jl +++ b/src/active_set_quadratic_direct_solve.jl @@ -99,23 +99,43 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, λ = MOI.add_variables(o, nv) # λ ≥ 0, ∑ λ == 1 MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) - MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0)) - # Aᵗ Q A λ == -Aᵗ b + sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) + # Vᵗ A V λ == -Vᵗ b for atom in as.atoms lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) Base.sizehint!(lhs.terms, nv) # replaces direct sum because of MOI and MutableArithmetic slow sums for j in 1:nv - # TODO slow - push!(lhs.terms, MOI.ScalarAffineTerm(dot(atom, as.A, as.atoms[j]), λ[j])) + push!(lhs.terms, MOI.ScalarAffineTerm(fast_dot(atom, as.A, as.atoms[j]), λ[j])) end rhs = -dot(atom, as.b) MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) end - dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) - MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 1e-10 + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.atoms, indices_to_remove) + deleteat!(as.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.weights .= new_weights + @assert all(>=(0), new_weights) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + return as end # special case of scaled identity Hessian @@ -127,7 +147,8 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, λ = MOI.add_variables(o, nv) # λ ≥ 0, ∑ λ == 1 MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) - MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0)) + sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) # a Aᵗ A λ == -Aᵗ b for atom in as.atoms lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) @@ -139,28 +160,28 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, rhs = -dot(atom, as.b) MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) end - dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) - MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) MOI.optimize!(o) - if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) - indices_to_remove = Int[] - new_weights = R[] - for idx in eachindex(λ) - weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= 1e-10 - push!(indices_to_remove, idx) - else - push!(new_weights, weight_value) - end + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 1e-10 + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) end - deleteat!(as.atoms, indices_to_remove) - deleteat!(as.weights, indices_to_remove) - @assert length(as) == length(new_weights) - as.weights .= new_weights - @assert all(>=(0), new_weights) - active_set_renormalize!(as) - compute_active_set_iterate!(as) end + deleteat!(as.atoms, indices_to_remove) + deleteat!(as.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.weights .= new_weights + @assert all(>=(0), new_weights) + active_set_renormalize!(as) + compute_active_set_iterate!(as) return as end diff --git a/src/utils.jl b/src/utils.jl index f3d950884..27d24e03c 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -163,6 +163,77 @@ function fast_dot(A::Matrix{T1}, B::SparseArrays.SparseMatrixCSC{T2}) where {T1, return s end +fast_dot(a, Q, b) = dot(a, Q, b) + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::AbstractVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + d = Q.diag + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + return sum(eachindex(nzvals); init=zero(eltype(a))) do nzidx + nzvals[nzidx] * d[nzinds[nzidx]] * b[nzinds[nzidx]] + end +end + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::SparseArrays.AbstractSparseVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + n = length(a) + if length(b) != n + throw( + DimensionMismatch("Vector a has a length $n but b has a length $(length(b))") + ) + end + anzind = SparseArrays.nonzeroinds(a) + bnzind = SparseArrays.nonzeroinds(b) + anzval = SparseArrays.nonzeros(a) + bnzval = SparseArrays.nonzeros(b) + s = zero(Base.promote_eltype(a, Q, b)) + + if isempty(anzind) || isempty(bnzind) + return s + end + + a_idx = 1 + b_idx = 1 + a_idx_last = length(anzind) + b_idx_last = length(bnzind) + + # go through the nonzero indices of a and b simultaneously + @inbounds while a_idx <= a_idx_last && b_idx <= b_idx_last + ia = anzind[a_idx] + ib = bnzind[b_idx] + if ia == ib + s += dot(anzval[a_idx], Q.diag[ia], bnzval[b_idx]) + a_idx += 1 + b_idx += 1 + elseif ia < ib + a_idx += 1 + else + b_idx += 1 + end + end + return s +end + + +function _fast_quadratic_form_symmetric(a, Q) + d = Q.diag + if length(d) != length(a) + throw(DimensionMismatch()) + end + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + s = zero(Base.promote_eltype(a, Q)) + @inbounds for nzidx in eachindex(nzvals) + s += nzvals[nzidx]^2 * d[nzinds[nzidx]] + end + return s +end + """ trajectory_callback(storage) From d09f0dfe3efea150a0fd1f62484eeb3307ccd3de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Fri, 11 Oct 2024 17:52:41 +0200 Subject: [PATCH 11/19] copy sufficient --- examples/blended_pairwise_with_direct.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index 5cfe6bf8f..cd8e53dc0 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -73,7 +73,7 @@ callback = build_callback(trajectoryBPCG_standard) f, grad!, lmo, - deepcopy(x00), + copy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, @@ -91,7 +91,7 @@ callback = build_callback(trajectoryBPCG_quadratic) f, grad!, lmo, - deepcopy(x00), + copy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), print_iter=k / 10, From d2d5397e3b62fe0132b83508801b6e8f36b14905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Fri, 11 Oct 2024 17:55:09 +0200 Subject: [PATCH 12/19] remove comment --- src/active_set_quadratic.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index 9736c911b..bc4edaa85 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -197,7 +197,6 @@ function active_set_update!( idx = find_atom(active_set, atom) end if idx > 0 - @info "old atom" @inbounds active_set.weights[idx] += lambda @inbounds active_set.modified[idx] = true else From 13c40dd97f83b1610694f54f8a92a88a79ac32e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Mon, 14 Oct 2024 23:15:47 +0200 Subject: [PATCH 13/19] added test for quadratic --- examples/blended_pairwise_with_direct.jl | 48 ++++---- ...wise_with_direct_non-standard-quadratic.jl | 6 - src/active_set_quadratic_direct_solve.jl | 104 +++++++++++++---- test/quadratic_lp_active_set.jl | 110 ++++++++++++++++++ test/runtests.jl | 7 ++ 5 files changed, 228 insertions(+), 47 deletions(-) create mode 100644 test/quadratic_lp_active_set.jl diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index cd8e53dc0..2b7661f57 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -19,17 +19,13 @@ using Random import HiGHS import MathOptInterface as MOI -# lp_solver = GLPK.Optimizer lp_solver = HiGHS.Optimizer -# lp_solver = Clp.Optimizer # buggy / does not work properly - include("../examples/plot_utils.jl") n = Int(1e4) -k = 5000 +k = 10_000 -# s = rand(1:100) s = 10 @info "Seed $s" Random.seed!(s) @@ -37,9 +33,6 @@ Random.seed!(s) xpi = rand(n); total = sum(xpi); -# here the optimal solution lies in the interior if you want an optimal solution on a face and not the interior use: -# const xp = xpi; - const xp = xpi ./ total; f(x) = norm(x - xp)^2 @@ -49,12 +42,6 @@ end lmo = FrankWolfe.KSparseLMO(5, 1.0) -## other LMOs to try -# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") -# lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0) -# lmo = FrankWolfe.ProbabilitySimplexOracle(1.0); -# lmo = FrankWolfe.UnitSimplexOracle(1.0); - const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) function build_callback(trajectory_arr) @@ -213,13 +200,34 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic) callback=callback, ); +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) -# Update the data and labels for plotting -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct, trajectoryBPCG_quadratic_direct_generic] -labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded", "Reloaded_generic"] +# with LP acceleration +trajectoryBPCG_quadratic_noqas = [] +callback = build_callback(trajectoryBPCG_quadratic_noqas) -# Plot sparsity -# plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + + +# Update the data and labels for plotting +data_trajectories = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct, trajectoryBPCG_quadratic_direct_generic, trajectoryBPCG_quadratic_noqas] +labels_trajectories = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded", "Reloaded_generic", "Reloaded_noqas"] # Plot trajectories -plot_trajectories(dataSparsity, labelSparsity, xscalelog=false) +plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false) diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl index ff7736be1..f7da7f3d0 100644 --- a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl +++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl @@ -14,15 +14,9 @@ using FrankWolfe using LinearAlgebra using Random -import Pkg -Pkg.add("GLPK") -Pkg.add("HiGHS") -import GLPK import HiGHS -# lp_solver = GLPK.Optimizer lp_solver = HiGHS.Optimizer -# lp_solver = Clp.Optimizer # buggy / does not work properly include("../examples/plot_utils.jl") diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl index ced0d551c..63a1ea394 100644 --- a/src/active_set_quadratic_direct_solve.jl +++ b/src/active_set_quadratic_direct_solve.jl @@ -7,9 +7,16 @@ along with their coefficients `(λ_i, a_i)`. `R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` -so that the gradient is simply `∇f(x)=Ax+b`. +so that the gradient is `∇f(x)=Ax+b`. + +This active set stores an inner `active_set` that keeps track of the current set of vertices and convex decomposition. +It therefore delegates all update, deletion, and addition operations to this inner active set. +The `weight`, `atoms`, and `x` fields should only be accessed to read and are effectively the same objects as those in the inner active set. + +The structure also contains a scheduler struct which is called with the `should_solve_lp` function. +To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method. """ -struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet} <: AbstractActiveSet{AT,R,IT} +struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet, SF} <: AbstractActiveSet{AT,R,IT} weights::Vector{R} atoms::Vector{AT} x::IT @@ -17,46 +24,71 @@ struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOpt b::IT # linear term active_set::AS lp_optimizer::OT - lp_frequency::Int + scheduler::SF counter::Base.RefValue{Int} end -function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer; lp_frequency=100) where {AT,R} +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer) + +Creates an ActiveSetQuadraticLinearSolve by computing the Hessian and linear term from `grad!`. +""" +function ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer; scheduler=LogScheduler()) where {AT,R} A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency) + return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler) end -function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer; lp_frequency=10) where {AT,R,H} +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer) + +Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadratic` active set. +""" +function ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A::H, b, lp_optimizer; scheduler=LogScheduler()) where {AT,R,H} inner_as = ActiveSetQuadratic(tuple_values, A, b) - as = ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, lp_frequency, Ref(0)) + return ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, inner_as.A, inner_as.b, inner_as, lp_optimizer, scheduler, Ref(0)) +end + +function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, A, b, lp_optimizer; scheduler=LogScheduler()) + as = ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, scheduler, Ref(0)) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, A::LinearAlgebra.UniformScaling, b, lp_optimizer; scheduler=LogScheduler()) + as = ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, scheduler, Ref(0)) compute_active_set_iterate!(as) return as end -function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer; lp_frequency=100) where {AT,R} +function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, grad!::Function, lp_optimizer; scheduler=LogScheduler()) A, b = detect_quadratic_function(grad!, tuple_values[1][2]) - return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency) + return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler) end -function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer; lp_frequency=lp_frequency) where {AT,R,H} +function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::Vector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer; scheduler=LogScheduler()) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, A, b, lp_optimizer; scheduler=scheduler) +end + +function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::Vector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer; scheduler=LogScheduler()) where {AT,R,H} inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b) - as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, lp_frequency, Ref(0)) + as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, scheduler, Ref(0)) compute_active_set_iterate!(as) return as end -function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer; lp_frequency=100) where {AT,R} - return ActiveSetQuadraticLinearSolve(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) +function ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer; scheduler=LogScheduler()) where {AT,R} + return ActiveSetQuadraticLinearSolve(tuple_values, Identity(A.λ), b, lp_optimizer; scheduler=scheduler) end -function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer; lp_frequency=100) where {AT,R} - return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency) +function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::Vector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer; scheduler=LogScheduler()) where {AT,R} + return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; scheduler=scheduler) end # all mutating functions are delegated to the inner active set Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple) -Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx) +Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx::Int) = deleteat!(as.active_set, idx) Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set) @@ -74,7 +106,7 @@ function active_set_update!( # new atom introduced, we can solve the auxiliary LP if idx < 0 as.counter[] += 1 - if mod(as.counter[], as.lp_frequency) == 0 + if should_solve_lp(as, as.scheduler) solve_quadratic_activeset_lp!(as) end end @@ -92,6 +124,12 @@ function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0 end # generic quadratic with quadratic information provided +""" + solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H})) + +Solves the auxiliary LP over the current active set. +The method is specialized by type `H` of the Hessian matrix `A`. +""" function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: AbstractMatrix}) where {AT, R, IT} nv = length(as) o = as.lp_optimizer @@ -122,7 +160,7 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, new_weights = R[] for idx in eachindex(λ) weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= 1e-10 + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) push!(indices_to_remove, idx) else push!(new_weights, weight_value) @@ -132,14 +170,15 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, deleteat!(as.weights, indices_to_remove) @assert length(as) == length(new_weights) as.weights .= new_weights - @assert all(>=(0), new_weights) + active_set_cleanup!(as) active_set_renormalize!(as) compute_active_set_iterate!(as) return as end # special case of scaled identity Hessian -function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: Identity}) where {AT, R, IT} + +function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: Union{Identity, LinearAlgebra.UniformScaling}}) where {AT, R, IT} hessian_scaling = as.A.λ nv = length(as) o = as.lp_optimizer @@ -170,7 +209,7 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, new_weights = R[] for idx in eachindex(λ) weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= 1e-10 + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) push!(indices_to_remove, idx) else push!(new_weights, weight_value) @@ -180,8 +219,31 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, deleteat!(as.weights, indices_to_remove) @assert length(as) == length(new_weights) as.weights .= new_weights + active_set_cleanup!(as) @assert all(>=(0), new_weights) active_set_renormalize!(as) compute_active_set_iterate!(as) return as end + +struct LogScheduler{T} + start_time::Int + scaling_factor::T + max_interval::Int + current_interval::Base.RefValue{Int} + last_solve_counter::Base.RefValue{Int} +end + +LogScheduler(; start_time=20, scaling_factor=1.5, max_interval=1000) = LogScheduler(start_time, scaling_factor, max_interval, Ref(start_time), Ref(0)) + +function should_solve_lp(as::ActiveSetQuadraticLinearSolve, scheduler::LogScheduler) + if as.counter[] - scheduler.last_solve_counter[] >= scheduler.current_interval[] + scheduler.last_solve_counter[] = as.counter[] + scheduler.current_interval[] = min( + round(Int, scheduler.scaling_factor * scheduler.current_interval[]), + scheduler.max_interval, + ) + return true + end + return false +end diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl new file mode 100644 index 000000000..a86c0b713 --- /dev/null +++ b/test/quadratic_lp_active_set.jl @@ -0,0 +1,110 @@ +using FrankWolfe +using LinearAlgebra +using Random +using Test +import HiGHS +import MathOptInterface as MOI + +n = Int(1e4) +k = 5000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + +trajectoryBPCG_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_standard), +); + +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +trajectoryBPCG_quadratic_direct_specialized = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_quadratic_direct_specialized), +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +trajectoryBPCG_quadratic_direct_generic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), +); + +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +trajectoryBPCG_quadratic_noqas = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_noqas), +); + + +dual_gaps_quadratic_specialized = getindex.(trajectoryBPCG_quadratic_direct_specialized, 4) +dual_gaps_quadratic_generic = getindex.(trajectoryBPCG_quadratic_direct_generic, 4) +dual_gaps_quadratic_noqas = getindex.(trajectoryBPCG_quadratic_noqas, 4) +dual_gaps_bpcg = getindex.(trajectoryBPCG_standard, 4) + + +@test dual_gaps_quadratic_specialized[end] < dual_gaps_bpcg[end] +@test dual_gaps_quadratic_noqas[end] < dual_gaps_bpcg[end] +@test norm(dual_gaps_quadratic_noqas - dual_gaps_quadratic_noqas) ≤ k * 1e-5 diff --git a/test/runtests.jl b/test/runtests.jl index d984043dc..5f3cca81e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -798,6 +798,13 @@ using Test end end +module LpDirectSolveTest +using Test +@testset "LP solving for quadratic functions and active set" begin + include("quadratic_lp_active_set.jl") +end +end + include("generic-arrays.jl") include("blocks.jl") From 866158a62c893c2412bf008780a1c5efeecfaaf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Tue, 15 Oct 2024 16:35:41 +0200 Subject: [PATCH 14/19] minor --- examples/blended_pairwise_with_sparsify.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl index 8481850ea..941df79cd 100644 --- a/examples/blended_pairwise_with_sparsify.jl +++ b/examples/blended_pairwise_with_sparsify.jl @@ -14,7 +14,6 @@ using FrankWolfe using LinearAlgebra using Random -import Pkg import HiGHS # lp_solver = GLPK.Optimizer From 08671fbf5188aace9120347bb01ecc3fd37e8030 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Tue, 15 Oct 2024 17:18:23 +0200 Subject: [PATCH 15/19] simplify example --- examples/blended_pairwise_with_direct.jl | 81 ++---------------------- 1 file changed, 5 insertions(+), 76 deletions(-) diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index 2b7661f57..417b996e2 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -19,8 +19,6 @@ using Random import HiGHS import MathOptInterface as MOI -lp_solver = HiGHS.Optimizer - include("../examples/plot_utils.jl") n = Int(1e4) @@ -51,29 +49,7 @@ function build_callback(trajectory_arr) end -FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100) - trajectoryBPCG_standard = [] -callback = build_callback(trajectoryBPCG_standard) - -@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( - f, - grad!, - lmo, - copy(x00), - max_iteration=k, - line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, - trajectory=true, - callback=callback, - squadratic=false, -); - -trajectoryBPCG_quadratic = [] -callback = build_callback(trajectoryBPCG_quadratic) - @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, @@ -81,34 +57,8 @@ callback = build_callback(trajectoryBPCG_quadratic) copy(x00), max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, - trajectory=true, - callback=callback, - squadratic=true, - lp_solver=lp_solver, -); - -# Just using that qudratic (more expensive) -trajectoryBPCG_quadratic_nosquad = [] -callback = build_callback(trajectoryBPCG_quadratic_nosquad) - -@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( - f, - grad!, - lmo, - copy(x00), - max_iteration=k, - line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, - trajectory=true, - callback=callback, - quadratic=true, - squadratic=false, - lp_solver=lp_solver, + callback=build_callback(trajectoryBPCG_standard), ); # Just projection quadratic @@ -123,10 +73,7 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, as_quad, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, - trajectory=true, callback=callback, ); @@ -137,7 +84,6 @@ as_quad = FrankWolfe.ActiveSetQuadratic( # with quadratic active set trajectoryBPCG_quadratic_as = [] -callback = build_callback(trajectoryBPCG_quadratic_as) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, @@ -145,11 +91,8 @@ callback = build_callback(trajectoryBPCG_quadratic_as) as_quad, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, - trajectory=true, - callback=callback, + callback=build_callback(trajectoryBPCG_quadratic_as), ); as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( @@ -160,8 +103,6 @@ as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( # with LP acceleration trajectoryBPCG_quadratic_direct = [] -callback = build_callback(trajectoryBPCG_quadratic_direct) - @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, @@ -169,11 +110,8 @@ callback = build_callback(trajectoryBPCG_quadratic_direct) as_quad_direct, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, - trajectory=true, - callback=callback, + callback=build_callback(trajectoryBPCG_quadratic_direct), ); as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( @@ -184,8 +122,6 @@ as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( # with LP acceleration trajectoryBPCG_quadratic_direct_generic = [] -callback = build_callback(trajectoryBPCG_quadratic_direct_generic) - @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, @@ -193,11 +129,8 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic) as_quad_direct_generic, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, - trajectory=true, - callback=callback, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), ); as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( @@ -208,7 +141,6 @@ as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( # with LP acceleration trajectoryBPCG_quadratic_noqas = [] -callback = build_callback(trajectoryBPCG_quadratic_noqas) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, @@ -217,11 +149,8 @@ callback = build_callback(trajectoryBPCG_quadratic_noqas) as_quad_direct_basic_as, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, - trajectory=true, - callback=callback, + callback=build_callback(trajectoryBPCG_quadratic_noqas), ); From 4c7e12091df40f165e7073310e5437acd724d095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Thu, 17 Oct 2024 09:45:38 +0200 Subject: [PATCH 16/19] clean up code, verify error with ASQuad --- ...wise_with_direct_non-standard-quadratic.jl | 77 ++++++++++++++++--- src/active_set_quadratic.jl | 1 + src/active_set_quadratic_direct_solve.jl | 16 ++-- 3 files changed, 76 insertions(+), 18 deletions(-) diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl index f7da7f3d0..9466771ee 100644 --- a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl +++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl @@ -15,7 +15,7 @@ using LinearAlgebra using Random import HiGHS - +import MathOptInterface as MOI lp_solver = HiGHS.Optimizer @@ -33,7 +33,6 @@ A = let A = randn(n, n) A' * A end - @assert isposdef(A) == true const y = Random.rand(Bool, n) * 0.6 .+ 0.3 @@ -72,12 +71,11 @@ FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100) trajectoryBPCG_standard = [] callback = build_callback(trajectoryBPCG_standard) -x0 = deepcopy(x00) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - x0, + copy(x00), max_iteration=k, line_search=FrankWolfe.Adaptive(), print_iter=k / 10, @@ -89,29 +87,84 @@ x0 = deepcopy(x00) ); trajectoryBPCG_quadratic = [] -callback = build_callback(trajectoryBPCG_quadratic) - -x0 = deepcopy(x00) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - x0, + copy(x00), max_iteration=k, line_search=FrankWolfe.Adaptive(), print_iter=k / 10, - memory_mode=FrankWolfe.InplaceEmphasis(), verbose=true, trajectory=true, - callback=callback, + callback=build_callback(trajectoryBPCG_quadratic), quadratic=true, lp_solver=lp_solver, ); +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100), +) +trajectoryBPCG_quadratic_automatic = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + # print_iter=1, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic2 = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic2, + # print_iter=1, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic2), +); + + +active_set_quadratic_automatic_reloaded = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic_reloaded = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_reloaded, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic_reloaded), +); + + + # Reduction primal/dual error vs. sparsity of solution -dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic] -labelSparsity = ["BPCG (Standard)", "BPCG (Direct)"] +# dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_automatic, trajectoryBPCG_quadratic_automatic_reloaded, trajectoryBPCG_quadratic_automatic] +# labelSparsity = ["BPCG (Standard)", "BPCG (Direct)", "AS_Quad", "R", "A2"] + +dataSparsity = [trajectoryBPCG_quadratic_automatic2, trajectoryBPCG_quadratic_automatic_reloaded] +labelSparsity = ["AS_Quad", "AS_standard"] # Plot sparsity # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index bc4edaa85..ba2f2cc98 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -151,6 +151,7 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} return as end +# TODO multi-indices version function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) @inbounds for i in 1:idx-1 as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i] diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl index 63a1ea394..974a5439b 100644 --- a/src/active_set_quadratic_direct_solve.jl +++ b/src/active_set_quadratic_direct_solve.jl @@ -16,12 +16,12 @@ The `weight`, `atoms`, and `x` fields should only be accessed to read and are ef The structure also contains a scheduler struct which is called with the `should_solve_lp` function. To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method. """ -struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet, SF} <: AbstractActiveSet{AT,R,IT} +struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, BT, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet, SF} <: AbstractActiveSet{AT,R,IT} weights::Vector{R} atoms::Vector{AT} x::IT A::H # Hessian matrix - b::IT # linear term + b::BT # linear term active_set::AS lp_optimizer::OT scheduler::SF @@ -61,7 +61,7 @@ function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, A::LinearAlg end function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, grad!::Function, lp_optimizer; scheduler=LogScheduler()) - A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + A, b = detect_quadratic_function(grad!, inner_as.atoms[1]) return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler) end @@ -124,6 +124,7 @@ function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0 end # generic quadratic with quadratic information provided + """ solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H})) @@ -160,14 +161,17 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, new_weights = R[] for idx in eachindex(λ) weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) - if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) + if weight_value <= 1e-5 push!(indices_to_remove, idx) else push!(new_weights, weight_value) end end - deleteat!(as.atoms, indices_to_remove) - deleteat!(as.weights, indices_to_remove) + # we delete the elements in reverse order to avoid messing up the internal active set + sort!(indices_to_remove, rev=true) + for idx in indices_to_remove + deleteat!(as.active_set, idx) + end @assert length(as) == length(new_weights) as.weights .= new_weights active_set_cleanup!(as) From 8f9f2beafd1a050e7bb7c8a59331709fc2ed17aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Designolle?= Date: Thu, 24 Oct 2024 10:42:37 +0200 Subject: [PATCH 17/19] Add update_weights! to fix direct solve with active_set_quadratic --- examples/blended_pairwise_with_direct.jl | 6 ++---- src/active_set.jl | 4 ++++ src/active_set_quadratic.jl | 6 ++++++ src/active_set_quadratic_direct_solve.jl | 15 +++++---------- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl index 417b996e2..1304749d2 100644 --- a/examples/blended_pairwise_with_direct.jl +++ b/examples/blended_pairwise_with_direct.jl @@ -62,9 +62,7 @@ trajectoryBPCG_standard = [] ); # Just projection quadratic -trajectoryBPCG_quadratic_as = [] -callback = build_callback(trajectoryBPCG_quadratic_as) - +trajectoryBPCG_quadratic = [] as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, @@ -74,7 +72,7 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), verbose=true, - callback=callback, + callback=build_callback(trajectoryBPCG_quadratic), ); as_quad = FrankWolfe.ActiveSetQuadratic( diff --git a/src/active_set.jl b/src/active_set.jl index 62393b47f..c02145666 100644 --- a/src/active_set.jl +++ b/src/active_set.jl @@ -326,3 +326,7 @@ function compute_active_set_iterate!(active_set::AbstractActiveSet{<:ScaledHotVe end return active_set.x end + +function update_weights!(as::AbstractActiveSet, new_weights) + as.weights .= new_weights +end diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index ba2f2cc98..5739198f2 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -315,3 +315,9 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) compute_active_set_iterate!(warm_as) return warm_as end + +function update_weights!(as::ActiveSetQuadratic, new_weights) + as.weights_prev .= as.weights + as.weights .= new_weights + as.modified .= true +end diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl index 974a5439b..0d71db82f 100644 --- a/src/active_set_quadratic_direct_solve.jl +++ b/src/active_set_quadratic_direct_solve.jl @@ -88,7 +88,7 @@ end Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple) -Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx::Int) = deleteat!(as.active_set, idx) +Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx) Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set) @@ -167,13 +167,9 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, push!(new_weights, weight_value) end end - # we delete the elements in reverse order to avoid messing up the internal active set - sort!(indices_to_remove, rev=true) - for idx in indices_to_remove - deleteat!(as.active_set, idx) - end + deleteat!(as.active_set, indices_to_remove) @assert length(as) == length(new_weights) - as.weights .= new_weights + update_weights!(as.active_set, new_weights) active_set_cleanup!(as) active_set_renormalize!(as) compute_active_set_iterate!(as) @@ -219,10 +215,9 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, push!(new_weights, weight_value) end end - deleteat!(as.atoms, indices_to_remove) - deleteat!(as.weights, indices_to_remove) + deleteat!(as.active_set, indices_to_remove) @assert length(as) == length(new_weights) - as.weights .= new_weights + update_weights!(as.active_set, new_weights) active_set_cleanup!(as) @assert all(>=(0), new_weights) active_set_renormalize!(as) From 4e1f4917a5e37ad8a424987a255e03eae85d3778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Thu, 24 Oct 2024 20:07:44 +0200 Subject: [PATCH 18/19] remove direct solve from BPCG --- ...wise_with_direct_non-standard-quadratic.jl | 48 ++++--------------- src/blended_pairwise.jl | 37 +------------- 2 files changed, 11 insertions(+), 74 deletions(-) diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl index 9466771ee..5710e92d9 100644 --- a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl +++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl @@ -16,8 +16,6 @@ using Random import HiGHS import MathOptInterface as MOI -lp_solver = HiGHS.Optimizer - include("../examples/plot_utils.jl") @@ -66,12 +64,10 @@ function build_callback(trajectory_arr) end -FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100) - trajectoryBPCG_standard = [] callback = build_callback(trajectoryBPCG_standard) -@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, @@ -86,22 +82,6 @@ callback = build_callback(trajectoryBPCG_standard) squadratic=false, ); -trajectoryBPCG_quadratic = [] -@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( - f, - grad!, - lmo, - copy(x00), - max_iteration=k, - line_search=FrankWolfe.Adaptive(), - print_iter=k / 10, - verbose=true, - trajectory=true, - callback=build_callback(trajectoryBPCG_quadratic), - quadratic=true, - lp_solver=lp_solver, -); - active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( [(1.0, copy(x00))], grad!, @@ -109,7 +89,7 @@ active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100), ) trajectoryBPCG_quadratic_automatic = [] -@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, @@ -127,7 +107,7 @@ active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve( scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), ) trajectoryBPCG_quadratic_automatic2 = [] -@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, @@ -139,35 +119,27 @@ trajectoryBPCG_quadratic_automatic2 = [] ); -active_set_quadratic_automatic_reloaded = FrankWolfe.ActiveSetQuadraticLinearSolve( +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( FrankWolfe.ActiveSet([(1.0, copy(x00))]), grad!, MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), ) -trajectoryBPCG_quadratic_automatic_reloaded = [] -@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( f, grad!, lmo, - active_set_quadratic_automatic_reloaded, + active_set_quadratic_automatic_standard, max_iteration=k, verbose=true, - callback=build_callback(trajectoryBPCG_quadratic_automatic_reloaded), + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), ); +dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic_automatic, trajectoryBPCG_quadratic_automatic_standard] +labelSparsity = ["BPCG (Standard)", "AS_Quad", "AS_Standard"] -# Reduction primal/dual error vs. sparsity of solution - -# dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_automatic, trajectoryBPCG_quadratic_automatic_reloaded, trajectoryBPCG_quadratic_automatic] -# labelSparsity = ["BPCG (Standard)", "BPCG (Direct)", "AS_Quad", "R", "A2"] - -dataSparsity = [trajectoryBPCG_quadratic_automatic2, trajectoryBPCG_quadratic_automatic_reloaded] -labelSparsity = ["AS_Quad", "AS_standard"] - -# Plot sparsity -# plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright) # Plot trajectories plot_trajectories(dataSparsity, labelSparsity,xscalelog=false) diff --git a/src/blended_pairwise.jl b/src/blended_pairwise.jl index a1341135a..40435d946 100644 --- a/src/blended_pairwise.jl +++ b/src/blended_pairwise.jl @@ -30,10 +30,6 @@ function blended_pairwise_conditional_gradient( add_dropped_vertices=false, use_extra_vertex_storage=false, recompute_last_vertex=true, - squadratic=false, # standard quadratic objective - quadratic=false, # quadratic objective but not standard quadratic - lp_solver=nothing, # lp_solver used for direct solve or sparsification - ds_scale_up=2, # direct solve scale up parameter ) # add the first vertex to active set from initialization active_set = ActiveSet([(1.0, x0)]) @@ -63,10 +59,6 @@ function blended_pairwise_conditional_gradient( add_dropped_vertices=add_dropped_vertices, use_extra_vertex_storage=use_extra_vertex_storage, recompute_last_vertex=recompute_last_vertex, - squadratic=squadratic, - quadratic=quadratic, # Pass the new parameter - lp_solver=lp_solver, - ds_scale_up=ds_scale_up, # Pass the new parameter ) end @@ -100,10 +92,6 @@ function blended_pairwise_conditional_gradient( add_dropped_vertices=false, use_extra_vertex_storage=false, recompute_last_vertex=true, - squadratic=false, # non-standard quadratic objective - quadratic=false, # quadratic objective but not standard quadratic - lp_solver=nothing, # lp_solver used for direct solve or sparsification - ds_scale_up=2, # direct solve scale up parameter ) where {AT,R} # format string for output of the algorithm @@ -132,8 +120,6 @@ function blended_pairwise_conditional_gradient( end t = 0 - next_direct_solve_interval = 10 - last_direct_solve = 0 compute_active_set_iterate!(active_set) x = get_active_set_iterate(active_set) primal = convert(eltype(x), Inf) @@ -146,10 +132,6 @@ function blended_pairwise_conditional_gradient( gradient = collect(x) end - if squadratic || quadratic # when using LPs then lp_solver must be provided - @assert lp_solver !== nothing "When squadratic, sparsify, or quadratic is true, lp_solver must be provided" - end - if verbose println("\nBlended Pairwise Conditional Gradient Algorithm.") NumType = eltype(x) @@ -159,9 +141,6 @@ function blended_pairwise_conditional_gradient( grad_type = typeof(gradient) println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance") println("LMO: $(typeof(lmo))") - if lp_solver !== nothing - println("LP SOLVER: $(lp_solver)") - end if use_extra_vertex_storage && !lazy @info("vertex storage only used in lazy mode") end @@ -312,20 +291,6 @@ function blended_pairwise_conditional_gradient( end vertex_taken = v # short circuit for quadratic case - if (squadratic || quadratic) && ((t > last_direct_solve && t - last_direct_solve >= next_direct_solve_interval)) # Updated condition - next_direct_solve_interval *= ds_scale_up - last_direct_solve = t - if quadratic - active_set = direct_solve_gq(active_set, grad!, lp_solver) # direct solve for non-standard quadratic problem - else - active_set = direct_solve(active_set, grad!, lp_solver) # direct solve for standard quadratic problem - end - active_set_cleanup!(active_set; weight_purge_threshold=weight_purge_threshold) - compute_active_set_iterate!(active_set) - x = get_active_set_iterate(active_set) - grad!(gradient, x) - step_type = ST_DIRECT - end dual_gap = fast_dot(gradient, x) - fast_dot(gradient, v) # if we are about to exit, compute dual_gap with the cleaned-up x if dual_gap ≤ epsilon @@ -519,4 +484,4 @@ function blended_pairwise_conditional_gradient( end return (x=x, v=v, primal=primal, dual_gap=dual_gap, traj_data=traj_data, active_set=active_set) -end \ No newline at end of file +end From 80dc2b22427a54cbd7ff73e7cf2e17caa97575f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Thu, 24 Oct 2024 20:22:17 +0200 Subject: [PATCH 19/19] rng changed --- examples/simple_stateless.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/simple_stateless.jl b/examples/simple_stateless.jl index 6a550a963..6bf230e26 100644 --- a/examples/simple_stateless.jl +++ b/examples/simple_stateless.jl @@ -71,7 +71,7 @@ plot_trajectories([trajectory_simple[1:end], trajectory_restart[1:end]], ["simpl # simple step iterations about 33% faster -@test line_search.factor <= 44 +@test line_search.factor <= 51 x, v, primal, dual_gap, trajectory_restart_highpres = FrankWolfe.frank_wolfe( f,