From 935a5f332f6b0422d1496e9d4814c680d9f464bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Mon, 7 Oct 2024 18:19:54 +0200
Subject: [PATCH 01/19] sparsifier active set

---
 examples/blended_pairwise_with_sparsify.jl | 30 +++++++--
 src/FrankWolfe.jl                          |  1 +
 src/active_set_quadratic.jl                |  1 -
 src/active_set_sparsifier.jl               | 76 ++++++++++++++++++++++
 src/moi_oracle.jl                          |  2 +-
 5 files changed, 101 insertions(+), 9 deletions(-)
 create mode 100644 src/active_set_sparsifier.jl

diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl
index 6927b8e4e..30792a8ec 100644
--- a/examples/blended_pairwise_with_sparsify.jl
+++ b/examples/blended_pairwise_with_sparsify.jl
@@ -15,9 +15,6 @@ using LinearAlgebra
 using Random
 
 import Pkg
-Pkg.add("GLPK")
-Pkg.add("HiGHS")
-import GLPK
 import HiGHS
 
 # lp_solver = GLPK.Optimizer
@@ -80,12 +77,11 @@ FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100)
 trajectoryBPCG_standard = []
 callback = build_callback(trajectoryBPCG_standard)
 
-x0 = deepcopy(x00)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    x0,
+    copy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
@@ -98,12 +94,11 @@ x0 = deepcopy(x00)
 trajectoryBPCG_quadratic = []
 callback = build_callback(trajectoryBPCG_quadratic)
 
-x0 = deepcopy(x00)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    x0,
+    copy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
@@ -116,6 +111,27 @@ x0 = deepcopy(x00)
     lp_solver=lp_solver,
 );
 
+trajectoryBPCG_as_sparse = []
+callback = build_callback(trajectoryBPCG_as_sparse)
+
+active_set_sparse = FrankWolfe.ActiveSetSparsifier(FrankWolfe.ActiveSet([1.0], [x00], similar(x00)), HiGHS.Optimizer())
+
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=callback,
+    sparsify=false,
+#    squadratic=true, # activate to see the effect of the numerical precision of the LP solver
+);
+
 # Reduction primal/dual error vs. sparsity of solution
 
 dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic]
diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl
index e6d29c3cc..e17e40038 100644
--- a/src/FrankWolfe.jl
+++ b/src/FrankWolfe.jl
@@ -34,6 +34,7 @@ include("moi_oracle.jl")
 include("function_gradient.jl")
 include("active_set.jl")
 include("active_set_quadratic.jl")
+include("active_set_sparsifier.jl")
 
 include("blended_cg.jl")
 include("afw.jl")
diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index b200a8912..d252d186d 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -310,4 +310,3 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b)
     compute_active_set_iterate!(warm_as)
     return warm_as
 end
-
diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl
new file mode 100644
index 000000000..bc54c10bf
--- /dev/null
+++ b/src/active_set_sparsifier.jl
@@ -0,0 +1,76 @@
+struct ActiveSetSparsifier{AT, R, IT, AS <: AbstractActiveSet{AT, R, IT}, OT <: MOI.AbstractOptimizer} <: AbstractActiveSet{AT,R,IT}
+    active_set::AS
+    weights::Vector{R}
+    atoms::Vector{AT}
+    x::IT
+    optimizer::OT
+    minimum_vertices::Int
+    solve_frequency::Int
+    counter::Base.RefValue{Int}
+end
+
+function ActiveSetSparsifier(active_set::AbstractActiveSet, optimizer::MOI.AbstractOptimizer; minimum_vertices=50, solve_frequency=50)
+    return ActiveSetSparsifier(active_set, active_set.weights, active_set.atoms, active_set.x, optimizer, minimum_vertices, solve_frequency, Ref(0))
+end
+
+function Base.push!(as::ActiveSetSparsifier, (λ, a))
+    push!(as.active_set, (λ, a))
+end
+
+Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx)
+
+Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set)
+
+function active_set_update!(
+    as::ActiveSetSparsifier{AS, OT, AT, R, IT},
+    lambda, atom, renorm=true, idx=nothing;
+    kwargs...
+) where {AS, OT, AT, R, IT}
+    active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...)
+    n = length(as)
+    as.counter[] += 1
+    if n > as.minimum_vertices && mod(as.counter[], as.solve_frequency) == 0
+        # sparsifying active set
+        MOI.empty!(as.optimizer)
+        x0 = as.active_set.x
+        λ = MOI.add_variables(as.optimizer, n)
+        # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1
+        MOI.add_constraint.(as.optimizer, λ, MOI.GreaterThan(0.0))
+        MOI.add_constraint(as.optimizer, sum(λ; init=0.0), MOI.EqualTo(1.0))
+        x_sum = 0 * as.active_set.atoms[1]
+        for (idx, atom) in enumerate(as.active_set.atoms)
+            x_sum += λ[idx] * as.active_set.atoms
+        end
+        for idx in eachindex(x_sum)
+            MOI.add_constraint(as.optimizer, x_sum[idx], MOI.EqualTo(x0[idx]))
+        end
+        # Set a dummy objective (minimize ∑λ)
+        dummy_objective = sum(λ; init=0.0)
+        MOI.set(as.optimizer, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
+        MOI.set(as.optimizer, MOI.ObjectiveSense(), MOI.MIN_SENSE)
+        MOI.optimize!(as.optimizer)
+        if MOI.get(as.optimizer, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
+            indices_to_remove = Int[]
+            new_weights = R[]
+            for idx in eachindex(λ)
+                weight_value = MOI.get(as.optimizer, MOI.VariablePrimal(), λ[idx])
+                if weight_value <= min(1e-3 / n, 1e-8)
+                    push!(indices_to_remove, idx)
+                else
+                    push!(new_weights, weight_value)
+                end
+            end
+            deleteat!(as.active_set.atoms, indices_to_remove)
+            deleteat!(as.active_set.weights, indices_to_remove)
+            @assert length(as) == length(new_weights)
+            as.active_set.weights .= new_weights
+            active_set_renormalize!(as)
+        end
+    end
+    return as
+end
+
+active_set_renormalize!(as::ActiveSetSparsifier) = active_set_renormalize!(as.active_set)
+
+active_set_argmin(as::ActiveSetSparsifier, direction) = active_set_argmin(as.active_set, direction)
+active_set_argminmax(as::ActiveSetSparsifier, direction; Φ=0.5) = active_set_argminmax(as.active_set, direction; Φ=Φ)
diff --git a/src/moi_oracle.jl b/src/moi_oracle.jl
index 875b8001d..1471bd6e7 100644
--- a/src/moi_oracle.jl
+++ b/src/moi_oracle.jl
@@ -1,6 +1,6 @@
 
 """
-    MathOptLMO{OT <: MOI.Optimizer} <: LinearMinimizationOracle
+    MathOptLMO{OT <: MOI.AbstractOptimizer} <: LinearMinimizationOracle
 
 Linear minimization oracle with feasible space defined through a MathOptInterface.Optimizer.
 The oracle call sets the direction and reruns the optimizer.

From d84dadb29f13a28fea36ebd00b0cb275494323a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Tue, 8 Oct 2024 16:17:09 +0200
Subject: [PATCH 02/19] start working on LP AS

---
 examples/blended_pairwise_with_direct.jl | 53 ++++++++++++--
 src/active_set_quadratic.jl              | 91 ++++++++++++++++++++----
 2 files changed, 126 insertions(+), 18 deletions(-)

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 576361db1..f37a593d1 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -21,6 +21,7 @@ Pkg.add("GLPK")
 Pkg.add("HiGHS")
 import GLPK
 import HiGHS
+import MathOptInterface as MOI
 
 # lp_solver = GLPK.Optimizer
 lp_solver = HiGHS.Optimizer
@@ -30,7 +31,7 @@ lp_solver = HiGHS.Optimizer
 include("../examples/plot_utils.jl")
 
 n = Int(1e4)
-k = 10000
+k = 1000
 
 # s = rand(1:100)
 s = 10
@@ -112,12 +113,11 @@ x0 = deepcopy(x00)
 trajectoryBPCG_quadratic_nosquad = []
 callback = build_callback(trajectoryBPCG_quadratic_nosquad)
 
-x0 = deepcopy(x00)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    x0,
+    copy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
@@ -130,9 +130,52 @@ x0 = deepcopy(x00)
     lp_solver=lp_solver,
 );
 
+# Just projection quadratic
+trajectoryBPCG_quadratic_as = []
+callback = build_callback(trajectoryBPCG_quadratic_as)
+
+as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=callback,
+);
+
+# with LP acceleration
+trajectoryBPCG_quadratic_as_lp = []
+callback = build_callback(trajectoryBPCG_quadratic_as_lp)
+
+as_quad_lp = FrankWolfe.ActiveSetQuadratic(
+    [(1.0, copy(x00))],
+    2 * LinearAlgebra.I, -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_lp,
+    max_iteration=1000,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=callback,
+);
+
+
 # Update the data and labels for plotting
-dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad]
-labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)"]
+dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_as_lp]
+labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "AS_Quad_LP"]
 
 # Plot sparsity
 # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index d252d186d..c7849663d 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -9,7 +9,7 @@ The iterate `x = ∑λ_i a_i` is stored in x with type `IT`.
 The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c`
 so that the gradient is simply `∇f(x)=Ax+b`.
 """
-struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT}
+struct ActiveSetQuadratic{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.AbstractOptimizer}} <: AbstractActiveSet{AT,R,IT}
     weights::Vector{R}
     atoms::Vector{AT}
     x::IT
@@ -20,6 +20,9 @@ struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT}
     dots_b::Vector{R} # stores ⟨b, atoms[i]⟩
     weights_prev::Vector{R}
     modified::BitVector
+    lp_optimizer::OT
+    lp_frequency::Int
+    counter::Base.RefValue{Int}
 end
 
 function detect_quadratic_function(grad!, x0; test=true)
@@ -48,11 +51,12 @@ function detect_quadratic_function(grad!, x0; test=true)
     return A, b
 end
 
-function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R}
-    return ActiveSetQuadratic(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...)
+function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    return ActiveSetQuadratic(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency)
 end
 
-function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H}
+function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R,H}
     n = length(tuple_values)
     weights = Vector{R}(undef, n)
     atoms = Vector{AT}(undef, n)
@@ -71,16 +75,18 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b)
         dots_b[idx] = fast_dot(b, atoms[idx])
     end
     x = similar(b)
-    as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified)
+    as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified, lp_optimizer, lp_frequency, Ref(0))
+
     compute_active_set_iterate!(as)
     return as
 end
 
-function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!) where {AT,R}
-    return ActiveSetQuadratic{AT,R}(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...)
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    return ActiveSetQuadratic{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency)
 end
 
-function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H}
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer=nothing; lp_frequency=lp_frequency) where {AT,R,H}
     n = length(tuple_values)
     weights = Vector{R}(undef, n)
     atoms = Vector{AT}(undef, n)
@@ -99,7 +105,7 @@ function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,
         dots_b[idx] = fast_dot(b, atoms[idx])
     end
     x = similar(b)
-    as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified)
+    as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified, lp_optimizer, lp_frequency, Ref(0))
     compute_active_set_iterate!(as)
     return as
 end
@@ -116,11 +122,11 @@ function Base.:*(a::Identity, b)
         return a.λ * b
     end
 end
-function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R}
-    return ActiveSetQuadratic(tuple_values, Identity(A.λ), b)
+function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+    return ActiveSetQuadratic(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
 end
-function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R}
-    return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b)
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+    return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
 end
 
 # these three functions do not update the active set iterate
@@ -193,10 +199,21 @@ function active_set_update!(
         idx = find_atom(active_set, atom)
     end
     if idx > 0
+        @info "old atom"
         @inbounds active_set.weights[idx] += lambda
         @inbounds active_set.modified[idx] = true
     else
         push!(active_set, (lambda, atom))
+        if active_set.lp_optimizer !== nothing
+            active_set.counter[] += 1
+            @show active_set.counter[]
+            @show mod(active_set.counter[], active_set.lp_frequency)
+            if mod(active_set.counter[], active_set.lp_frequency) == 0
+                @show "solving quadratic"
+                solve_quadratic_activeset_lp!(active_set)
+                return active_set
+            end
+        end
     end
     if renorm
         add_dropped_vertices = add_dropped_vertices ? vertex_storage !== nothing : add_dropped_vertices
@@ -207,6 +224,54 @@ function active_set_update!(
     return active_set
 end
 
+# generic quadratic
+function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT, H}) where {AT, R, IT, H}
+    # TODO
+end
+
+# special case of scaled identity Hessian
+function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT, <: Identity}) where {AT, R, IT}
+    hessian_scaling = active_set.A.λ
+    # number of vertices and ambient dimension
+    nv = length(active_set)
+    o = active_set.lp_optimizer
+    MOI.empty!(o)
+    λ = MOI.add_variables(o, nv)
+    # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1
+    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
+    MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0))
+    # 2 * a Aᵗ A λ == -Aᵗ b
+    lhs = 0.0 * λ
+    rhs = 0 * active_set.weights
+    for (idx, atom) in enumerate(active_set.atoms)
+        rhs[idx] = -dot(atom, active_set.b)
+        lhs[idx] = 2 * hessian_scaling * sum(λ[j] * dot(atom, active_set.atoms[j]) for j in 1:nv)
+    end
+    MOI.add_constraint.(o, lhs, MOI.EqualTo.(rhs))
+    dummy_objective = sum(λ, init=0.0)
+    MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
+    MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
+    MOI.optimize!(o)
+    if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
+        indices_to_remove = Int[]
+        new_weights = R[]
+        for idx in eachindex(λ)
+            weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
+            if weight_value <= min(1e-3 / nv, 1e-8)
+                push!(indices_to_remove, idx)
+            else
+                push!(new_weights, weight_value)
+            end
+        end
+        deleteat!(active_set.atoms, indices_to_remove)
+        deleteat!(active_set.weights, indices_to_remove)
+        @assert length(active_set) == length(new_weights)
+        active_set.weights .= new_weights
+        active_set_renormalize!(active_set)
+    end
+    return active_set
+end
+
 function active_set_renormalize!(active_set::ActiveSetQuadratic)
     renorm = sum(active_set.weights)
     active_set.weights ./= renorm

From bd2c6c022788777e3cdae704d898dd4e884a2a0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Wed, 9 Oct 2024 14:11:16 +0200
Subject: [PATCH 03/19] first working quadratic

---
 examples/blended_pairwise_with_direct.jl |  33 +++-
 src/FrankWolfe.jl                        |   1 +
 src/active_set_quadratic.jl              |   4 +-
 src/active_set_quadratic_reloaded.jl     | 183 +++++++++++++++++++++++
 4 files changed, 215 insertions(+), 6 deletions(-)
 create mode 100644 src/active_set_quadratic_reloaded.jl

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index f37a593d1..138b07147 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -149,20 +149,45 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I,
     callback=callback,
 );
 
+as_quad_lp = FrankWolfe.ActiveSetQuadratic(
+    [(1.0, copy(x00))],
+    2 * LinearAlgebra.I, -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
 # with LP acceleration
 trajectoryBPCG_quadratic_as_lp = []
 callback = build_callback(trajectoryBPCG_quadratic_as_lp)
 
-as_quad_lp = FrankWolfe.ActiveSetQuadratic(
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_lp,
+    max_iteration=1000,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=callback,
+);
+
+as_quad_reloaded = FrankWolfe.ActiveSetQuadraticReloaded(
     [(1.0, copy(x00))],
     2 * LinearAlgebra.I, -2xp,
     MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
 )
+
+# with LP acceleration
+trajectoryBPCG_quadratic_reloaded = []
+callback = build_callback(trajectoryBPCG_quadratic_reloaded)
+
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    as_quad_lp,
+    as_quad_reloaded,
     max_iteration=1000,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
@@ -174,8 +199,8 @@ as_quad_lp = FrankWolfe.ActiveSetQuadratic(
 
 
 # Update the data and labels for plotting
-dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_as_lp]
-labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "AS_Quad_LP"]
+dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_as_lp, trajectoryBPCG_quadratic_reloaded]
+labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "AS_Quad_LP", "Reloaded"]
 
 # Plot sparsity
 # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl
index e17e40038..5aa0cbee1 100644
--- a/src/FrankWolfe.jl
+++ b/src/FrankWolfe.jl
@@ -34,6 +34,7 @@ include("moi_oracle.jl")
 include("function_gradient.jl")
 include("active_set.jl")
 include("active_set_quadratic.jl")
+include("active_set_quadratic_reloaded.jl")
 include("active_set_sparsifier.jl")
 
 include("blended_cg.jl")
diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index c7849663d..3332958e1 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -263,10 +263,10 @@ function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT,
                 push!(new_weights, weight_value)
             end
         end
-        deleteat!(active_set.atoms, indices_to_remove)
-        deleteat!(active_set.weights, indices_to_remove)
+        deleteat!(active_set, indices_to_remove)
         @assert length(active_set) == length(new_weights)
         active_set.weights .= new_weights
+        active_set.weights_prev .= new_weights
         active_set_renormalize!(active_set)
     end
     return active_set
diff --git a/src/active_set_quadratic_reloaded.jl b/src/active_set_quadratic_reloaded.jl
new file mode 100644
index 000000000..6207c307d
--- /dev/null
+++ b/src/active_set_quadratic_reloaded.jl
@@ -0,0 +1,183 @@
+
+"""
+    ActiveSetQuadraticReloaded{AT, R, IT}
+
+Represents an active set of extreme vertices collected in a FW algorithm,
+along with their coefficients `(λ_i, a_i)`.
+`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`.
+The iterate `x = ∑λ_i a_i` is stored in x with type `IT`.
+The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c`
+so that the gradient is simply `∇f(x)=Ax+b`.
+"""
+struct ActiveSetQuadraticReloaded{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.AbstractOptimizer}} <: AbstractActiveSet{AT,R,IT}
+    weights::Vector{R}
+    atoms::Vector{AT}
+    x::IT
+    A::H # Hessian matrix
+    b::IT # linear term
+    lp_optimizer::OT
+    lp_frequency::Int
+    counter::Base.RefValue{Int}
+end
+
+function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    return ActiveSetQuadraticReloaded(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency)
+end
+
+function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R,H}
+    n = length(tuple_values)
+    weights = Vector{R}(undef, n)
+    atoms = Vector{AT}(undef, n)
+    @inbounds for idx in 1:n
+        weights[idx] = tuple_values[idx][1]
+        atoms[idx] = tuple_values[idx][2]
+    end
+    x = similar(b)
+    as = ActiveSetQuadraticReloaded(weights, atoms, x, A, b, lp_optimizer, lp_frequency, Ref(0))
+    compute_active_set_iterate!(as)
+    return as
+end
+
+function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    return ActiveSetQuadraticReloaded{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency)
+end
+
+function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer=nothing; lp_frequency=lp_frequency) where {AT,R,H}
+    n = length(tuple_values)
+    weights = Vector{R}(undef, n)
+    atoms = Vector{AT}(undef, n)
+    @inbounds for idx in 1:n
+        weights[idx] = tuple_values[idx][1]
+        atoms[idx] = tuple_values[idx][2]
+    end
+    x = similar(b)
+    as = ActiveSetQuadraticReloaded{AT,R,typeof(x),H}(weights, atoms, x, A, b, lp_optimizer, lp_frequency, Ref(0))
+    compute_active_set_iterate!(as)
+    return as
+end
+
+function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+    return ActiveSetQuadraticReloaded(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
+end
+function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+    return ActiveSetQuadraticReloaded{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
+end
+
+# these three functions do not update the active set iterate
+
+function Base.push!(as::ActiveSetQuadraticReloaded{AT,R}, (λ, a)) where {AT,R}
+    push!(as.weights, λ)
+    push!(as.atoms, a)
+    return as
+end
+
+function Base.deleteat!(as::ActiveSetQuadraticReloaded, idx::Int)
+    deleteat!(as.weights, idx)
+    deleteat!(as.atoms, idx)
+    return as
+end
+
+function Base.empty!(as::ActiveSetQuadraticReloaded)
+    empty!(as.atoms)
+    empty!(as.weights)
+    as.x .= 0
+    return as
+end
+
+function active_set_update!(
+    active_set::ActiveSetQuadraticReloaded{AT,R},
+    lambda, atom, renorm=true, idx=nothing;
+    weight_purge_threshold=weight_purge_threshold_default(R),
+    add_dropped_vertices=false,
+    vertex_storage=nothing,
+) where {AT,R}
+    # rescale active set
+    active_set.weights .*= (1 - lambda)
+    # add value for new atom
+    if idx === nothing
+        idx = find_atom(active_set, atom)
+    end
+    if idx > 0
+        @info "old atom"
+        @inbounds active_set.weights[idx] += lambda
+    else
+        push!(active_set, (lambda, atom))
+        if active_set.lp_optimizer !== nothing
+            active_set.counter[] += 1
+            @show active_set.counter[]
+            @show mod(active_set.counter[], active_set.lp_frequency)
+            if mod(active_set.counter[], active_set.lp_frequency) == 0
+                @show "solving quadratic"
+                solve_quadratic_activeset_lp!(active_set)
+                return active_set
+            end
+        end
+    end
+    if renorm
+        add_dropped_vertices = add_dropped_vertices ? vertex_storage !== nothing : add_dropped_vertices
+        active_set_cleanup!(active_set; weight_purge_threshold=weight_purge_threshold, update=false, add_dropped_vertices=add_dropped_vertices, vertex_storage=vertex_storage)
+        active_set_renormalize!(active_set)
+    end
+    active_set_update_scale!(active_set.x, lambda, atom)
+    return active_set
+end
+
+# generic quadratic
+function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT, R, IT, H}) where {AT, R, IT, H}
+    error("TODO")
+end
+
+# special case of scaled identity Hessian
+function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT, R, IT, <: Identity}) where {AT, R, IT}
+    hessian_scaling = active_set.A.λ
+    # number of vertices and ambient dimension
+    nv = length(active_set)
+    o = active_set.lp_optimizer
+    MOI.empty!(o)
+    λ = MOI.add_variables(o, nv)
+    A = zeros(length(active_set.x), nv)
+    for (idx, atom) in enumerate(active_set.atoms)
+        A[:,idx] .= atom
+    end
+    # λ ≥ 0, ∑ λ == 1
+    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
+    MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0))
+    # 2 * a Aᵗ A λ == -Aᵗ b
+    lhs = 0.0 * λ
+    rhs = 0 * active_set.weights
+    for (idx, atom) in enumerate(active_set.atoms)
+        lhs[idx] = 2 * hessian_scaling * sum(λ[j] * dot(atom, active_set.atoms[j]) for j in 1:nv)
+        rhs[idx] = -dot(atom, active_set.b)
+    end
+    MOI.add_constraint.(o, lhs, MOI.EqualTo.(rhs))
+    dummy_objective = sum(λ, init=0.0)
+    MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
+    MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
+    MOI.optimize!(o)
+    if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
+        indices_to_remove = Int[]
+        new_weights = R[]
+        for idx in eachindex(λ)
+            weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
+            if weight_value <= min(1e-3 / nv, 1e-8)
+                push!(indices_to_remove, idx)
+            else
+                push!(new_weights, weight_value)
+            end
+        end
+        deleteat!(active_set.atoms, indices_to_remove)
+        deleteat!(active_set.weights, indices_to_remove)
+        @assert length(active_set) == length(new_weights)
+        active_set.weights .= new_weights
+        active_set_renormalize!(active_set)
+    end
+    return active_set
+end
+
+function active_set_renormalize!(active_set::ActiveSetQuadraticReloaded)
+    renorm = sum(active_set.weights)
+    active_set.weights ./= renorm
+    return active_set
+end

From a708bcee099c0108ab12a9b0dc73b134a6f73672 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Wed, 9 Oct 2024 14:14:41 +0200
Subject: [PATCH 04/19] remove quadratic LP from current

---
 src/active_set_quadratic.jl | 87 ++++++-------------------------------
 1 file changed, 13 insertions(+), 74 deletions(-)

diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index 3332958e1..fe40c85b4 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -9,7 +9,7 @@ The iterate `x = ∑λ_i a_i` is stored in x with type `IT`.
 The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c`
 so that the gradient is simply `∇f(x)=Ax+b`.
 """
-struct ActiveSetQuadratic{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.AbstractOptimizer}} <: AbstractActiveSet{AT,R,IT}
+struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT}
     weights::Vector{R}
     atoms::Vector{AT}
     x::IT
@@ -20,9 +20,6 @@ struct ActiveSetQuadratic{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.Abstrac
     dots_b::Vector{R} # stores ⟨b, atoms[i]⟩
     weights_prev::Vector{R}
     modified::BitVector
-    lp_optimizer::OT
-    lp_frequency::Int
-    counter::Base.RefValue{Int}
 end
 
 function detect_quadratic_function(grad!, x0; test=true)
@@ -51,12 +48,12 @@ function detect_quadratic_function(grad!, x0; test=true)
     return A, b
 end
 
-function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R}
     A, b = detect_quadratic_function(grad!, tuple_values[1][2])
-    return ActiveSetQuadratic(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency)
+    return ActiveSetQuadratic(tuple_values, A, b)
 end
 
-function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R,H}
+function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H}
     n = length(tuple_values)
     weights = Vector{R}(undef, n)
     atoms = Vector{AT}(undef, n)
@@ -75,18 +72,18 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b,
         dots_b[idx] = fast_dot(b, atoms[idx])
     end
     x = similar(b)
-    as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified, lp_optimizer, lp_frequency, Ref(0))
+    as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified)
 
     compute_active_set_iterate!(as)
     return as
 end
 
-function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R}
     A, b = detect_quadratic_function(grad!, tuple_values[1][2])
-    return ActiveSetQuadratic{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency)
+    return ActiveSetQuadratic{AT,R}(tuple_values, A, b)
 end
 
-function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer=nothing; lp_frequency=lp_frequency) where {AT,R,H}
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H}
     n = length(tuple_values)
     weights = Vector{R}(undef, n)
     atoms = Vector{AT}(undef, n)
@@ -105,7 +102,7 @@ function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,
         dots_b[idx] = fast_dot(b, atoms[idx])
     end
     x = similar(b)
-    as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified, lp_optimizer, lp_frequency, Ref(0))
+    as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified)
     compute_active_set_iterate!(as)
     return as
 end
@@ -122,11 +119,11 @@ function Base.:*(a::Identity, b)
         return a.λ * b
     end
 end
-function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
-    return ActiveSetQuadratic(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
+function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R}
+    return ActiveSetQuadratic(tuple_values, Identity(A.λ), b)
 end
-function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
-    return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R}
+    return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b)
 end
 
 # these three functions do not update the active set iterate
@@ -204,16 +201,6 @@ function active_set_update!(
         @inbounds active_set.modified[idx] = true
     else
         push!(active_set, (lambda, atom))
-        if active_set.lp_optimizer !== nothing
-            active_set.counter[] += 1
-            @show active_set.counter[]
-            @show mod(active_set.counter[], active_set.lp_frequency)
-            if mod(active_set.counter[], active_set.lp_frequency) == 0
-                @show "solving quadratic"
-                solve_quadratic_activeset_lp!(active_set)
-                return active_set
-            end
-        end
     end
     if renorm
         add_dropped_vertices = add_dropped_vertices ? vertex_storage !== nothing : add_dropped_vertices
@@ -224,54 +211,6 @@ function active_set_update!(
     return active_set
 end
 
-# generic quadratic
-function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT, H}) where {AT, R, IT, H}
-    # TODO
-end
-
-# special case of scaled identity Hessian
-function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadratic{AT, R, IT, <: Identity}) where {AT, R, IT}
-    hessian_scaling = active_set.A.λ
-    # number of vertices and ambient dimension
-    nv = length(active_set)
-    o = active_set.lp_optimizer
-    MOI.empty!(o)
-    λ = MOI.add_variables(o, nv)
-    # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1
-    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
-    MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0))
-    # 2 * a Aᵗ A λ == -Aᵗ b
-    lhs = 0.0 * λ
-    rhs = 0 * active_set.weights
-    for (idx, atom) in enumerate(active_set.atoms)
-        rhs[idx] = -dot(atom, active_set.b)
-        lhs[idx] = 2 * hessian_scaling * sum(λ[j] * dot(atom, active_set.atoms[j]) for j in 1:nv)
-    end
-    MOI.add_constraint.(o, lhs, MOI.EqualTo.(rhs))
-    dummy_objective = sum(λ, init=0.0)
-    MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
-    MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
-    MOI.optimize!(o)
-    if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
-        indices_to_remove = Int[]
-        new_weights = R[]
-        for idx in eachindex(λ)
-            weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
-            if weight_value <= min(1e-3 / nv, 1e-8)
-                push!(indices_to_remove, idx)
-            else
-                push!(new_weights, weight_value)
-            end
-        end
-        deleteat!(active_set, indices_to_remove)
-        @assert length(active_set) == length(new_weights)
-        active_set.weights .= new_weights
-        active_set.weights_prev .= new_weights
-        active_set_renormalize!(active_set)
-    end
-    return active_set
-end
-
 function active_set_renormalize!(active_set::ActiveSetQuadratic)
     renorm = sum(active_set.weights)
     active_set.weights ./= renorm

From f67d381069a7463051e51ac4f1139e1ca114e7b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Wed, 9 Oct 2024 14:23:53 +0200
Subject: [PATCH 05/19] cleanup

---
 examples/blended_pairwise_with_direct.jl | 38 ++++--------------------
 src/active_set_quadratic_reloaded.jl     |  4 ---
 2 files changed, 6 insertions(+), 36 deletions(-)

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 138b07147..50909d8d7 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -31,7 +31,7 @@ lp_solver = HiGHS.Optimizer
 include("../examples/plot_utils.jl")
 
 n = Int(1e4)
-k = 1000
+k = 10000
 
 # s = rand(1:100)
 s = 10
@@ -59,7 +59,7 @@ lmo = FrankWolfe.KSparseLMO(5, 1.0)
 # lmo = FrankWolfe.ProbabilitySimplexOracle(1.0);
 # lmo = FrankWolfe.UnitSimplexOracle(1.0);
 
-x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
 
 function build_callback(trajectory_arr)
     return function callback(state, active_set, args...)
@@ -73,12 +73,11 @@ FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100)
 trajectoryBPCG_standard = []
 callback = build_callback(trajectoryBPCG_standard)
 
-x0 = deepcopy(x00)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    x0,
+    deepcopy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
@@ -92,12 +91,11 @@ x0 = deepcopy(x00)
 trajectoryBPCG_quadratic = []
 callback = build_callback(trajectoryBPCG_quadratic)
 
-x0 = deepcopy(x00)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    x0,
+    deepcopy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
@@ -149,30 +147,6 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I,
     callback=callback,
 );
 
-as_quad_lp = FrankWolfe.ActiveSetQuadratic(
-    [(1.0, copy(x00))],
-    2 * LinearAlgebra.I, -2xp,
-    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
-)
-
-# with LP acceleration
-trajectoryBPCG_quadratic_as_lp = []
-callback = build_callback(trajectoryBPCG_quadratic_as_lp)
-
-@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
-    f,
-    grad!,
-    lmo,
-    as_quad_lp,
-    max_iteration=1000,
-    line_search=FrankWolfe.Shortstep(2.0),
-    print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
-    verbose=true,
-    trajectory=true,
-    callback=callback,
-);
-
 as_quad_reloaded = FrankWolfe.ActiveSetQuadraticReloaded(
     [(1.0, copy(x00))],
     2 * LinearAlgebra.I, -2xp,
@@ -199,8 +173,8 @@ callback = build_callback(trajectoryBPCG_quadratic_reloaded)
 
 
 # Update the data and labels for plotting
-dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_as_lp, trajectoryBPCG_quadratic_reloaded]
-labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "AS_Quad_LP", "Reloaded"]
+dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_reloaded]
+labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "Reloaded"]
 
 # Plot sparsity
 # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
diff --git a/src/active_set_quadratic_reloaded.jl b/src/active_set_quadratic_reloaded.jl
index 6207c307d..ad1a7e75d 100644
--- a/src/active_set_quadratic_reloaded.jl
+++ b/src/active_set_quadratic_reloaded.jl
@@ -100,16 +100,12 @@ function active_set_update!(
         idx = find_atom(active_set, atom)
     end
     if idx > 0
-        @info "old atom"
         @inbounds active_set.weights[idx] += lambda
     else
         push!(active_set, (lambda, atom))
         if active_set.lp_optimizer !== nothing
             active_set.counter[] += 1
-            @show active_set.counter[]
-            @show mod(active_set.counter[], active_set.lp_frequency)
             if mod(active_set.counter[], active_set.lp_frequency) == 0
-                @show "solving quadratic"
                 solve_quadratic_activeset_lp!(active_set)
                 return active_set
             end

From fa4051f664db9ce2e92cf65a1819202195075b0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Wed, 9 Oct 2024 21:28:18 +0200
Subject: [PATCH 06/19] HiGHS in test deps

---
 Project.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index e56223008..ca298fa1a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -43,6 +43,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
+HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
 Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
@@ -58,4 +59,4 @@ Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"
 ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
 
 [targets]
-test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"]
+test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"]

From 7c1ff01a37e93644218c2c73d59dae93c850eb0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Wed, 9 Oct 2024 21:29:16 +0200
Subject: [PATCH 07/19] working reworked LP quadratic

---
 examples/blended_pairwise_with_direct.jl | 34 +++++++++++++++++-----
 src/active_set_quadratic_reloaded.jl     | 37 +++++++++++++-----------
 2 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 50909d8d7..77cff99e6 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -16,10 +16,6 @@ using FrankWolfe
 using LinearAlgebra
 using Random
 
-import Pkg
-Pkg.add("GLPK")
-Pkg.add("HiGHS")
-import GLPK
 import HiGHS
 import MathOptInterface as MOI
 
@@ -31,7 +27,7 @@ lp_solver = HiGHS.Optimizer
 include("../examples/plot_utils.jl")
 
 n = Int(1e4)
-k = 10000
+k = 5000
 
 # s = rand(1:100)
 s = 10
@@ -147,6 +143,28 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I,
     callback=callback,
 );
 
+as_quad = FrankWolfe.ActiveSetQuadratic(
+    [(1.0, copy(x00))],
+    2 * LinearAlgebra.I, -2xp,
+)
+
+# with quadratic active set
+trajectoryBPCG_quadratic_as = []
+callback = build_callback(trajectoryBPCG_quadratic_as)
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=callback,
+);
+
 as_quad_reloaded = FrankWolfe.ActiveSetQuadraticReloaded(
     [(1.0, copy(x00))],
     2 * LinearAlgebra.I, -2xp,
@@ -162,7 +180,7 @@ callback = build_callback(trajectoryBPCG_quadratic_reloaded)
     grad!,
     lmo,
     as_quad_reloaded,
-    max_iteration=1000,
+    max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
     memory_mode=FrankWolfe.InplaceEmphasis(),
@@ -173,8 +191,8 @@ callback = build_callback(trajectoryBPCG_quadratic_reloaded)
 
 
 # Update the data and labels for plotting
-dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_nosquad, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_reloaded]
-labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "BPCG (Generic Direct)", "AS_Quad", "Reloaded"]
+dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_reloaded]
+labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded"]
 
 # Plot sparsity
 # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
diff --git a/src/active_set_quadratic_reloaded.jl b/src/active_set_quadratic_reloaded.jl
index ad1a7e75d..0fe816073 100644
--- a/src/active_set_quadratic_reloaded.jl
+++ b/src/active_set_quadratic_reloaded.jl
@@ -20,7 +20,7 @@ struct ActiveSetQuadraticReloaded{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI
     counter::Base.RefValue{Int}
 end
 
-function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=100) where {AT,R}
     A, b = detect_quadratic_function(grad!, tuple_values[1][2])
     return ActiveSetQuadraticReloaded(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency)
 end
@@ -39,7 +39,7 @@ function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A
     return as
 end
 
-function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=100) where {AT,R}
     A, b = detect_quadratic_function(grad!, tuple_values[1][2])
     return ActiveSetQuadraticReloaded{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency)
 end
@@ -58,10 +58,10 @@ function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<
     return as
 end
 
-function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=100) where {AT,R}
     return ActiveSetQuadraticReloaded(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
 end
-function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R}
+function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=100) where {AT,R}
     return ActiveSetQuadraticReloaded{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
 end
 
@@ -133,22 +133,23 @@ function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT
     o = active_set.lp_optimizer
     MOI.empty!(o)
     λ = MOI.add_variables(o, nv)
-    A = zeros(length(active_set.x), nv)
-    for (idx, atom) in enumerate(active_set.atoms)
-        A[:,idx] .= atom
-    end
     # λ ≥ 0, ∑ λ == 1
     MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
     MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0))
-    # 2 * a Aᵗ A λ == -Aᵗ b
-    lhs = 0.0 * λ
-    rhs = 0 * active_set.weights
-    for (idx, atom) in enumerate(active_set.atoms)
-        lhs[idx] = 2 * hessian_scaling * sum(λ[j] * dot(atom, active_set.atoms[j]) for j in 1:nv)
-        rhs[idx] = -dot(atom, active_set.b)
+    # a Aᵗ A λ == -Aᵗ b
+    # lhs = 0.0 * λ
+    # rhs = 0 * active_set.weights
+    for atom in active_set.atoms
+        lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
+        Base.sizehint!(lhs.terms, nv)
+        # replaces direct sum because of MOI and MutableArithmetic slow sums
+        for j in 1:nv
+            push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, active_set.atoms[j]), λ[j]))
+        end
+        rhs = -dot(atom, active_set.b)
+        MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
     end
-    MOI.add_constraint.(o, lhs, MOI.EqualTo.(rhs))
-    dummy_objective = sum(λ, init=0.0)
+    dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
     MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
     MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
     MOI.optimize!(o)
@@ -157,7 +158,7 @@ function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT
         new_weights = R[]
         for idx in eachindex(λ)
             weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
-            if weight_value <= min(1e-3 / nv, 1e-8)
+            if weight_value <= 1e-10
                 push!(indices_to_remove, idx)
             else
                 push!(new_weights, weight_value)
@@ -167,7 +168,9 @@ function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT
         deleteat!(active_set.weights, indices_to_remove)
         @assert length(active_set) == length(new_weights)
         active_set.weights .= new_weights
+        @assert all(>=(0), new_weights)
         active_set_renormalize!(active_set)
+        compute_active_set_iterate!(active_set)
     end
     return active_set
 end

From 73873d23e1133d536775cb100cb50eaf4d20d860 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Thu, 10 Oct 2024 17:45:24 +0200
Subject: [PATCH 08/19] working version generic quadratic

---
 examples/blended_pairwise_with_direct.jl |  34 ++++-
 src/FrankWolfe.jl                        |   2 +-
 src/active_set_quadratic.jl              |   1 +
 src/active_set_quadratic_direct_solve.jl | 165 ++++++++++++++++++++
 src/active_set_quadratic_reloaded.jl     | 182 -----------------------
 5 files changed, 196 insertions(+), 188 deletions(-)
 create mode 100644 src/active_set_quadratic_direct_solve.jl
 delete mode 100644 src/active_set_quadratic_reloaded.jl

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 77cff99e6..066d7ed03 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -165,21 +165,45 @@ callback = build_callback(trajectoryBPCG_quadratic_as)
     callback=callback,
 );
 
-as_quad_reloaded = FrankWolfe.ActiveSetQuadraticReloaded(
+as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
     [(1.0, copy(x00))],
     2 * LinearAlgebra.I, -2xp,
     MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
 )
 
 # with LP acceleration
-trajectoryBPCG_quadratic_reloaded = []
-callback = build_callback(trajectoryBPCG_quadratic_reloaded)
+trajectoryBPCG_quadratic_direct = []
+callback = build_callback(trajectoryBPCG_quadratic_direct)
 
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    as_quad_reloaded,
+    as_quad_direct,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=callback,
+);
+
+as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * Diagonal(ones(length(xp))), -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+# with LP acceleration
+trajectoryBPCG_quadratic_direct_generic = []
+callback = build_callback(trajectoryBPCG_quadratic_direct_generic)
+
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_generic,
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
@@ -191,7 +215,7 @@ callback = build_callback(trajectoryBPCG_quadratic_reloaded)
 
 
 # Update the data and labels for plotting
-dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_reloaded]
+dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct]
 labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded"]
 
 # Plot sparsity
diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl
index 5aa0cbee1..cc2fef145 100644
--- a/src/FrankWolfe.jl
+++ b/src/FrankWolfe.jl
@@ -34,7 +34,7 @@ include("moi_oracle.jl")
 include("function_gradient.jl")
 include("active_set.jl")
 include("active_set_quadratic.jl")
-include("active_set_quadratic_reloaded.jl")
+include("active_set_quadratic_direct_solve.jl")
 include("active_set_sparsifier.jl")
 
 include("blended_cg.jl")
diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index fe40c85b4..9736c911b 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -119,6 +119,7 @@ function Base.:*(a::Identity, b)
         return a.λ * b
     end
 end
+
 function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R}
     return ActiveSetQuadratic(tuple_values, Identity(A.λ), b)
 end
diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl
new file mode 100644
index 000000000..687c22491
--- /dev/null
+++ b/src/active_set_quadratic_direct_solve.jl
@@ -0,0 +1,165 @@
+
+"""
+    ActiveSetQuadraticLinearSolve{AT, R, IT}
+
+Represents an active set of extreme vertices collected in a FW algorithm,
+along with their coefficients `(λ_i, a_i)`.
+`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`.
+The iterate `x = ∑λ_i a_i` is stored in x with type `IT`.
+The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c`
+so that the gradient is simply `∇f(x)=Ax+b`.
+"""
+struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet} <: AbstractActiveSet{AT,R,IT}
+    weights::Vector{R}
+    atoms::Vector{AT}
+    x::IT
+    A::H # Hessian matrix
+    b::IT # linear term
+    active_set::AS
+    lp_optimizer::OT
+    lp_frequency::Int
+    counter::Base.RefValue{Int}
+end
+
+function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer; lp_frequency=100) where {AT,R}
+    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency)
+end
+
+function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer; lp_frequency=10) where {AT,R,H}
+    inner_as = ActiveSetQuadratic(tuple_values, A, b)
+    as = ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, lp_frequency, Ref(0))
+    compute_active_set_iterate!(as)
+    return as
+end
+
+function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer; lp_frequency=100) where {AT,R}
+    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency)
+end
+
+function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer; lp_frequency=lp_frequency) where {AT,R,H}
+    inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b)
+    as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, lp_frequency, Ref(0))
+    compute_active_set_iterate!(as)
+    return as
+end
+
+function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer; lp_frequency=100) where {AT,R}
+    return ActiveSetQuadraticLinearSolve(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
+end
+function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer; lp_frequency=100) where {AT,R}
+    return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
+end
+
+# all mutating functions are delegated to the inner active set
+
+Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple)
+
+Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx)
+
+Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set)
+
+function active_set_update!(
+    as::ActiveSetQuadraticLinearSolve{AT,R},
+    lambda, atom, renorm=true, idx=nothing;
+    weight_purge_threshold=weight_purge_threshold_default(R),
+    add_dropped_vertices=false,
+    vertex_storage=nothing,
+) where {AT,R}
+    if idx === nothing
+        idx = find_atom(as, atom)
+    end
+    active_set_update!(as.active_set, lambda, atom, renorm, idx; weight_purge_threshold=weight_purge_threshold, add_dropped_vertices=add_dropped_vertices, vertex_storage=vertex_storage)
+    # new atom introduced, we can solve the auxiliary LP
+    if idx < 0
+        as.counter[] += 1
+        if mod(as.counter[], as.lp_frequency) == 0
+            solve_quadratic_activeset_lp!(as)
+        end
+    end
+    return as
+end
+
+active_set_renormalize!(as::ActiveSetQuadraticLinearSolve) = active_set_renormalize!(as.active_set)
+
+function active_set_argmin(as::ActiveSetQuadraticLinearSolve, direction)
+    return active_set_argmin(as.active_set, direction)
+end
+
+function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0.5)
+    return active_set_argminmax(as.active_set, direction; Φ=Φ)
+end
+
+# generic quadratic with quadratic information provided
+function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: AbstractMatrix}) where {AT, R, IT}
+    nv = length(as)
+    o = as.lp_optimizer
+    MOI.empty!(o)
+    λ = MOI.add_variables(o, nv)
+    # λ ≥ 0, ∑ λ == 1
+    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
+    MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0))
+    # Aᵗ Q A λ == -Aᵗ b
+    for atom in as.atoms
+        lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
+        Base.sizehint!(lhs.terms, nv)
+        # replaces direct sum because of MOI and MutableArithmetic slow sums
+        for j in 1:nv
+            push!(lhs.terms, MOI.ScalarAffineTerm(dot(atom, as.A, as.atoms[j]), λ[j]))
+        end
+        rhs = -dot(atom, as.b)
+        MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
+    end
+    dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
+    MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
+    MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
+    MOI.optimize!(o)
+end
+
+# special case of scaled identity Hessian
+function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: Identity}) where {AT, R, IT}
+    hessian_scaling = as.A.λ
+    nv = length(as)
+    o = as.lp_optimizer
+    MOI.empty!(o)
+    λ = MOI.add_variables(o, nv)
+    # λ ≥ 0, ∑ λ == 1
+    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
+    MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0))
+    # a Aᵗ A λ == -Aᵗ b
+    for atom in as.atoms
+        lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
+        Base.sizehint!(lhs.terms, nv)
+        # replaces direct sum because of MOI and MutableArithmetic slow sums
+        for j in 1:nv
+            push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, as.atoms[j]), λ[j]))
+        end
+        rhs = -dot(atom, as.b)
+        MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
+    end
+    dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
+    MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
+    MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
+    MOI.optimize!(o)
+    if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
+        indices_to_remove = Int[]
+        new_weights = R[]
+        for idx in eachindex(λ)
+            weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
+            if weight_value <= 1e-10
+                push!(indices_to_remove, idx)
+            else
+                push!(new_weights, weight_value)
+            end
+        end
+        deleteat!(as.atoms, indices_to_remove)
+        deleteat!(as.weights, indices_to_remove)
+        @assert length(as) == length(new_weights)
+        as.weights .= new_weights
+        @assert all(>=(0), new_weights)
+        active_set_renormalize!(as)
+        compute_active_set_iterate!(as)
+    end
+    return as
+end
diff --git a/src/active_set_quadratic_reloaded.jl b/src/active_set_quadratic_reloaded.jl
deleted file mode 100644
index 0fe816073..000000000
--- a/src/active_set_quadratic_reloaded.jl
+++ /dev/null
@@ -1,182 +0,0 @@
-
-"""
-    ActiveSetQuadraticReloaded{AT, R, IT}
-
-Represents an active set of extreme vertices collected in a FW algorithm,
-along with their coefficients `(λ_i, a_i)`.
-`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`.
-The iterate `x = ∑λ_i a_i` is stored in x with type `IT`.
-The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c`
-so that the gradient is simply `∇f(x)=Ax+b`.
-"""
-struct ActiveSetQuadraticReloaded{AT, R <: Real, IT, H, OT <: Union{Nothing, MOI.AbstractOptimizer}} <: AbstractActiveSet{AT,R,IT}
-    weights::Vector{R}
-    atoms::Vector{AT}
-    x::IT
-    A::H # Hessian matrix
-    b::IT # linear term
-    lp_optimizer::OT
-    lp_frequency::Int
-    counter::Base.RefValue{Int}
-end
-
-function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer=nothing; lp_frequency=100) where {AT,R}
-    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
-    return ActiveSetQuadraticReloaded(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency)
-end
-
-function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer=nothing; lp_frequency=10) where {AT,R,H}
-    n = length(tuple_values)
-    weights = Vector{R}(undef, n)
-    atoms = Vector{AT}(undef, n)
-    @inbounds for idx in 1:n
-        weights[idx] = tuple_values[idx][1]
-        atoms[idx] = tuple_values[idx][2]
-    end
-    x = similar(b)
-    as = ActiveSetQuadraticReloaded(weights, atoms, x, A, b, lp_optimizer, lp_frequency, Ref(0))
-    compute_active_set_iterate!(as)
-    return as
-end
-
-function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer=nothing; lp_frequency=100) where {AT,R}
-    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
-    return ActiveSetQuadraticReloaded{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency)
-end
-
-function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer=nothing; lp_frequency=lp_frequency) where {AT,R,H}
-    n = length(tuple_values)
-    weights = Vector{R}(undef, n)
-    atoms = Vector{AT}(undef, n)
-    @inbounds for idx in 1:n
-        weights[idx] = tuple_values[idx][1]
-        atoms[idx] = tuple_values[idx][2]
-    end
-    x = similar(b)
-    as = ActiveSetQuadraticReloaded{AT,R,typeof(x),H}(weights, atoms, x, A, b, lp_optimizer, lp_frequency, Ref(0))
-    compute_active_set_iterate!(as)
-    return as
-end
-
-function ActiveSetQuadraticReloaded(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=100) where {AT,R}
-    return ActiveSetQuadraticReloaded(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
-end
-function ActiveSetQuadraticReloaded{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer=nothing; lp_frequency=100) where {AT,R}
-    return ActiveSetQuadraticReloaded{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
-end
-
-# these three functions do not update the active set iterate
-
-function Base.push!(as::ActiveSetQuadraticReloaded{AT,R}, (λ, a)) where {AT,R}
-    push!(as.weights, λ)
-    push!(as.atoms, a)
-    return as
-end
-
-function Base.deleteat!(as::ActiveSetQuadraticReloaded, idx::Int)
-    deleteat!(as.weights, idx)
-    deleteat!(as.atoms, idx)
-    return as
-end
-
-function Base.empty!(as::ActiveSetQuadraticReloaded)
-    empty!(as.atoms)
-    empty!(as.weights)
-    as.x .= 0
-    return as
-end
-
-function active_set_update!(
-    active_set::ActiveSetQuadraticReloaded{AT,R},
-    lambda, atom, renorm=true, idx=nothing;
-    weight_purge_threshold=weight_purge_threshold_default(R),
-    add_dropped_vertices=false,
-    vertex_storage=nothing,
-) where {AT,R}
-    # rescale active set
-    active_set.weights .*= (1 - lambda)
-    # add value for new atom
-    if idx === nothing
-        idx = find_atom(active_set, atom)
-    end
-    if idx > 0
-        @inbounds active_set.weights[idx] += lambda
-    else
-        push!(active_set, (lambda, atom))
-        if active_set.lp_optimizer !== nothing
-            active_set.counter[] += 1
-            if mod(active_set.counter[], active_set.lp_frequency) == 0
-                solve_quadratic_activeset_lp!(active_set)
-                return active_set
-            end
-        end
-    end
-    if renorm
-        add_dropped_vertices = add_dropped_vertices ? vertex_storage !== nothing : add_dropped_vertices
-        active_set_cleanup!(active_set; weight_purge_threshold=weight_purge_threshold, update=false, add_dropped_vertices=add_dropped_vertices, vertex_storage=vertex_storage)
-        active_set_renormalize!(active_set)
-    end
-    active_set_update_scale!(active_set.x, lambda, atom)
-    return active_set
-end
-
-# generic quadratic
-function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT, R, IT, H}) where {AT, R, IT, H}
-    error("TODO")
-end
-
-# special case of scaled identity Hessian
-function solve_quadratic_activeset_lp!(active_set::ActiveSetQuadraticReloaded{AT, R, IT, <: Identity}) where {AT, R, IT}
-    hessian_scaling = active_set.A.λ
-    # number of vertices and ambient dimension
-    nv = length(active_set)
-    o = active_set.lp_optimizer
-    MOI.empty!(o)
-    λ = MOI.add_variables(o, nv)
-    # λ ≥ 0, ∑ λ == 1
-    MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
-    MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0))
-    # a Aᵗ A λ == -Aᵗ b
-    # lhs = 0.0 * λ
-    # rhs = 0 * active_set.weights
-    for atom in active_set.atoms
-        lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
-        Base.sizehint!(lhs.terms, nv)
-        # replaces direct sum because of MOI and MutableArithmetic slow sums
-        for j in 1:nv
-            push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, active_set.atoms[j]), λ[j]))
-        end
-        rhs = -dot(atom, active_set.b)
-        MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
-    end
-    dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
-    MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
-    MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
-    MOI.optimize!(o)
-    if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
-        indices_to_remove = Int[]
-        new_weights = R[]
-        for idx in eachindex(λ)
-            weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
-            if weight_value <= 1e-10
-                push!(indices_to_remove, idx)
-            else
-                push!(new_weights, weight_value)
-            end
-        end
-        deleteat!(active_set.atoms, indices_to_remove)
-        deleteat!(active_set.weights, indices_to_remove)
-        @assert length(active_set) == length(new_weights)
-        active_set.weights .= new_weights
-        @assert all(>=(0), new_weights)
-        active_set_renormalize!(active_set)
-        compute_active_set_iterate!(active_set)
-    end
-    return active_set
-end
-
-function active_set_renormalize!(active_set::ActiveSetQuadraticReloaded)
-    renorm = sum(active_set.weights)
-    active_set.weights ./= renorm
-    return active_set
-end

From 31dc8be78fdb54011626d00eee15fc52cf02ee7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Fri, 11 Oct 2024 09:01:49 +0200
Subject: [PATCH 09/19] slow version generic quadratic

---
 examples/blended_pairwise_with_direct.jl | 2 +-
 src/active_set_quadratic_direct_solve.jl | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 066d7ed03..3711094c0 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -204,7 +204,7 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic)
     grad!,
     lmo,
     as_quad_direct_generic,
-    max_iteration=k,
+    max_iteration=5,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
     memory_mode=FrankWolfe.InplaceEmphasis(),
diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl
index 687c22491..f7fef1a97 100644
--- a/src/active_set_quadratic_direct_solve.jl
+++ b/src/active_set_quadratic_direct_solve.jl
@@ -106,6 +106,7 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
         Base.sizehint!(lhs.terms, nv)
         # replaces direct sum because of MOI and MutableArithmetic slow sums
         for j in 1:nv
+            # TODO slow
             push!(lhs.terms, MOI.ScalarAffineTerm(dot(atom, as.A, as.atoms[j]), λ[j]))
         end
         rhs = -dot(atom, as.b)

From ed312c5b58112060c880db9b298ed0c5b857ab49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Fri, 11 Oct 2024 17:29:27 +0200
Subject: [PATCH 10/19] faster term manipulation

---
 examples/blended_pairwise_with_direct.jl |  6 +-
 src/active_set_quadratic_direct_solve.jl | 73 +++++++++++++++---------
 src/utils.jl                             | 71 +++++++++++++++++++++++
 3 files changed, 121 insertions(+), 29 deletions(-)

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 3711094c0..5cfe6bf8f 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -204,7 +204,7 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic)
     grad!,
     lmo,
     as_quad_direct_generic,
-    max_iteration=5,
+    max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
     memory_mode=FrankWolfe.InplaceEmphasis(),
@@ -215,8 +215,8 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic)
 
 
 # Update the data and labels for plotting
-dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct]
-labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded"]
+dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct, trajectoryBPCG_quadratic_direct_generic]
+labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded", "Reloaded_generic"]
 
 # Plot sparsity
 # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl
index f7fef1a97..ced0d551c 100644
--- a/src/active_set_quadratic_direct_solve.jl
+++ b/src/active_set_quadratic_direct_solve.jl
@@ -99,23 +99,43 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
     λ = MOI.add_variables(o, nv)
     # λ ≥ 0, ∑ λ == 1
     MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
-    MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0))
-    # Aᵗ Q A λ == -Aᵗ b
+    sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
+    MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0))
+    # Vᵗ A V λ == -Vᵗ b
     for atom in as.atoms
         lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
         Base.sizehint!(lhs.terms, nv)
         # replaces direct sum because of MOI and MutableArithmetic slow sums
         for j in 1:nv
-            # TODO slow
-            push!(lhs.terms, MOI.ScalarAffineTerm(dot(atom, as.A, as.atoms[j]), λ[j]))
+            push!(lhs.terms, MOI.ScalarAffineTerm(fast_dot(atom, as.A, as.atoms[j]), λ[j]))
         end
         rhs = -dot(atom, as.b)
         MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
     end
-    dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
-    MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
+    MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables)
     MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
     MOI.optimize!(o)
+    if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
+        return as
+    end
+    indices_to_remove = Int[]
+    new_weights = R[]
+    for idx in eachindex(λ)
+        weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
+        if weight_value <= 1e-10
+            push!(indices_to_remove, idx)
+        else
+            push!(new_weights, weight_value)
+        end
+    end
+    deleteat!(as.atoms, indices_to_remove)
+    deleteat!(as.weights, indices_to_remove)
+    @assert length(as) == length(new_weights)
+    as.weights .= new_weights
+    @assert all(>=(0), new_weights)
+    active_set_renormalize!(as)
+    compute_active_set_iterate!(as)
+    return as
 end
 
 # special case of scaled identity Hessian
@@ -127,7 +147,8 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
     λ = MOI.add_variables(o, nv)
     # λ ≥ 0, ∑ λ == 1
     MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0))
-    MOI.add_constraint(o, sum(λ; init=0.0), MOI.EqualTo(1.0))
+    sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
+    MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0))
     # a Aᵗ A λ == -Aᵗ b
     for atom in as.atoms
         lhs = MOI.ScalarAffineFunction{Float64}([], 0.0)
@@ -139,28 +160,28 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
         rhs = -dot(atom, as.b)
         MOI.add_constraint(o, lhs, MOI.EqualTo(rhs))
     end
-    dummy_objective = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0)
-    MOI.set(o, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective)
+    MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables)
     MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE)
     MOI.optimize!(o)
-    if MOI.get(o, MOI.TerminationStatus()) in (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
-        indices_to_remove = Int[]
-        new_weights = R[]
-        for idx in eachindex(λ)
-            weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
-            if weight_value <= 1e-10
-                push!(indices_to_remove, idx)
-            else
-                push!(new_weights, weight_value)
-            end
+    if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL)
+        return as
+    end
+    indices_to_remove = Int[]
+    new_weights = R[]
+    for idx in eachindex(λ)
+        weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
+        if weight_value <= 1e-10
+            push!(indices_to_remove, idx)
+        else
+            push!(new_weights, weight_value)
         end
-        deleteat!(as.atoms, indices_to_remove)
-        deleteat!(as.weights, indices_to_remove)
-        @assert length(as) == length(new_weights)
-        as.weights .= new_weights
-        @assert all(>=(0), new_weights)
-        active_set_renormalize!(as)
-        compute_active_set_iterate!(as)
     end
+    deleteat!(as.atoms, indices_to_remove)
+    deleteat!(as.weights, indices_to_remove)
+    @assert length(as) == length(new_weights)
+    as.weights .= new_weights
+    @assert all(>=(0), new_weights)
+    active_set_renormalize!(as)
+    compute_active_set_iterate!(as)
     return as
 end
diff --git a/src/utils.jl b/src/utils.jl
index f3d950884..27d24e03c 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -163,6 +163,77 @@ function fast_dot(A::Matrix{T1}, B::SparseArrays.SparseMatrixCSC{T2}) where {T1,
     return s
 end
 
+fast_dot(a, Q, b) = dot(a, Q, b)
+
+function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::AbstractVector{<:Real})
+    if a === b
+        return _fast_quadratic_form_symmetric(a, Q)
+    end
+    d = Q.diag
+    nzvals = SparseArrays.nonzeros(a)
+    nzinds = SparseArrays.nonzeroinds(a)
+    return sum(eachindex(nzvals); init=zero(eltype(a))) do nzidx
+        nzvals[nzidx] * d[nzinds[nzidx]] * b[nzinds[nzidx]]
+    end
+end
+
+function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::SparseArrays.AbstractSparseVector{<:Real})
+    if a === b
+        return _fast_quadratic_form_symmetric(a, Q)
+    end
+    n = length(a)
+    if length(b) != n
+        throw(
+            DimensionMismatch("Vector a has a length $n but b has a length $(length(b))")
+        )
+    end
+    anzind = SparseArrays.nonzeroinds(a)
+    bnzind = SparseArrays.nonzeroinds(b)
+    anzval = SparseArrays.nonzeros(a)
+    bnzval = SparseArrays.nonzeros(b)
+    s = zero(Base.promote_eltype(a, Q, b))
+
+    if isempty(anzind) || isempty(bnzind)
+        return s
+    end
+
+    a_idx = 1
+    b_idx = 1
+    a_idx_last = length(anzind)
+    b_idx_last = length(bnzind)
+
+    # go through the nonzero indices of a and b simultaneously
+    @inbounds while a_idx <= a_idx_last && b_idx <= b_idx_last
+        ia = anzind[a_idx]
+        ib = bnzind[b_idx]
+        if ia == ib
+            s += dot(anzval[a_idx], Q.diag[ia], bnzval[b_idx])
+            a_idx += 1
+            b_idx += 1
+        elseif ia < ib
+            a_idx += 1
+        else
+            b_idx += 1
+        end
+    end
+    return s
+end
+
+
+function _fast_quadratic_form_symmetric(a, Q)
+    d = Q.diag
+    if length(d) != length(a)
+        throw(DimensionMismatch())
+    end
+    nzvals = SparseArrays.nonzeros(a)
+    nzinds = SparseArrays.nonzeroinds(a)
+    s = zero(Base.promote_eltype(a, Q))
+    @inbounds for nzidx in eachindex(nzvals)
+        s += nzvals[nzidx]^2 * d[nzinds[nzidx]]
+    end
+    return s
+end
+
 """
     trajectory_callback(storage)
 

From d09f0dfe3efea150a0fd1f62484eeb3307ccd3de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Fri, 11 Oct 2024 17:52:41 +0200
Subject: [PATCH 11/19] copy sufficient

---
 examples/blended_pairwise_with_direct.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 5cfe6bf8f..cd8e53dc0 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -73,7 +73,7 @@ callback = build_callback(trajectoryBPCG_standard)
     f,
     grad!,
     lmo,
-    deepcopy(x00),
+    copy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,
@@ -91,7 +91,7 @@ callback = build_callback(trajectoryBPCG_quadratic)
     f,
     grad!,
     lmo,
-    deepcopy(x00),
+    copy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     print_iter=k / 10,

From d2d5397e3b62fe0132b83508801b6e8f36b14905 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Fri, 11 Oct 2024 17:55:09 +0200
Subject: [PATCH 12/19] remove comment

---
 src/active_set_quadratic.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index 9736c911b..bc4edaa85 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -197,7 +197,6 @@ function active_set_update!(
         idx = find_atom(active_set, atom)
     end
     if idx > 0
-        @info "old atom"
         @inbounds active_set.weights[idx] += lambda
         @inbounds active_set.modified[idx] = true
     else

From 13c40dd97f83b1610694f54f8a92a88a79ac32e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Mon, 14 Oct 2024 23:15:47 +0200
Subject: [PATCH 13/19] added test for quadratic

---
 examples/blended_pairwise_with_direct.jl      |  48 ++++----
 ...wise_with_direct_non-standard-quadratic.jl |   6 -
 src/active_set_quadratic_direct_solve.jl      | 104 +++++++++++++----
 test/quadratic_lp_active_set.jl               | 110 ++++++++++++++++++
 test/runtests.jl                              |   7 ++
 5 files changed, 228 insertions(+), 47 deletions(-)
 create mode 100644 test/quadratic_lp_active_set.jl

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index cd8e53dc0..2b7661f57 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -19,17 +19,13 @@ using Random
 import HiGHS
 import MathOptInterface as MOI
 
-# lp_solver = GLPK.Optimizer
 lp_solver = HiGHS.Optimizer
-# lp_solver = Clp.Optimizer # buggy / does not work properly
-
 
 include("../examples/plot_utils.jl")
 
 n = Int(1e4)
-k = 5000
+k = 10_000
 
-# s = rand(1:100)
 s = 10
 @info "Seed $s"
 Random.seed!(s)
@@ -37,9 +33,6 @@ Random.seed!(s)
 xpi = rand(n);
 total = sum(xpi);
 
-# here the optimal solution lies in the interior if you want an optimal solution on a face and not the interior use:
-# const xp = xpi;
-
 const xp = xpi ./ total;
 
 f(x) = norm(x - xp)^2
@@ -49,12 +42,6 @@ end
 
 lmo = FrankWolfe.KSparseLMO(5, 1.0)
 
-## other LMOs to try
-# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0")
-# lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0)
-# lmo = FrankWolfe.ProbabilitySimplexOracle(1.0);
-# lmo = FrankWolfe.UnitSimplexOracle(1.0);
-
 const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
 
 function build_callback(trajectory_arr)
@@ -213,13 +200,34 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic)
     callback=callback,
 );
 
+as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)),
+    2 * LinearAlgebra.I, -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
 
-# Update the data and labels for plotting
-dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct, trajectoryBPCG_quadratic_direct_generic]
-labelSparsity = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded", "Reloaded_generic"]
+# with LP acceleration
+trajectoryBPCG_quadratic_noqas = []
+callback = build_callback(trajectoryBPCG_quadratic_noqas)
 
-# Plot sparsity
-# plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_basic_as,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=callback,
+);
+
+
+# Update the data and labels for plotting
+data_trajectories = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_as, trajectoryBPCG_quadratic_direct, trajectoryBPCG_quadratic_direct_generic, trajectoryBPCG_quadratic_noqas]
+labels_trajectories = ["BPCG (Standard)", "BPCG (Specific Direct)", "AS_Quad", "Reloaded", "Reloaded_generic", "Reloaded_noqas"]
 
 # Plot trajectories
-plot_trajectories(dataSparsity, labelSparsity, xscalelog=false)
+plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false)
diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl
index ff7736be1..f7da7f3d0 100644
--- a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl
+++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl
@@ -14,15 +14,9 @@ using FrankWolfe
 using LinearAlgebra
 using Random
 
-import Pkg
-Pkg.add("GLPK")
-Pkg.add("HiGHS")
-import GLPK
 import HiGHS
 
-# lp_solver = GLPK.Optimizer
 lp_solver = HiGHS.Optimizer
-# lp_solver = Clp.Optimizer # buggy / does not work properly
 
 
 include("../examples/plot_utils.jl")
diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl
index ced0d551c..63a1ea394 100644
--- a/src/active_set_quadratic_direct_solve.jl
+++ b/src/active_set_quadratic_direct_solve.jl
@@ -7,9 +7,16 @@ along with their coefficients `(λ_i, a_i)`.
 `R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`.
 The iterate `x = ∑λ_i a_i` is stored in x with type `IT`.
 The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c`
-so that the gradient is simply `∇f(x)=Ax+b`.
+so that the gradient is `∇f(x)=Ax+b`.
+
+This active set stores an inner `active_set` that keeps track of the current set of vertices and convex decomposition.
+It therefore delegates all update, deletion, and addition operations to this inner active set.
+The `weight`, `atoms`, and `x` fields should only be accessed to read and are effectively the same objects as those in the inner active set.
+
+The structure also contains a scheduler struct which is called with the `should_solve_lp` function.
+To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method.
 """
-struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet} <: AbstractActiveSet{AT,R,IT}
+struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet, SF} <: AbstractActiveSet{AT,R,IT}
     weights::Vector{R}
     atoms::Vector{AT}
     x::IT
@@ -17,46 +24,71 @@ struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOpt
     b::IT # linear term
     active_set::AS
     lp_optimizer::OT
-    lp_frequency::Int
+    scheduler::SF
     counter::Base.RefValue{Int}
 end
 
-function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function, lp_optimizer; lp_frequency=100) where {AT,R}
+"""
+    ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer)
+
+Creates an ActiveSetQuadraticLinearSolve by computing the Hessian and linear term from `grad!`.
+"""
+function ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer; scheduler=LogScheduler()) where {AT,R}
     A, b = detect_quadratic_function(grad!, tuple_values[1][2])
-    return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, lp_frequency=lp_frequency)
+    return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler)
 end
 
-function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b, lp_optimizer; lp_frequency=10) where {AT,R,H}
+"""
+    ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer)
+
+Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadratic` active set.
+"""
+function ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A::H, b, lp_optimizer; scheduler=LogScheduler()) where {AT,R,H}
     inner_as = ActiveSetQuadratic(tuple_values, A, b)
-    as = ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, lp_frequency, Ref(0))
+    return ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, inner_as.A, inner_as.b, inner_as, lp_optimizer, scheduler, Ref(0))
+end
+
+function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, A, b, lp_optimizer; scheduler=LogScheduler())
+    as = ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, scheduler, Ref(0))
+    compute_active_set_iterate!(as)
+    return as
+end
+
+function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, A::LinearAlgebra.UniformScaling, b, lp_optimizer; scheduler=LogScheduler())
+    as = ActiveSetQuadraticLinearSolve(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, scheduler, Ref(0))
     compute_active_set_iterate!(as)
     return as
 end
 
-function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer; lp_frequency=100) where {AT,R}
+function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, grad!::Function, lp_optimizer; scheduler=LogScheduler())
     A, b = detect_quadratic_function(grad!, tuple_values[1][2])
-    return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, A, b, lp_optimizer; lp_frequency=lp_frequency)
+    return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler)
 end
 
-function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer; lp_frequency=lp_frequency) where {AT,R,H}
+function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::Vector{<:Tuple{<:Number,<:Any}}, grad!::Function, lp_optimizer; scheduler=LogScheduler()) where {AT,R}
+    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, A, b, lp_optimizer; scheduler=scheduler)
+end
+
+function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::Vector{<:Tuple{<:Number,<:Any}}, A::H, b, lp_optimizer; scheduler=LogScheduler()) where {AT,R,H}
     inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b)
-    as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, lp_frequency, Ref(0))
+    as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}(inner_as.weights, inner_as.atoms, inner_as.x, A, b, inner_as, lp_optimizer, scheduler, Ref(0))
     compute_active_set_iterate!(as)
     return as
 end
 
-function ActiveSetQuadraticLinearSolve(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer; lp_frequency=100) where {AT,R}
-    return ActiveSetQuadraticLinearSolve(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
+function ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A::UniformScaling, b, lp_optimizer; scheduler=LogScheduler()) where {AT,R}
+    return ActiveSetQuadraticLinearSolve(tuple_values, Identity(A.λ), b, lp_optimizer; scheduler=scheduler)
 end
-function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer; lp_frequency=100) where {AT,R}
-    return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; lp_frequency=lp_frequency)
+function ActiveSetQuadraticLinearSolve{AT,R}(tuple_values::Vector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b, lp_optimizer; scheduler=LogScheduler()) where {AT,R}
+    return ActiveSetQuadraticLinearSolve{AT,R}(tuple_values, Identity(A.λ), b, lp_optimizer; scheduler=scheduler)
 end
 
 # all mutating functions are delegated to the inner active set
 
 Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple)
 
-Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx)
+Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx::Int) = deleteat!(as.active_set, idx)
 
 Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set)
 
@@ -74,7 +106,7 @@ function active_set_update!(
     # new atom introduced, we can solve the auxiliary LP
     if idx < 0
         as.counter[] += 1
-        if mod(as.counter[], as.lp_frequency) == 0
+        if should_solve_lp(as, as.scheduler)
             solve_quadratic_activeset_lp!(as)
         end
     end
@@ -92,6 +124,12 @@ function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0
 end
 
 # generic quadratic with quadratic information provided
+"""
+    solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H}))
+
+Solves the auxiliary LP over the current active set.
+The method is specialized by type `H` of the Hessian matrix `A`.
+"""
 function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: AbstractMatrix}) where {AT, R, IT}
     nv = length(as)
     o = as.lp_optimizer
@@ -122,7 +160,7 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
     new_weights = R[]
     for idx in eachindex(λ)
         weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
-        if weight_value <= 1e-10
+        if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value))
             push!(indices_to_remove, idx)
         else
             push!(new_weights, weight_value)
@@ -132,14 +170,15 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
     deleteat!(as.weights, indices_to_remove)
     @assert length(as) == length(new_weights)
     as.weights .= new_weights
-    @assert all(>=(0), new_weights)
+    active_set_cleanup!(as)
     active_set_renormalize!(as)
     compute_active_set_iterate!(as)
     return as
 end
 
 # special case of scaled identity Hessian
-function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: Identity}) where {AT, R, IT}
+
+function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, <: Union{Identity, LinearAlgebra.UniformScaling}}) where {AT, R, IT}
     hessian_scaling = as.A.λ
     nv = length(as)
     o = as.lp_optimizer
@@ -170,7 +209,7 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
     new_weights = R[]
     for idx in eachindex(λ)
         weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
-        if weight_value <= 1e-10
+        if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value))
             push!(indices_to_remove, idx)
         else
             push!(new_weights, weight_value)
@@ -180,8 +219,31 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
     deleteat!(as.weights, indices_to_remove)
     @assert length(as) == length(new_weights)
     as.weights .= new_weights
+    active_set_cleanup!(as)
     @assert all(>=(0), new_weights)
     active_set_renormalize!(as)
     compute_active_set_iterate!(as)
     return as
 end
+
+struct LogScheduler{T}
+    start_time::Int
+    scaling_factor::T
+    max_interval::Int
+    current_interval::Base.RefValue{Int}
+    last_solve_counter::Base.RefValue{Int}
+end
+
+LogScheduler(; start_time=20, scaling_factor=1.5, max_interval=1000) = LogScheduler(start_time, scaling_factor, max_interval, Ref(start_time), Ref(0))
+
+function should_solve_lp(as::ActiveSetQuadraticLinearSolve, scheduler::LogScheduler)
+    if as.counter[] - scheduler.last_solve_counter[] >= scheduler.current_interval[]
+        scheduler.last_solve_counter[] = as.counter[]
+        scheduler.current_interval[] = min(
+            round(Int, scheduler.scaling_factor * scheduler.current_interval[]),
+            scheduler.max_interval,
+        )
+        return true
+    end
+    return false
+end
diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl
new file mode 100644
index 000000000..a86c0b713
--- /dev/null
+++ b/test/quadratic_lp_active_set.jl
@@ -0,0 +1,110 @@
+using FrankWolfe
+using LinearAlgebra
+using Random
+using Test
+import HiGHS
+import MathOptInterface as MOI
+
+n = Int(1e4)
+k = 5000
+
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+lmo = FrankWolfe.KSparseLMO(5, 1.0)
+
+const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * LinearAlgebra.I, -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_specialized = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_specialized),
+);
+
+as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * Diagonal(ones(length(xp))), -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_generic = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_generic,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_generic),
+);
+
+as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)),
+    2 * LinearAlgebra.I, -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_noqas = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_basic_as,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_noqas),
+);
+
+
+dual_gaps_quadratic_specialized = getindex.(trajectoryBPCG_quadratic_direct_specialized, 4)
+dual_gaps_quadratic_generic = getindex.(trajectoryBPCG_quadratic_direct_generic, 4)
+dual_gaps_quadratic_noqas = getindex.(trajectoryBPCG_quadratic_noqas, 4)
+dual_gaps_bpcg = getindex.(trajectoryBPCG_standard, 4)
+
+
+@test dual_gaps_quadratic_specialized[end] < dual_gaps_bpcg[end]
+@test dual_gaps_quadratic_noqas[end] < dual_gaps_bpcg[end]
+@test norm(dual_gaps_quadratic_noqas - dual_gaps_quadratic_noqas) ≤ k * 1e-5
diff --git a/test/runtests.jl b/test/runtests.jl
index d984043dc..5f3cca81e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -798,6 +798,13 @@ using Test
 end
 end
 
+module LpDirectSolveTest
+using Test
+@testset "LP solving for quadratic functions and active set" begin
+    include("quadratic_lp_active_set.jl")
+end
+end
+
 include("generic-arrays.jl")
 
 include("blocks.jl")

From 866158a62c893c2412bf008780a1c5efeecfaaf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Tue, 15 Oct 2024 16:35:41 +0200
Subject: [PATCH 14/19] minor

---
 examples/blended_pairwise_with_sparsify.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl
index 8481850ea..941df79cd 100644
--- a/examples/blended_pairwise_with_sparsify.jl
+++ b/examples/blended_pairwise_with_sparsify.jl
@@ -14,7 +14,6 @@ using FrankWolfe
 using LinearAlgebra
 using Random
 
-import Pkg
 import HiGHS
 
 # lp_solver = GLPK.Optimizer

From 08671fbf5188aace9120347bb01ecc3fd37e8030 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Tue, 15 Oct 2024 17:18:23 +0200
Subject: [PATCH 15/19] simplify example

---
 examples/blended_pairwise_with_direct.jl | 81 ++----------------------
 1 file changed, 5 insertions(+), 76 deletions(-)

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 2b7661f57..417b996e2 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -19,8 +19,6 @@ using Random
 import HiGHS
 import MathOptInterface as MOI
 
-lp_solver = HiGHS.Optimizer
-
 include("../examples/plot_utils.jl")
 
 n = Int(1e4)
@@ -51,29 +49,7 @@ function build_callback(trajectory_arr)
 end
 
 
-FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100)
-
 trajectoryBPCG_standard = []
-callback = build_callback(trajectoryBPCG_standard)
-
-@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
-    f,
-    grad!,
-    lmo,
-    copy(x00),
-    max_iteration=k,
-    line_search=FrankWolfe.Shortstep(2.0),
-    print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
-    verbose=true,
-    trajectory=true,
-    callback=callback,
-    squadratic=false,
-);
-
-trajectoryBPCG_quadratic = []
-callback = build_callback(trajectoryBPCG_quadratic)
-
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
@@ -81,34 +57,8 @@ callback = build_callback(trajectoryBPCG_quadratic)
     copy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
-    print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
     verbose=true,
-    trajectory=true,
-    callback=callback,
-    squadratic=true,
-    lp_solver=lp_solver,
-);
-
-# Just using that qudratic (more expensive)
-trajectoryBPCG_quadratic_nosquad = []
-callback = build_callback(trajectoryBPCG_quadratic_nosquad)
-
-@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
-    f,
-    grad!,
-    lmo,
-    copy(x00),
-    max_iteration=k,
-    line_search=FrankWolfe.Shortstep(2.0),
-    print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
-    verbose=true,
-    trajectory=true,
-    callback=callback,
-    quadratic=true,
-    squadratic=false,
-    lp_solver=lp_solver,
+    callback=build_callback(trajectoryBPCG_standard),
 );
 
 # Just projection quadratic
@@ -123,10 +73,7 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I,
     as_quad,
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
-    print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
     verbose=true,
-    trajectory=true,
     callback=callback,
 );
 
@@ -137,7 +84,6 @@ as_quad = FrankWolfe.ActiveSetQuadratic(
 
 # with quadratic active set
 trajectoryBPCG_quadratic_as = []
-callback = build_callback(trajectoryBPCG_quadratic_as)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
@@ -145,11 +91,8 @@ callback = build_callback(trajectoryBPCG_quadratic_as)
     as_quad,
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
-    print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
     verbose=true,
-    trajectory=true,
-    callback=callback,
+    callback=build_callback(trajectoryBPCG_quadratic_as),
 );
 
 as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
@@ -160,8 +103,6 @@ as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
 
 # with LP acceleration
 trajectoryBPCG_quadratic_direct = []
-callback = build_callback(trajectoryBPCG_quadratic_direct)
-
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
@@ -169,11 +110,8 @@ callback = build_callback(trajectoryBPCG_quadratic_direct)
     as_quad_direct,
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
-    print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
     verbose=true,
-    trajectory=true,
-    callback=callback,
+    callback=build_callback(trajectoryBPCG_quadratic_direct),
 );
 
 as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
@@ -184,8 +122,6 @@ as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
 
 # with LP acceleration
 trajectoryBPCG_quadratic_direct_generic = []
-callback = build_callback(trajectoryBPCG_quadratic_direct_generic)
-
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
@@ -193,11 +129,8 @@ callback = build_callback(trajectoryBPCG_quadratic_direct_generic)
     as_quad_direct_generic,
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
-    print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
     verbose=true,
-    trajectory=true,
-    callback=callback,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_generic),
 );
 
 as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
@@ -208,7 +141,6 @@ as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
 
 # with LP acceleration
 trajectoryBPCG_quadratic_noqas = []
-callback = build_callback(trajectoryBPCG_quadratic_noqas)
 
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
@@ -217,11 +149,8 @@ callback = build_callback(trajectoryBPCG_quadratic_noqas)
     as_quad_direct_basic_as,
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
-    print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
     verbose=true,
-    trajectory=true,
-    callback=callback,
+    callback=build_callback(trajectoryBPCG_quadratic_noqas),
 );
 
 

From 4c7e12091df40f165e7073310e5437acd724d095 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Thu, 17 Oct 2024 09:45:38 +0200
Subject: [PATCH 16/19] clean up code, verify error with ASQuad

---
 ...wise_with_direct_non-standard-quadratic.jl | 77 ++++++++++++++++---
 src/active_set_quadratic.jl                   |  1 +
 src/active_set_quadratic_direct_solve.jl      | 16 ++--
 3 files changed, 76 insertions(+), 18 deletions(-)

diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl
index f7da7f3d0..9466771ee 100644
--- a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl
+++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl
@@ -15,7 +15,7 @@ using LinearAlgebra
 using Random
 
 import HiGHS
-
+import MathOptInterface as MOI
 lp_solver = HiGHS.Optimizer
 
 
@@ -33,7 +33,6 @@ A = let
     A = randn(n, n)
     A' * A
 end
-
 @assert isposdef(A) == true
 
 const y = Random.rand(Bool, n) * 0.6 .+ 0.3
@@ -72,12 +71,11 @@ FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100)
 trajectoryBPCG_standard = []
 callback = build_callback(trajectoryBPCG_standard)
 
-x0 = deepcopy(x00)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    x0,
+    copy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Adaptive(),
     print_iter=k / 10,
@@ -89,29 +87,84 @@ x0 = deepcopy(x00)
 );
 
 trajectoryBPCG_quadratic = []
-callback = build_callback(trajectoryBPCG_quadratic)
-
-x0 = deepcopy(x00)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    x0,
+    copy(x00),
     max_iteration=k,
     line_search=FrankWolfe.Adaptive(),
     print_iter=k / 10,
-    memory_mode=FrankWolfe.InplaceEmphasis(),
     verbose=true,
     trajectory=true,
-    callback=callback,
+    callback=build_callback(trajectoryBPCG_quadratic),
     quadratic=true,
     lp_solver=lp_solver,
 );
 
+active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    grad!,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+    scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100),
+)
+trajectoryBPCG_quadratic_automatic = []
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_quadratic_automatic,
+    # print_iter=1,
+    max_iteration=k,
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_automatic),
+);
+
+active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    grad!,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+    scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2),
+)
+trajectoryBPCG_quadratic_automatic2 = []
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_quadratic_automatic2,
+    # print_iter=1,
+    max_iteration=k,
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_automatic2),
+);
+
+
+active_set_quadratic_automatic_reloaded = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    FrankWolfe.ActiveSet([(1.0, copy(x00))]),
+    grad!,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+    scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2),
+)
+trajectoryBPCG_quadratic_automatic_reloaded = []
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_quadratic_automatic_reloaded,
+    max_iteration=k,
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_automatic_reloaded),
+);
+
+
+
 # Reduction primal/dual error vs. sparsity of solution
 
-dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic]
-labelSparsity = ["BPCG (Standard)", "BPCG (Direct)"]
+# dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_automatic, trajectoryBPCG_quadratic_automatic_reloaded, trajectoryBPCG_quadratic_automatic]
+# labelSparsity = ["BPCG (Standard)", "BPCG (Direct)", "AS_Quad", "R", "A2"]
+
+dataSparsity = [trajectoryBPCG_quadratic_automatic2, trajectoryBPCG_quadratic_automatic_reloaded]
+labelSparsity = ["AS_Quad", "AS_standard"]
 
 # Plot sparsity
 # plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index bc4edaa85..ba2f2cc98 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -151,6 +151,7 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R}
     return as
 end
 
+# TODO multi-indices version
 function Base.deleteat!(as::ActiveSetQuadratic, idx::Int)
     @inbounds for i in 1:idx-1
         as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i]
diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl
index 63a1ea394..974a5439b 100644
--- a/src/active_set_quadratic_direct_solve.jl
+++ b/src/active_set_quadratic_direct_solve.jl
@@ -16,12 +16,12 @@ The `weight`, `atoms`, and `x` fields should only be accessed to read and are ef
 The structure also contains a scheduler struct which is called with the `should_solve_lp` function.
 To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method.
 """
-struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet, SF} <: AbstractActiveSet{AT,R,IT}
+struct ActiveSetQuadraticLinearSolve{AT, R <: Real, IT, H, BT, OT <: MOI.AbstractOptimizer, AS <: AbstractActiveSet, SF} <: AbstractActiveSet{AT,R,IT}
     weights::Vector{R}
     atoms::Vector{AT}
     x::IT
     A::H # Hessian matrix
-    b::IT # linear term
+    b::BT # linear term
     active_set::AS
     lp_optimizer::OT
     scheduler::SF
@@ -61,7 +61,7 @@ function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, A::LinearAlg
 end
 
 function ActiveSetQuadraticLinearSolve(inner_as::AbstractActiveSet, grad!::Function, lp_optimizer; scheduler=LogScheduler())
-    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    A, b = detect_quadratic_function(grad!, inner_as.atoms[1])
     return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler)
 end
 
@@ -124,6 +124,7 @@ function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0
 end
 
 # generic quadratic with quadratic information provided
+
 """
     solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H}))
 
@@ -160,14 +161,17 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
     new_weights = R[]
     for idx in eachindex(λ)
         weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx])
-        if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value))
+        if weight_value <= 1e-5
             push!(indices_to_remove, idx)
         else
             push!(new_weights, weight_value)
         end
     end
-    deleteat!(as.atoms, indices_to_remove)
-    deleteat!(as.weights, indices_to_remove)
+    # we delete the elements in reverse order to avoid messing up the internal active set
+    sort!(indices_to_remove, rev=true)
+    for idx in indices_to_remove
+        deleteat!(as.active_set, idx)
+    end
     @assert length(as) == length(new_weights)
     as.weights .= new_weights
     active_set_cleanup!(as)

From 8f9f2beafd1a050e7bb7c8a59331709fc2ed17aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Designolle?=
 <sebastien.designolle@gmail.com>
Date: Thu, 24 Oct 2024 10:42:37 +0200
Subject: [PATCH 17/19] Add update_weights! to fix direct solve with
 active_set_quadratic

---
 examples/blended_pairwise_with_direct.jl |  6 ++----
 src/active_set.jl                        |  4 ++++
 src/active_set_quadratic.jl              |  6 ++++++
 src/active_set_quadratic_direct_solve.jl | 15 +++++----------
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl
index 417b996e2..1304749d2 100644
--- a/examples/blended_pairwise_with_direct.jl
+++ b/examples/blended_pairwise_with_direct.jl
@@ -62,9 +62,7 @@ trajectoryBPCG_standard = []
 );
 
 # Just projection quadratic
-trajectoryBPCG_quadratic_as = []
-callback = build_callback(trajectoryBPCG_quadratic_as)
-
+trajectoryBPCG_quadratic = []
 as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp)
 @time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
@@ -74,7 +72,7 @@ as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I,
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2.0),
     verbose=true,
-    callback=callback,
+    callback=build_callback(trajectoryBPCG_quadratic),
 );
 
 as_quad = FrankWolfe.ActiveSetQuadratic(
diff --git a/src/active_set.jl b/src/active_set.jl
index 62393b47f..c02145666 100644
--- a/src/active_set.jl
+++ b/src/active_set.jl
@@ -326,3 +326,7 @@ function compute_active_set_iterate!(active_set::AbstractActiveSet{<:ScaledHotVe
     end
     return active_set.x
 end
+
+function update_weights!(as::AbstractActiveSet, new_weights)
+    as.weights .= new_weights
+end
diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index ba2f2cc98..5739198f2 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -315,3 +315,9 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b)
     compute_active_set_iterate!(warm_as)
     return warm_as
 end
+
+function update_weights!(as::ActiveSetQuadratic, new_weights)
+    as.weights_prev .= as.weights
+    as.weights .= new_weights
+    as.modified .= true
+end
diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl
index 974a5439b..0d71db82f 100644
--- a/src/active_set_quadratic_direct_solve.jl
+++ b/src/active_set_quadratic_direct_solve.jl
@@ -88,7 +88,7 @@ end
 
 Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple)
 
-Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx::Int) = deleteat!(as.active_set, idx)
+Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx)
 
 Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set)
 
@@ -167,13 +167,9 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
             push!(new_weights, weight_value)
         end
     end
-    # we delete the elements in reverse order to avoid messing up the internal active set
-    sort!(indices_to_remove, rev=true)
-    for idx in indices_to_remove
-        deleteat!(as.active_set, idx)
-    end
+    deleteat!(as.active_set, indices_to_remove)
     @assert length(as) == length(new_weights)
-    as.weights .= new_weights
+    update_weights!(as.active_set, new_weights)
     active_set_cleanup!(as)
     active_set_renormalize!(as)
     compute_active_set_iterate!(as)
@@ -219,10 +215,9 @@ function solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R,
             push!(new_weights, weight_value)
         end
     end
-    deleteat!(as.atoms, indices_to_remove)
-    deleteat!(as.weights, indices_to_remove)
+    deleteat!(as.active_set, indices_to_remove)
     @assert length(as) == length(new_weights)
-    as.weights .= new_weights
+    update_weights!(as.active_set, new_weights)
     active_set_cleanup!(as)
     @assert all(>=(0), new_weights)
     active_set_renormalize!(as)

From 4e1f4917a5e37ad8a424987a255e03eae85d3778 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Thu, 24 Oct 2024 20:07:44 +0200
Subject: [PATCH 18/19] remove direct solve from BPCG

---
 ...wise_with_direct_non-standard-quadratic.jl | 48 ++++---------------
 src/blended_pairwise.jl                       | 37 +-------------
 2 files changed, 11 insertions(+), 74 deletions(-)

diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl
index 9466771ee..5710e92d9 100644
--- a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl
+++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl
@@ -16,8 +16,6 @@ using Random
 
 import HiGHS
 import MathOptInterface as MOI
-lp_solver = HiGHS.Optimizer
-
 
 include("../examples/plot_utils.jl")
 
@@ -66,12 +64,10 @@ function build_callback(trajectory_arr)
 end
 
 
-FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100)
-
 trajectoryBPCG_standard = []
 callback = build_callback(trajectoryBPCG_standard)
 
-@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
@@ -86,22 +82,6 @@ callback = build_callback(trajectoryBPCG_standard)
     squadratic=false,
 );
 
-trajectoryBPCG_quadratic = []
-@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
-    f,
-    grad!,
-    lmo,
-    copy(x00),
-    max_iteration=k,
-    line_search=FrankWolfe.Adaptive(),
-    print_iter=k / 10,
-    verbose=true,
-    trajectory=true,
-    callback=build_callback(trajectoryBPCG_quadratic),
-    quadratic=true,
-    lp_solver=lp_solver,
-);
-
 active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve(
     [(1.0, copy(x00))],
     grad!,
@@ -109,7 +89,7 @@ active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve(
     scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100),
 )
 trajectoryBPCG_quadratic_automatic = []
-@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
@@ -127,7 +107,7 @@ active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve(
     scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2),
 )
 trajectoryBPCG_quadratic_automatic2 = []
-@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
@@ -139,35 +119,27 @@ trajectoryBPCG_quadratic_automatic2 = []
 );
 
 
-active_set_quadratic_automatic_reloaded = FrankWolfe.ActiveSetQuadraticLinearSolve(
+active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve(
     FrankWolfe.ActiveSet([(1.0, copy(x00))]),
     grad!,
     MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
     scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2),
 )
-trajectoryBPCG_quadratic_automatic_reloaded = []
-@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+trajectoryBPCG_quadratic_automatic_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
     f,
     grad!,
     lmo,
-    active_set_quadratic_automatic_reloaded,
+    active_set_quadratic_automatic_standard,
     max_iteration=k,
     verbose=true,
-    callback=build_callback(trajectoryBPCG_quadratic_automatic_reloaded),
+    callback=build_callback(trajectoryBPCG_quadratic_automatic_standard),
 );
 
 
+dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic_automatic, trajectoryBPCG_quadratic_automatic_standard]
+labelSparsity = ["BPCG (Standard)", "AS_Quad", "AS_Standard"]
 
-# Reduction primal/dual error vs. sparsity of solution
-
-# dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_quadratic, trajectoryBPCG_quadratic_automatic, trajectoryBPCG_quadratic_automatic_reloaded, trajectoryBPCG_quadratic_automatic]
-# labelSparsity = ["BPCG (Standard)", "BPCG (Direct)", "AS_Quad", "R", "A2"]
-
-dataSparsity = [trajectoryBPCG_quadratic_automatic2, trajectoryBPCG_quadratic_automatic_reloaded]
-labelSparsity = ["AS_Quad", "AS_standard"]
-
-# Plot sparsity
-# plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
 
 # Plot trajectories
 plot_trajectories(dataSparsity, labelSparsity,xscalelog=false)
diff --git a/src/blended_pairwise.jl b/src/blended_pairwise.jl
index a1341135a..40435d946 100644
--- a/src/blended_pairwise.jl
+++ b/src/blended_pairwise.jl
@@ -30,10 +30,6 @@ function blended_pairwise_conditional_gradient(
     add_dropped_vertices=false,
     use_extra_vertex_storage=false,
     recompute_last_vertex=true,
-    squadratic=false, # standard quadratic objective
-    quadratic=false,  # quadratic objective but not standard quadratic
-    lp_solver=nothing, # lp_solver used for direct solve or sparsification
-    ds_scale_up=2,  # direct solve scale up parameter
 )
     # add the first vertex to active set from initialization
     active_set = ActiveSet([(1.0, x0)])
@@ -63,10 +59,6 @@ function blended_pairwise_conditional_gradient(
         add_dropped_vertices=add_dropped_vertices,
         use_extra_vertex_storage=use_extra_vertex_storage,
         recompute_last_vertex=recompute_last_vertex,
-        squadratic=squadratic,
-        quadratic=quadratic,  # Pass the new parameter
-        lp_solver=lp_solver,
-        ds_scale_up=ds_scale_up,  # Pass the new parameter
     )
 end
 
@@ -100,10 +92,6 @@ function blended_pairwise_conditional_gradient(
     add_dropped_vertices=false,
     use_extra_vertex_storage=false,
     recompute_last_vertex=true,
-    squadratic=false, # non-standard quadratic objective
-    quadratic=false,  # quadratic objective but not standard quadratic
-    lp_solver=nothing, # lp_solver used for direct solve or sparsification
-    ds_scale_up=2,  # direct solve scale up parameter
 ) where {AT,R}
 
     # format string for output of the algorithm
@@ -132,8 +120,6 @@ function blended_pairwise_conditional_gradient(
     end
 
     t = 0
-    next_direct_solve_interval = 10
-    last_direct_solve = 0
     compute_active_set_iterate!(active_set)
     x = get_active_set_iterate(active_set)
     primal = convert(eltype(x), Inf)
@@ -146,10 +132,6 @@ function blended_pairwise_conditional_gradient(
         gradient = collect(x)
     end
 
-    if squadratic || quadratic  # when using LPs then lp_solver must be provided
-        @assert lp_solver !== nothing "When squadratic, sparsify, or quadratic is true, lp_solver must be provided"
-    end
-
     if verbose
         println("\nBlended Pairwise Conditional Gradient Algorithm.")
         NumType = eltype(x)
@@ -159,9 +141,6 @@ function blended_pairwise_conditional_gradient(
         grad_type = typeof(gradient)
         println("GRADIENTTYPE: $grad_type LAZY: $lazy lazy_tolerance: $lazy_tolerance")
         println("LMO: $(typeof(lmo))")
-        if lp_solver !== nothing
-            println("LP SOLVER: $(lp_solver)")
-        end
         if use_extra_vertex_storage && !lazy
             @info("vertex storage only used in lazy mode")
         end
@@ -312,20 +291,6 @@ function blended_pairwise_conditional_gradient(
             end
             vertex_taken = v
             # short circuit for quadratic case
-            if (squadratic || quadratic) && ((t > last_direct_solve && t - last_direct_solve >= next_direct_solve_interval))  # Updated condition
-                next_direct_solve_interval *= ds_scale_up
-                last_direct_solve = t
-                if quadratic
-                    active_set = direct_solve_gq(active_set, grad!, lp_solver) # direct solve for non-standard quadratic problem
-                else
-                    active_set = direct_solve(active_set, grad!, lp_solver) # direct solve for standard quadratic problem
-                end
-                active_set_cleanup!(active_set; weight_purge_threshold=weight_purge_threshold)
-                compute_active_set_iterate!(active_set)
-                x = get_active_set_iterate(active_set)
-                grad!(gradient, x)
-                step_type = ST_DIRECT
-            end
             dual_gap = fast_dot(gradient, x) - fast_dot(gradient, v)
             # if we are about to exit, compute dual_gap with the cleaned-up x
             if dual_gap ≤ epsilon
@@ -519,4 +484,4 @@ function blended_pairwise_conditional_gradient(
     end
 
     return (x=x, v=v, primal=primal, dual_gap=dual_gap, traj_data=traj_data, active_set=active_set)
-end
\ No newline at end of file
+end

From 80dc2b22427a54cbd7ff73e7cf2e17caa97575f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= <mathieu.besancon@gmail.com>
Date: Thu, 24 Oct 2024 20:22:17 +0200
Subject: [PATCH 19/19] rng changed

---
 examples/simple_stateless.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/simple_stateless.jl b/examples/simple_stateless.jl
index 6a550a963..6bf230e26 100644
--- a/examples/simple_stateless.jl
+++ b/examples/simple_stateless.jl
@@ -71,7 +71,7 @@ plot_trajectories([trajectory_simple[1:end], trajectory_restart[1:end]], ["simpl
 
 # simple step iterations about 33% faster
 
-@test line_search.factor <= 44
+@test line_search.factor <= 51
 
 x, v, primal, dual_gap, trajectory_restart_highpres = FrankWolfe.frank_wolfe(
     f,