From 456139a6a5e81199ce0e764d39a0877c86b15e51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathieu=20Besan=C3=A7on?= Date: Thu, 24 Oct 2024 20:22:49 +0200 Subject: [PATCH 1/4] inverse to linsolve for AS quadratic (#510) --- src/active_set_quadratic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index b200a8912..8f26b632e 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -36,7 +36,7 @@ function detect_quadratic_function(grad!, x0; test=true) X[:, i] .-= x0 G[:, i] .= storage .- g0 end - A = G * inv(X) + A = G \ X b = g0 - A * x0 if test x_test = randn(T, n) From ffc360caf3027de2f334a407890943f664a89696 Mon Sep 17 00:00:00 2001 From: "Jannis H." <44263801+JannisHal@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:19:50 +0200 Subject: [PATCH 2/4] Alternating projection update (#513) * Update and generalize AP * Reuse active step and dual_gap information * Example comparing active vanilla FW, bpcg and active set reuse * Bug fix * dist2 - reduce allocations * Format document * Remove obsolete dependence --- examples/alternating_projections.jl | 37 +++++++ src/alternating_methods.jl | 163 ++++++++++++++++------------ test/alternating_methods_tests.jl | 35 +++--- 3 files changed, 149 insertions(+), 86 deletions(-) create mode 100644 examples/alternating_projections.jl diff --git a/examples/alternating_projections.jl b/examples/alternating_projections.jl new file mode 100644 index 000000000..159d204ff --- /dev/null +++ b/examples/alternating_projections.jl @@ -0,0 +1,37 @@ +import FrankWolfe +using LinearAlgebra +using Random + + +include("../examples/plot_utils.jl") + +Random.seed!(100) + +n = 500 +lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n]) +lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n]) + + +trajectories = [] + +methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient] + +for (i,m) in enumerate(methods) + if i == 1 + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m) + elseif i== 2 + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true) + else + x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true) + end + push!(trajectories, traj_data) +end + + + +labels = ["FW", "BPCG" ,"BPCG (reuse)"] + +plot_trajectories(trajectories, labels, xscalelog=true) + + + diff --git a/src/alternating_methods.jl b/src/alternating_methods.jl index ec1c5b2f5..d07a73649 100644 --- a/src/alternating_methods.jl +++ b/src/alternating_methods.jl @@ -35,12 +35,12 @@ end Alternating Linear Minimization minimizes the objective `f` over the intersections of the feasible domains specified by `lmos`. The tuple `x0` defines the initial points for each domain. 
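+The objective actually minimized is the penalized function `λ f(x) + ½ Σ_{i<j} ‖x_i - x_j‖²`,
+whose block-coupling term is the `dist2` quantity below, so a larger `lambda` weighs the
+original objective more heavily against agreement between the block iterates.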
-Returns a tuple `(x, v, primal, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, primal, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `primal` primal value `f(x)` - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` is 1/2 of the sum of squared, pairwise distances between iterates - `traj_data` vector of trajectory information. """ function alternating_linear_minimization( @@ -49,17 +49,17 @@ function alternating_linear_minimization( grad!, lmos::NTuple{N,LinearMinimizationOracle}, x0::Tuple{Vararg{Any,N}}; - lambda::Union{Float64, Function}=1.0, + lambda::Union{Float64,Function}=1.0, verbose=false, trajectory=false, callback=nothing, max_iteration=10000, - print_iter = max_iteration / 10, + print_iter=max_iteration / 10, memory_mode=InplaceEmphasis(), line_search::LS=Adaptive(), epsilon=1e-7, kwargs..., -) where {N, LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} +) where {N,LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} x0_bc = BlockVector([x0[i] for i in 1:N], [size(x0[i]) for i in 1:N], sum(length, x0)) gradf = similar(x0_bc) @@ -74,21 +74,14 @@ function alternating_linear_minimization( for i in 1:N grad!(gradf.blocks[i], x.blocks[i]) end - t = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] + t = [N * b - sum(x.blocks) for b in x.blocks] return storage.blocks = λ[] * gradf.blocks + t end end - function dist2(x::BlockVector) - s = 0 - for i=1:N - for j=1:i-1 - diff = x.blocks[i] - x.blocks[j] - s += fast_dot(diff, diff) - end - end - return s - end + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function build_objective() λ = Ref(λ0) @@ -122,8 +115,11 @@ function alternating_linear_minimization( num_type = eltype(x0[1]) grad_type = eltype(gradf.blocks[1]) - line_search_type = line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) - println("MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration") + line_search_type = + line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) + println( + "MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration", + ) println("TYPE: $num_type GRADIENTTYPE: $grad_type") println("LAMBDA: $lambda") @@ -177,7 +173,7 @@ function alternating_linear_minimization( end if lambda isa Function - callback = function (state,args...) + callback = function (state, args...) state.f.λ[] = lambda(state) state.grad!.λ[] = state.f.λ[] @@ -211,33 +207,15 @@ function alternating_linear_minimization( end -function ProjectionFW(y, lmo; max_iter=10000, eps=1e-3) - f(x) = sum(abs2, x - y) - grad!(storage, x) = storage .= 2 * (x - y) - - x0 = FrankWolfe.compute_extreme_point(lmo, y) - x_opt, _ = FrankWolfe.frank_wolfe( - f, - grad!, - lmo, - x0, - epsilon=eps, - max_iteration=max_iter, - trajectory=true, - line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=false), - ) - return x_opt -end - """ alternating_projections(lmos::NTuple{N,LinearMinimizationOracle}, x0; ...) where {N} Computes a point in the intersection of feasible domains specified by `lmos`. 
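+Each outer iteration projects onto every region in turn: the projection onto region `i` is the
+quadratic subproblem `min_y ‖y - x_{i-1}‖²` over the previous region's iterate, solved
+approximately by an inner Frank-Wolfe run (`proj_method`) with tolerance `inner_epsilon(t)`
+at outer iteration `t`.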
-Returns a tuple `(x, v, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` is 1/2 * sum of squared, pairwise distances between iterates - `traj_data` vector of trajectory information. """ function alternating_projections( @@ -252,6 +230,10 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} return alternating_projections( ProductLMO(lmos), @@ -265,6 +247,10 @@ function alternating_projections( callback, traj_data, timeout, + proj_method, + inner_epsilon, + reuse_active_set, + kwargs..., ) end @@ -281,17 +267,21 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} # header and format string for output of the algorithm - headers = ["Type", "Iteration", "Dual Gap", "Infeas", "Time", "It/sec"] + headers = ["Type", "Iteration", "Dual Gap", "dist2", "Time", "It/sec"] format_string = "%6s %13s %14e %14e %14e %14e\n" - function format_state(state, infeas) + function format_state(state, primal) rep = ( steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.dual_gap), - Float64(infeas), + Float64(primal), state.time, state.t / state.time, ) @@ -300,27 +290,61 @@ function alternating_projections( t = 0 dual_gap = Inf - x = fill(x0, N) - v = similar(x) + dual_gaps = fill(Inf, N) + x = BlockVector(compute_extreme_point.(lmo.lmos, fill(x0, N))) step_type = ST_REGULAR gradient = similar(x) - ndim = ndims(x) - infeasibility(x) = sum( - fast_dot( - selectdim(x, ndim, i) - selectdim(x, ndim, j), - selectdim(x, ndim, i) - selectdim(x, ndim, j), - ) for i in 1:N for j in 1:i-1 - ) + if reuse_active_set + if proj_method ∉ + [away_frank_wolfe, blended_pairwise_conditional_gradient, pairwise_frank_wolfe] + error("The selected FW method does not support active sets reuse.") + end + active_sets = [ActiveSet([(1.0, x.blocks[i])]) for i in 1:N] + end - partial_infeasibility(x) = - sum(fast_dot(x[mod(i - 2, N)+1] - x[i], x[mod(i - 2, N)+1] - x[i]) for i in 1:N) + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function grad!(storage, x) - @. 
storage = [2 * (x[i] - x[mod(i - 2, N)+1]) for i in 1:N] + return storage.blocks = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] end - projection_step(x, i, t) = ProjectionFW(x, lmo.lmos[i]; eps=1 / (t^2 + 1)) + function projection_step(i, t) + xii = x.blocks[mod(i - 2, N)+1] # iterate in previous block + f(y) = sum(abs2, y - xii) + function grad_proj!(storage, y) + return storage .= 2 * (y - xii) + end + + if reuse_active_set + + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + active_sets[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + active_sets[i] = results[:active_set] + else + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + x.blocks[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + end + return results[:x], results[:dual_gap] + end if trajectory @@ -372,19 +396,18 @@ function alternating_projections( # Projection step: for i in 1:N # project the previous iterate on the i-th feasible region - x[i] = projection_step(x[mod(i - 2, N)+1], i, t) + x.blocks[i], dual_gaps[i] = projection_step(i, t) end + dual_gap = sum(dual_gaps) + # Update gradients grad!(gradient, x) - # Update dual gaps - v = compute_extreme_point.(lmo.lmos, gradient) - dual_gap = fast_dot(x - v, gradient) # go easy on the memory - only compute if really needed if ((mod(t, print_iter) == 0 && verbose) || callback !== nothing) - infeas = infeasibility(x) + primal = dist2(x) end first_iter = false @@ -393,12 +416,12 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, - v, + nothing, nothing, nothing, nothing, @@ -408,7 +431,7 @@ function alternating_projections( step_type, ) # @show state - if callback(state, infeas) === false + if callback(state, primal) === false break end end @@ -419,9 +442,9 @@ function alternating_projections( # this is important as some variants do not recompute f(x) and the dual_gap regularly but only when reporting # hence the final computation. 
step_type = ST_LAST - infeas = infeasibility(x) + primal = dist2(x) grad!(gradient, x) - v = compute_extreme_point.(lmo.lmos, gradient) + v = compute_extreme_point(lmo, gradient) dual_gap = fast_dot(x - v, gradient) tot_time = (time_ns() - time_start) / 1.0e9 @@ -429,8 +452,8 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, @@ -443,9 +466,9 @@ function alternating_projections( gradient, step_type, ) - callback(state, infeas) + callback(state, primal) end - return x, v, dual_gap, infeas, traj_data + return x, v, dual_gap, primal, traj_data end diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 904de49d8..21deecbe4 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -27,7 +27,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1.0, + lambda=0.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -40,7 +40,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/3, + lambda=1/6, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -53,7 +53,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/9, + lambda=1/18, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -66,7 +66,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, + lambda=1.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -134,6 +134,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) ones(n), line_search=FrankWolfe.Agnostic(), momentum=0.9, + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-3 @@ -160,6 +161,7 @@ end ones(n), line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), update_order=order, + lambda=0.5, ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-5 @test abs(x.blocks[2][1] - 1 / n) < 1e-5 @@ -181,6 +183,7 @@ end grad!, (lmo2, lmo_prob), ones(n), + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-6 @@ -244,8 +247,8 @@ end grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, - line_search=(FrankWolfe.Shortstep(8.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 2+2*λ + lambda=1.5, + line_search=(FrankWolfe.Shortstep(4.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 1+2*λ update_step=(FrankWolfe.BPCGStep(), FrankWolfe.FrankWolfeStep()), ) @@ -257,31 +260,31 @@ end x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=true) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, _, _ = FrankWolfe.alternating_projections((lmo3, lmo_prob), rand(n)) - @test abs(x[1][1] - 1) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1] - 1) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo1, lmo2), rand(n)) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1]) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1]) < 1e-6 @test infeas < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo2, lmo3), rand(n)) - @test abs(x[1][1] - 1) < 1e-4 - @test abs(x[2][1] - 1) < 1e-4 + @test abs(x.blocks[1][1] - 1) < 1e-4 + @test abs(x.blocks[2][1] - 1) < 1e-4 @test infeas < 1e-6 x, _, _, 
infeas, traj_data =
        FrankWolfe.alternating_projections((lmo1, lmo3), rand(n), trajectory=true)

+    @test abs(x.blocks[1][1]) < 1e-6
+    @test abs(x.blocks[2][1] - 1) < 1e-6
     @test traj_data != []
     @test length(traj_data[1]) == 5
     @test length(traj_data) >= 2

From 73585af8bdb92e84d1c5afc554f31da3168ec302 Mon Sep 17 00:00:00 2001
From: "Jannis H." <44263801+JannisHal@users.noreply.github.com>
Date: Thu, 24 Oct 2024 23:20:20 +0200
Subject: [PATCH 3/4] Update Block-Coordinate FW (#512)

* BPCG step fixes

* Update Dual(Progress/Gap)Order

* Clean up

* Remove bug in test

* Immutable DualGapOrder
---
 src/block_coordinate_algorithms.jl | 77 +++++++++++++++++++++---------
 test/alternating_methods_tests.jl  |  2 +-
 2 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/src/block_coordinate_algorithms.jl b/src/block_coordinate_algorithms.jl
index 4a1fc7e6a..0d034a7d3 100644
--- a/src/block_coordinate_algorithms.jl
+++ b/src/block_coordinate_algorithms.jl
@@ -41,20 +41,29 @@ end
 
 StochasticUpdate() = StochasticUpdate(-1)
 
-mutable struct DualGapOrder <: BlockCoordinateUpdateOrder
+"""
+The dual gap order initiates one round of `limit` many updates.
+The corresponding blocks are sampled with probabilities proportional to their respective dual gaps.
+"""
+struct DualGapOrder <: BlockCoordinateUpdateOrder
     limit::Int
 end
 
 DualGapOrder() = DualGapOrder(-1)
 
+"""
+The dual progress order initiates one round of `limit` many updates.
+The corresponding blocks are sampled with probabilities proportional to their respective dual progress.
+"""
 mutable struct DualProgressOrder <: BlockCoordinateUpdateOrder
     limit::Int
     previous_dual_gaps::Vector{Float64}
     dual_progress::Vector{Float64}
+    last_indices::Vector{Int}
 end
 
-DualProgressOrder() = DualProgressOrder(-1, [], [])
-DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [])
+DualProgressOrder() = DualProgressOrder(-1, [], [], [])
+DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [], [])
 
 """
 The Lazy update order is discussed in "Flexible block-iterative
@@ -122,6 +131,19 @@ function select_update_indices(u::StochasticUpdate, s::CallbackState, _)
     return [[rand(1:l)] for i in 1:u.limit]
 end
 
+function sample_without_replacement(n::Int, weights::Vector{Float64})
+    weights = weights ./ sum(weights)
+    cumsum_weights = cumsum(weights)
+    indices = zeros(Int, n)
+    for i in 1:n
+        indices[i] = findfirst(cumsum_weights .> rand())
+        weights[indices[i]] = 0
+        weights = weights ./ sum(weights)
+        cumsum_weights = cumsum(weights)
+    end
+    return indices
+end
+
 function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps)
     l = length(s.lmo.lmos)
 
@@ -129,22 +151,32 @@ function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps
     @assert u.limit <= l
     @assert u.limit > 0 || u.limit == -1
 
-    # In the first iteration update every block so we get finite dual progress
-    if s.t < 2
-        u.previous_dual_gaps = copy(dual_gaps)
-        return [[i] for i=1:l]
-    end
-
-    if s.t == 1
-        u.dual_progress = dual_gaps - u.previous_dual_gaps
+    # In the first two iterations update every block so we get finite dual progress
+    if s.t < 2
+        u.dual_progress = ones(l)
+        indices = [[i for i=1:l]]
     else
-        diff = dual_gaps - u.previous_dual_gaps
-        u.dual_progress = [d == 0 ?
u.dual_progress[i] : d for (i, d) in enumerate(diff)] + # Update dual progress only on updated blocks + for i in u.last_indices + d = u.previous_dual_gaps[i] - dual_gaps[i] + if d < 0 + u.dual_progress[i] = 0 + else + u.dual_progress[i] = d + end + end + n = u.limit == -1 ? l : u.limit + + # If less than n blocks have non-zero dual progress, update all of them + if sum(u.dual_progress .!= 0) < n + indices = [[i for i=1:l]] + else + indices = [sample_without_replacement(n, u.dual_progress)] + end end u.previous_dual_gaps = copy(dual_gaps) - - n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(u.dual_progress/sum(u.dual_progress)) .> rand())] for _=1:n] + u.last_indices = vcat(indices...) + return indices end function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) @@ -156,11 +188,11 @@ function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) # In the first iteration update every block so we get finite dual gaps if s.t < 1 - return [[i] for i=1:l] + return [[i for i=1:l]] end n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(dual_gaps/sum(dual_gaps)) .> rand())] for _=1:n] + return [sample_without_replacement(n, dual_gaps)] end function select_update_indices(update::LazyUpdate, s::CallbackState, dual_gaps) @@ -237,19 +269,18 @@ mutable struct BPCGStep <: UpdateStep phi::Float64 end -function Base.copy(::FrankWolfeStep) - return FrankWolfeStep() -end +Base.copy(::FrankWolfeStep) = FrankWolfeStep() function Base.copy(obj::BPCGStep) if obj.active_set === nothing - return BPCGStep(false, nothing, obj.renorm_interval, obj.lazy_tolerance, Inf) + return BPCGStep(obj.lazy, nothing, obj.renorm_interval, obj.lazy_tolerance, obj.phi) else return BPCGStep(obj.lazy, copy(obj.active_set), obj.renorm_interval, obj.lazy_tolerance, obj.phi) end end BPCGStep() = BPCGStep(false, nothing, 1000, 2.0, Inf) +BPCGStep(lazy::Bool) = BPCGStep(lazy, nothing, 1000, 2.0, Inf) function update_iterate( ::FrankWolfeStep, @@ -324,7 +355,7 @@ function update_iterate( # minor modification from original paper for improved sparsity # (proof follows with minor modification when estimating the step) - if local_gap > s.phi / s.lazy_tolerance + if local_gap > max(s.phi / s.lazy_tolerance, 0) # Robust condition to not drop the zero-vector if the dual_gap is negative by inaccuracy d = muladd_memory_mode(memory_mode, d, a, v_local) vertex_taken = v_local gamma_max = a_lambda diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 21deecbe4..7bf9487a6 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -223,7 +223,7 @@ end grad!, (lmo1, lmo2), ones(n), - update_step=FrankWolfe.BPCGStep(true, nothing, 1000, false, Inf), + update_step=FrankWolfe.BPCGStep(true), ) @test abs(x.blocks[1][1]) < 1e-6 From 1ac1797d59ac9f5df5d882ff811be5f3f81bbf90 Mon Sep 17 00:00:00 2001 From: Sebastian Pokutta <23001135+pokutta@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:12:54 +0200 Subject: [PATCH 4/4] BPCG with direct solve extension (#507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added direct solve feature BPCG via LP solver * adjusted for arbitrary LP solver * fixed deps * cleanup and comment * minor * added reporting of direct solve step * chose highs as standard solver * added sparsification * added sparsification code * cleanup * minor cleanup * minor * added generalized direct_solve * clean up, docu, additional direct_solve * docstrings fixed? 
* sparsifier active set (#508) * sparsifier active set * fix typo * added sparsifying tests * generic tolerane * remove sparsification * format * HiGHS dep * Quadratic solve structure (#511) * sparsifier active set * start working on LP AS * first working quadratic * remove quadratic LP from current * cleanup * HiGHS in test deps * working reworked LP quadratic * working version generic quadratic * slow version generic quadratic * faster term manipulation * copy sufficient * remove comment * added test for quadratic * minor * simplify example * clean up code, verify error with ASQuad * Add update_weights! to fix direct solve with active_set_quadratic * remove direct solve from BPCG * rng changed --------- Co-authored-by: Sébastien Designolle * update example * format * clean up example * fix callback --------- Co-authored-by: Mathieu Besançon Co-authored-by: Sébastien Designolle --- Project.toml | 3 +- examples/blended_pairwise_with_direct.jl | 174 ++++++++ ...wise_with_direct_non-standard-quadratic.jl | 146 +++++++ examples/blended_pairwise_with_sparsify.jl | 100 +++++ examples/simple_stateless.jl | 2 +- src/FrankWolfe.jl | 2 + src/active_set.jl | 4 + src/active_set_quadratic.jl | 18 +- src/active_set_quadratic_direct_solve.jl | 383 ++++++++++++++++++ src/active_set_sparsifier.jl | 96 +++++ src/blended_pairwise.jl | 2 - src/moi_oracle.jl | 2 +- src/utils.jl | 71 ++++ test/quadratic_lp_active_set.jl | 113 ++++++ test/runtests.jl | 14 + test/sparsifying_activeset.jl | 85 ++++ 16 files changed, 1206 insertions(+), 9 deletions(-) create mode 100644 examples/blended_pairwise_with_direct.jl create mode 100644 examples/blended_pairwise_with_direct_non-standard-quadratic.jl create mode 100644 examples/blended_pairwise_with_sparsify.jl create mode 100644 src/active_set_quadratic_direct_solve.jl create mode 100644 src/active_set_sparsifier.jl create mode 100644 test/quadratic_lp_active_set.jl create mode 100644 test/sparsifying_activeset.jl diff --git a/Project.toml b/Project.toml index e56223008..ca298fa1a 100644 --- a/Project.toml +++ b/Project.toml @@ -43,6 +43,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" @@ -58,4 +59,4 @@ Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] +test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl new file mode 100644 index 000000000..9a30bde76 --- /dev/null +++ b/examples/blended_pairwise_with_direct.jl @@ -0,0 +1,174 @@ +#= +This example demonstrates the use of the Blended Pairwise 
Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +Note the special structure of f(x) = norm(x - x0)^2 that we assume here + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10_000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_standard), +); + +# Just projection quadratic +trajectoryBPCG_quadratic = [] +as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic), +); + +as_quad = FrankWolfe.ActiveSetQuadratic([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) + +# with quadratic active set +trajectoryBPCG_quadratic_as = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_as), +); + +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct), +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct_generic = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), +); + +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, 
+ -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_noqas = [] + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_noqas), +); + + +# Update the data and labels for plotting +data_trajectories = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic, + trajectoryBPCG_quadratic_as, + trajectoryBPCG_quadratic_direct, + trajectoryBPCG_quadratic_direct_generic, + trajectoryBPCG_quadratic_noqas, +] +labels_trajectories = [ + "BPCG (Standard)", + "BPCG (Specific Direct)", + "AS_Quad", + "Reloaded", + "Reloaded_generic", + "Reloaded_noqas", +] + +# Plot trajectories +plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false) diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl new file mode 100644 index 000000000..ab934edd2 --- /dev/null +++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl @@ -0,0 +1,146 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope which is not standard quadratic. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +@info "Seed $s" +Random.seed!(s) + +A = let + A = randn(n, n) + A' * A +end +@assert isposdef(A) == true + +const y = Random.rand(Bool, n) * 0.6 .+ 0.3 + +function f(x) + d = x - y + return dot(d, A, d) +end + +function grad!(storage, x) + mul!(storage, A, x) + return mul!(storage, A, y, -2, 2) +end + + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
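+        # push the standard callback state extended with the current active-set size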
+ return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +callback = build_callback(trajectoryBPCG_standard) + +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Adaptive(), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic2 = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic2, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic2), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + + +dataSparsity = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_automatic_standard, +] +labelSparsity = ["BPCG (Standard)", "AS_Quad", "AS_Standard"] + + +# Plot trajectories +plot_trajectories(dataSparsity, labelSparsity, xscalelog=false) diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl new file mode 100644 index 000000000..6d42b7de4 --- /dev/null +++ b/examples/blended_pairwise_with_sparsify.jl @@ -0,0 +1,100 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. 
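+
+Concretely, `FrankWolfe.ActiveSetSparsifier` wraps a standard active set and, at a fixed
+frequency of updates (once the set holds more than a minimum number of atoms, both 50 by
+default), solves a small feasibility LP with HiGHS to re-express the current iterate as a
+convex combination of fewer vertices, dropping atoms whose weight is (near) zero.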
+=#

+using FrankWolfe
+using LinearAlgebra
+using Random
+
+import HiGHS
+
+include("../examples/plot_utils.jl")
+
+n = Int(1e4)
+k = 10000
+
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+# here the optimal solution lies in the interior; if you want an optimal solution on a face and not the interior, use:
+# const xp = xpi;
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+# lmo = FrankWolfe.KSparseLMO(2, 1.0)
+
+## other LMOs to try
+# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0")
+lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0)
+# lmo = FrankWolfe.ProbabilitySimplexOracle(1.0);
+# lmo = FrankWolfe.UnitSimplexOracle(1.0);
+
+x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100)
+
+trajectoryBPCG_standard = []
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+trajectoryBPCG_as_sparse = []
+active_set_sparse = FrankWolfe.ActiveSetSparsifier(
+    FrankWolfe.ActiveSet([1.0], [x00], similar(x00)),
+    HiGHS.Optimizer(),
+)
+
+@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_sparse,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    print_iter=k / 10,
+    memory_mode=FrankWolfe.InplaceEmphasis(),
+    verbose=true,
+    trajectory=true,
+    callback=build_callback(trajectoryBPCG_as_sparse),
+);
+
+# Reduction primal/dual error vs.
sparsity of solution

+dataSparsity = [trajectoryBPCG_standard, trajectoryBPCG_as_sparse]
+labelSparsity = ["BPCG (Standard)", "BPCG (Sparsify)"]
+
+# Plot sparsity
+plot_sparsity(dataSparsity, labelSparsity, legend_position=:topright)
diff --git a/examples/simple_stateless.jl b/examples/simple_stateless.jl
index 6a550a963..6bf230e26 100644
--- a/examples/simple_stateless.jl
+++ b/examples/simple_stateless.jl
@@ -71,7 +71,7 @@ plot_trajectories([trajectory_simple[1:end], trajectory_restart[1:end]], ["simpl
 
 # simple step iterations about 33% faster
 
-@test line_search.factor <= 44
+@test line_search.factor <= 51
 
 x, v, primal, dual_gap, trajectory_restart_highpres = FrankWolfe.frank_wolfe(
     f,
diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl
index e6d29c3cc..cc2fef145 100644
--- a/src/FrankWolfe.jl
+++ b/src/FrankWolfe.jl
@@ -34,6 +34,8 @@ include("moi_oracle.jl")
 include("function_gradient.jl")
 include("active_set.jl")
 include("active_set_quadratic.jl")
+include("active_set_quadratic_direct_solve.jl")
+include("active_set_sparsifier.jl")
 
 include("blended_cg.jl")
 include("afw.jl")
diff --git a/src/active_set.jl b/src/active_set.jl
index 62393b47f..c02145666 100644
--- a/src/active_set.jl
+++ b/src/active_set.jl
@@ -326,3 +326,7 @@ function compute_active_set_iterate!(active_set::AbstractActiveSet{<:ScaledHotVe
     end
     return active_set.x
 end
+
+function update_weights!(as::AbstractActiveSet, new_weights)
+    as.weights .= new_weights
+end
diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
index 8f26b632e..a616dd1fb 100644
--- a/src/active_set_quadratic.jl
+++ b/src/active_set_quadratic.jl
@@ -49,7 +49,8 @@ function detect_quadratic_function(grad!, x0; test=true)
 end
 
 function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R}
-    return ActiveSetQuadratic(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...)
+    A, b = detect_quadratic_function(grad!, tuple_values[1][2])
+    return ActiveSetQuadratic(tuple_values, A, b)
 end
 
 function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H}
@@ -71,13 +72,15 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b)
         dots_b[idx] = fast_dot(b, atoms[idx])
     end
     x = similar(b)
-    as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified)
+    as = ActiveSetQuadratic(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified)
+
     compute_active_set_iterate!(as)
     return as
 end
 
-function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!) where {AT,R}
-    return ActiveSetQuadratic{AT,R}(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...)
+function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadratic{AT,R}(tuple_values, A, b) end function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} @@ -116,6 +119,7 @@ function Base.:*(a::Identity, b) return a.λ * b end end + function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) end @@ -147,6 +151,7 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} return as end +# TODO multi-indices version function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) @inbounds for i in 1:idx-1 as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i] @@ -311,3 +316,8 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) return warm_as end +function update_weights!(as::ActiveSetQuadratic, new_weights) + as.weights_prev .= as.weights + as.weights .= new_weights + as.modified .= true +end diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl new file mode 100644 index 000000000..fef53a077 --- /dev/null +++ b/src/active_set_quadratic_direct_solve.jl @@ -0,0 +1,383 @@ + +""" + ActiveSetQuadraticLinearSolve{AT, R, IT} + +Represents an active set of extreme vertices collected in a FW algorithm, +along with their coefficients `(λ_i, a_i)`. +`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. +The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. +The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` +so that the gradient is `∇f(x)=Ax+b`. + +This active set stores an inner `active_set` that keeps track of the current set of vertices and convex decomposition. +It therefore delegates all update, deletion, and addition operations to this inner active set. +The `weight`, `atoms`, and `x` fields should only be accessed to read and are effectively the same objects as those in the inner active set. + +The structure also contains a scheduler struct which is called with the `should_solve_lp` function. +To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method. +""" +struct ActiveSetQuadraticLinearSolve{ + AT, + R<:Real, + IT, + H, + BT, + OT<:MOI.AbstractOptimizer, + AS<:AbstractActiveSet, + SF, +} <: AbstractActiveSet{AT,R,IT} + weights::Vector{R} + atoms::Vector{AT} + x::IT + A::H # Hessian matrix + b::BT # linear term + active_set::AS + lp_optimizer::OT + scheduler::SF + counter::Base.RefValue{Int} +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer) + +Creates an ActiveSetQuadraticLinearSolve by computing the Hessian and linear term from `grad!`. +""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler) +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer) + +Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadratic` active set. 
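+
+A minimal usage sketch mirroring the examples in this PR (`v0` stands for any feasible
+starting vertex; here the objective is `f(x) = ‖x - xp‖²`, i.e. `A = 2I` and `b = -2xp`,
+and HiGHS solves the periodic LPs):
+
+    as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+        [(1.0, v0)],
+        2 * LinearAlgebra.I,
+        -2xp,
+        MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+    )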
+""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadratic(tuple_values, A, b) + return ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + inner_as.A, + inner_as.b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A::LinearAlgebra.UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) + A, b = detect_quadratic_function(grad!, inner_as.atoms[1]) + return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + A, + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadratic{AT,R}(tuple_values, A, b) + as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + return ActiveSetQuadraticLinearSolve( + tuple_values, + Identity(A.λ), + b, + lp_optimizer; + scheduler=scheduler, + ) +end +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + Identity(A.λ), + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +# all mutating functions are delegated to the inner active set + +Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple) + +Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetQuadraticLinearSolve{AT,R}, + lambda, + atom, + renorm=true, + idx=nothing; + weight_purge_threshold=weight_purge_threshold_default(R), + add_dropped_vertices=false, + vertex_storage=nothing, +) where {AT,R} + if idx === nothing + idx = find_atom(as, atom) + end + active_set_update!( + as.active_set, + lambda, + atom, + renorm, + idx; + 
weight_purge_threshold=weight_purge_threshold, + add_dropped_vertices=add_dropped_vertices, + vertex_storage=vertex_storage, + ) + # new atom introduced, we can solve the auxiliary LP + if idx < 0 + as.counter[] += 1 + if should_solve_lp(as, as.scheduler) + solve_quadratic_activeset_lp!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetQuadraticLinearSolve) = active_set_renormalize!(as.active_set) + +function active_set_argmin(as::ActiveSetQuadraticLinearSolve, direction) + return active_set_argmin(as.active_set, direction) +end + +function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0.5) + return active_set_argminmax(as.active_set, direction; Φ=Φ) +end + +# generic quadratic with quadratic information provided + +""" + solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H})) + +Solves the auxiliary LP over the current active set. +The method is specialized by type `H` of the Hessian matrix `A`. +""" +function solve_quadratic_activeset_lp!( + as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:AbstractMatrix}, +) where {AT,R,IT} + nv = length(as) + o = as.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + # λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) + # Vᵗ A V λ == -Vᵗ b + for atom in as.atoms + lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) + Base.sizehint!(lhs.terms, nv) + # replaces direct sum because of MOI and MutableArithmetic slow sums + for j in 1:nv + push!(lhs.terms, MOI.ScalarAffineTerm(fast_dot(atom, as.A, as.atoms[j]), λ[j])) + end + rhs = -dot(atom, as.b) + MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) + end + MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) + MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 1e-5 + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set, indices_to_remove) + @assert length(as) == length(new_weights) + update_weights!(as.active_set, new_weights) + active_set_cleanup!(as) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + return as +end + +# special case of scaled identity Hessian + +function solve_quadratic_activeset_lp!( + as::ActiveSetQuadraticLinearSolve{AT,R,IT,<:Union{Identity,LinearAlgebra.UniformScaling}}, +) where {AT,R,IT} + hessian_scaling = as.A.λ + nv = length(as) + o = as.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + # λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) + # a Aᵗ A λ == -Aᵗ b + for atom in as.atoms + lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) + Base.sizehint!(lhs.terms, nv) + # replaces direct sum because of MOI and MutableArithmetic slow sums + for j in 1:nv + push!(lhs.terms, MOI.ScalarAffineTerm(hessian_scaling * dot(atom, as.atoms[j]), λ[j])) + end + rhs = -dot(atom, as.b) + MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) + end + MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) + MOI.set(o, 
MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set, indices_to_remove) + @assert length(as) == length(new_weights) + update_weights!(as.active_set, new_weights) + active_set_cleanup!(as) + @assert all(>=(0), new_weights) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + return as +end + +struct LogScheduler{T} + start_time::Int + scaling_factor::T + max_interval::Int + current_interval::Base.RefValue{Int} + last_solve_counter::Base.RefValue{Int} +end + +LogScheduler(; start_time=20, scaling_factor=1.5, max_interval=1000) = + LogScheduler(start_time, scaling_factor, max_interval, Ref(start_time), Ref(0)) + +function should_solve_lp(as::ActiveSetQuadraticLinearSolve, scheduler::LogScheduler) + if as.counter[] - scheduler.last_solve_counter[] >= scheduler.current_interval[] + scheduler.last_solve_counter[] = as.counter[] + scheduler.current_interval[] = min( + round(Int, scheduler.scaling_factor * scheduler.current_interval[]), + scheduler.max_interval, + ) + return true + end + return false +end diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl new file mode 100644 index 000000000..6bb523217 --- /dev/null +++ b/src/active_set_sparsifier.jl @@ -0,0 +1,96 @@ +struct ActiveSetSparsifier{AT,R,IT,AS<:AbstractActiveSet{AT,R,IT},OT<:MOI.AbstractOptimizer} <: + AbstractActiveSet{AT,R,IT} + active_set::AS + weights::Vector{R} + atoms::Vector{AT} + x::IT + optimizer::OT + minimum_vertices::Int + solve_frequency::Int + counter::Base.RefValue{Int} +end + +function ActiveSetSparsifier( + active_set::AbstractActiveSet, + optimizer::MOI.AbstractOptimizer; + minimum_vertices=50, + solve_frequency=50, +) + return ActiveSetSparsifier( + active_set, + active_set.weights, + active_set.atoms, + active_set.x, + optimizer, + minimum_vertices, + solve_frequency, + Ref(0), + ) +end + +function Base.push!(as::ActiveSetSparsifier, (λ, a)) + return push!(as.active_set, (λ, a)) +end + +Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetSparsifier{AS,OT,AT,R,IT}, + lambda, + atom, + renorm=true, + idx=nothing; + kwargs..., +) where {AS,OT,AT,R,IT} + active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...) 
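+    # the update itself is delegated to the inner active set above; the remainder of this
+    # function only decides whether to re-solve the sparsifying LP over the current atoms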
+ n = length(as) + as.counter[] += 1 + if n > as.minimum_vertices && mod(as.counter[], as.solve_frequency) == 0 + # sparsifying active set + MOI.empty!(as.optimizer) + x0 = as.active_set.x + λ = MOI.add_variables(as.optimizer, n) + # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(as.optimizer, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(as.optimizer, sum(λ; init=0.0), MOI.EqualTo(1.0)) + x_sum = 0 * as.active_set.atoms[1] + for (idx, atom) in enumerate(as.active_set.atoms) + x_sum += λ[idx] * atom + end + for idx in eachindex(x_sum) + MOI.add_constraint(as.optimizer, x_sum[idx], MOI.EqualTo(x0[idx])) + end + # Set a dummy objective (minimize ∑λ) + dummy_objective = sum(λ; init=0.0) + MOI.set(as.optimizer, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(as.optimizer, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(as.optimizer) + if MOI.get(as.optimizer, MOI.TerminationStatus()) in + (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(as.optimizer, MOI.VariablePrimal(), λ[idx]) + if weight_value <= sqrt(weight_purge_threshold_default(typeof(weight_value))) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set.atoms, indices_to_remove) + deleteat!(as.active_set.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.active_set.weights .= new_weights + active_set_renormalize!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetSparsifier) = active_set_renormalize!(as.active_set) + +active_set_argmin(as::ActiveSetSparsifier, direction) = active_set_argmin(as.active_set, direction) +active_set_argminmax(as::ActiveSetSparsifier, direction; Φ=0.5) = + active_set_argminmax(as.active_set, direction; Φ=Φ) diff --git a/src/blended_pairwise.jl b/src/blended_pairwise.jl index e2efec4d2..a42862063 100644 --- a/src/blended_pairwise.jl +++ b/src/blended_pairwise.jl @@ -1,4 +1,3 @@ - """ blended_pairwise_conditional_gradient(f, grad!, lmo, x0; kwargs...) @@ -325,7 +324,6 @@ function blended_pairwise_conditional_gradient( linesearch_workspace, memory_mode, ) - if callback !== nothing state = CallbackState( t, diff --git a/src/moi_oracle.jl b/src/moi_oracle.jl index 875b8001d..1471bd6e7 100644 --- a/src/moi_oracle.jl +++ b/src/moi_oracle.jl @@ -1,6 +1,6 @@ """ - MathOptLMO{OT <: MOI.Optimizer} <: LinearMinimizationOracle + MathOptLMO{OT <: MOI.AbstractOptimizer} <: LinearMinimizationOracle Linear minimization oracle with feasible space defined through a MathOptInterface.Optimizer. The oracle call sets the direction and reruns the optimizer. 
diff --git a/src/utils.jl b/src/utils.jl index 629f55e15..c6e01fd6f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -163,6 +163,77 @@ function fast_dot(A::Matrix{T1}, B::SparseArrays.SparseMatrixCSC{T2}) where {T1, return s end +fast_dot(a, Q, b) = dot(a, Q, b) + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::AbstractVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + d = Q.diag + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + return sum(eachindex(nzvals); init=zero(eltype(a))) do nzidx + nzvals[nzidx] * d[nzinds[nzidx]] * b[nzinds[nzidx]] + end +end + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::SparseArrays.AbstractSparseVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + n = length(a) + if length(b) != n + throw( + DimensionMismatch("Vector a has a length $n but b has a length $(length(b))") + ) + end + anzind = SparseArrays.nonzeroinds(a) + bnzind = SparseArrays.nonzeroinds(b) + anzval = SparseArrays.nonzeros(a) + bnzval = SparseArrays.nonzeros(b) + s = zero(Base.promote_eltype(a, Q, b)) + + if isempty(anzind) || isempty(bnzind) + return s + end + + a_idx = 1 + b_idx = 1 + a_idx_last = length(anzind) + b_idx_last = length(bnzind) + + # go through the nonzero indices of a and b simultaneously + @inbounds while a_idx <= a_idx_last && b_idx <= b_idx_last + ia = anzind[a_idx] + ib = bnzind[b_idx] + if ia == ib + s += dot(anzval[a_idx], Q.diag[ia], bnzval[b_idx]) + a_idx += 1 + b_idx += 1 + elseif ia < ib + a_idx += 1 + else + b_idx += 1 + end + end + return s +end + + +function _fast_quadratic_form_symmetric(a, Q) + d = Q.diag + if length(d) != length(a) + throw(DimensionMismatch()) + end + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + s = zero(Base.promote_eltype(a, Q)) + @inbounds for nzidx in eachindex(nzvals) + s += nzvals[nzidx]^2 * d[nzinds[nzidx]] + end + return s +end + """ trajectory_callback(storage) diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl new file mode 100644 index 000000000..a87b37cea --- /dev/null +++ b/test/quadratic_lp_active_set.jl @@ -0,0 +1,113 @@ +using FrankWolfe +using LinearAlgebra +using Random +using Test +import HiGHS +import MathOptInterface as MOI + +n = Int(1e4) +k = 5000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_specialized = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_specialized),
+);
+
+as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x00))],
+    2 * Diagonal(ones(length(xp))),
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_direct_generic = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_generic,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_direct_generic),
+);
+
+as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)),
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+
+trajectoryBPCG_quadratic_noqas = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    as_quad_direct_basic_as,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=true,
+    callback=build_callback(trajectoryBPCG_quadratic_noqas),
+);
+
+
+dual_gaps_quadratic_specialized = getindex.(trajectoryBPCG_quadratic_direct_specialized, 4)
+dual_gaps_quadratic_generic = getindex.(trajectoryBPCG_quadratic_direct_generic, 4)
+dual_gaps_quadratic_noqas = getindex.(trajectoryBPCG_quadratic_noqas, 4)
+dual_gaps_bpcg = getindex.(trajectoryBPCG_standard, 4)
+
+
+@test dual_gaps_quadratic_specialized[end] < dual_gaps_bpcg[end]
+@test dual_gaps_quadratic_noqas[end] < dual_gaps_bpcg[end]
+@test norm(dual_gaps_quadratic_specialized - dual_gaps_quadratic_noqas) ≤ k * 1e-5
diff --git a/test/runtests.jl b/test/runtests.jl
index d984043dc..40f5ac366 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -798,6 +798,20 @@ using Test
     end
 end
 
+module LpDirectSolveTest
+using Test
+@testset "LP solving for quadratic functions and active set" begin
+    include("quadratic_lp_active_set.jl")
+end
+end
+
+module SparsifyingActiveSetTest
+using Test
+@testset "Sparsifying active set" begin
+    include("sparsifying_activeset.jl")
+end
+end
+
 include("generic-arrays.jl")
 
 include("blocks.jl")
diff --git a/test/sparsifying_activeset.jl b/test/sparsifying_activeset.jl
new file mode 100644
index 000000000..1fde16d53
--- /dev/null
+++ b/test/sparsifying_activeset.jl
@@ -0,0 +1,85 @@
+#=
+This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm
+with direct solve steps for a quadratic optimization problem over a sparse polytope.
+
+The example showcases how the algorithm balances between:
+- Pairwise steps for efficient optimization
+- Periodic direct solves for handling the quadratic objective
+- Lazy (approximate) linear minimization steps for improved iteration complexity
+
+It also demonstrates how to set up custom callbacks for tracking algorithm progress.
+=#
+
+using LinearAlgebra
+using Test
+using Random
+using FrankWolfe
+import HiGHS
+
+n = Int(1e4)
+k = 10000
+
+# s = rand(1:100)
+s = 10
+@info "Seed $s"
+Random.seed!(s)
+
+xpi = rand(n);
+total = sum(xpi);
+
+# here the optimal solution lies in the interior; if you want an optimal solution on a face and not the interior, use:
+# const xp = xpi;
+
+const xp = xpi ./ total;
+
+f(x) = norm(x - xp)^2
+function grad!(storage, x)
+    @. storage = 2 * (x - xp)
+end
+
+lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0)
+
+const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n))
+
+function build_callback(trajectory_arr)
+    return function callback(state, active_set, args...)
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    trajectory=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+active_set_sparse = FrankWolfe.ActiveSetSparsifier(
+    FrankWolfe.ActiveSet([1.0], [x00], similar(x00)),
+    HiGHS.Optimizer(),
+)
+trajectoryBPCG_as_sparse = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_sparse,
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_as_sparse),
+);
+
+as_cardinality_bpcg = getindex.(trajectoryBPCG_standard, 6)
+as_cardinality_sparse = getindex.(trajectoryBPCG_as_sparse, 6)
+@test maximum(as_cardinality_sparse - as_cardinality_bpcg) <= 0
+
+dual_gap_bpcg = getindex.(trajectoryBPCG_standard, 4)
+dual_gap_sparse = getindex.(trajectoryBPCG_as_sparse, 4)
+@test maximum(dual_gap_sparse - dual_gap_bpcg) <= 1e-7