Merge branch 'master' into BPCG-with-direct-solve

ZIB-IOL · Oct 25, 2024 · 24ea326 · 24ea326
2 parents 6d4280f + 1ac1797
commit 24ea326
Show file tree

Hide file tree

Showing 5 changed files with 205 additions and 111 deletions.
diff --git a/examples/alternating_projections.jl b/examples/alternating_projections.jl
@@ -0,0 +1,37 @@
+import FrankWolfe
+using LinearAlgebra
+using Random
+
+
+include("../examples/plot_utils.jl")
+
+Random.seed!(100)
+
+n = 500
+lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n])
+lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n])
+
+
+trajectories = []
+
+methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient]
+
+for (i,m) in enumerate(methods)
+    if i == 1
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m)
+    elseif i== 2
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true)
+    else
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true)
+    end
+    push!(trajectories, traj_data)
+end
+
+
+
+labels = ["FW", "BPCG" ,"BPCG (reuse)"]
+
+plot_trajectories(trajectories, labels, xscalelog=true)
+
+
+
diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl
@@ -36,7 +36,7 @@ function detect_quadratic_function(grad!, x0; test=true)
         X[:, i] .-= x0
         G[:, i] .= storage .- g0
     end
-    A = G * inv(X)
+    A = G \ X
     b = g0 - A * x0
     if test
         x_test = randn(T, n)

diff --git a/src/alternating_methods.jl b/src/alternating_methods.jl
@@ -35,12 +35,12 @@ end
 
 Alternating Linear Minimization minimizes the objective `f` over the intersections of the feasible domains specified by `lmos`.
 The tuple `x0` defines the initial points for each domain.
-Returns a tuple `(x, v, primal, dual_gap, infeas, traj_data)` with:
+Returns a tuple `(x, v, primal, dual_gap, dist2, traj_data)` with:
 - `x` cartesian product of final iterates
 - `v` cartesian product of last vertices of the LMOs
 - `primal` primal value `f(x)`
 - `dual_gap` final Frank-Wolfe gap
-- `infeas` sum of squared, pairwise distances between iterates
+- `dist2` is 1/2 of the sum of squared, pairwise distances between iterates
 - `traj_data` vector of trajectory information.
 """
 function alternating_linear_minimization(
@@ -49,17 +49,17 @@ function alternating_linear_minimization(
     grad!,
     lmos::NTuple{N,LinearMinimizationOracle},
     x0::Tuple{Vararg{Any,N}};
-    lambda::Union{Float64, Function}=1.0,
+    lambda::Union{Float64,Function}=1.0,
     verbose=false,
     trajectory=false,
     callback=nothing,
     max_iteration=10000,
-    print_iter = max_iteration / 10,
+    print_iter=max_iteration / 10,
     memory_mode=InplaceEmphasis(),
     line_search::LS=Adaptive(),
     epsilon=1e-7,
     kwargs...,
-) where {N, LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}}
+) where {N,LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}}
 
     x0_bc = BlockVector([x0[i] for i in 1:N], [size(x0[i]) for i in 1:N], sum(length, x0))
     gradf = similar(x0_bc)
@@ -74,21 +74,14 @@ function alternating_linear_minimization(
             for i in 1:N
                 grad!(gradf.blocks[i], x.blocks[i])
             end
-            t = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks]
+            t = [N * b - sum(x.blocks) for b in x.blocks]
             return storage.blocks = λ[] * gradf.blocks + t
         end
     end
 
-    function dist2(x::BlockVector) 
-        s = 0
-        for i=1:N
-            for j=1:i-1
-                diff = x.blocks[i] - x.blocks[j]
-                s += fast_dot(diff, diff)
-            end
-        end
-        return s
-    end
+    dist2(x::BlockVector) =
+        0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) -
+        sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1)
 
     function build_objective()
         λ = Ref(λ0)
@@ -122,8 +115,11 @@ function alternating_linear_minimization(
 
         num_type = eltype(x0[1])
         grad_type = eltype(gradf.blocks[1])
-        line_search_type = line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search)
-        println("MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration")
+        line_search_type =
+            line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search)
+        println(
+            "MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration",
+        )
         println("TYPE: $num_type GRADIENTTYPE: $grad_type")
         println("LAMBDA: $lambda")
 
@@ -177,7 +173,7 @@ function alternating_linear_minimization(
     end
 
     if lambda isa Function
-        callback  = function (state,args...)
+        callback = function (state, args...)
             state.f.λ[] = lambda(state)
             state.grad!.λ[] = state.f.λ[]
 
@@ -211,33 +207,15 @@ function alternating_linear_minimization(
 end
 
 
-function ProjectionFW(y, lmo; max_iter=10000, eps=1e-3)
-    f(x) = sum(abs2, x - y)
-    grad!(storage, x) = storage .= 2 * (x - y)
-
-    x0 = FrankWolfe.compute_extreme_point(lmo, y)
-    x_opt, _ = FrankWolfe.frank_wolfe(
-        f,
-        grad!,
-        lmo,
-        x0,
-        epsilon=eps,
-        max_iteration=max_iter,
-        trajectory=true,
-        line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=false),
-    )
-    return x_opt
-end
-
 """
     alternating_projections(lmos::NTuple{N,LinearMinimizationOracle}, x0; ...) where {N}
 
 Computes a point in the intersection of feasible domains specified by `lmos`.
-Returns a tuple `(x, v, dual_gap, infeas, traj_data)` with:
+Returns a tuple `(x, v, dual_gap, dist2, traj_data)` with:
 - `x` cartesian product of final iterates
 - `v` cartesian product of last vertices of the LMOs
 - `dual_gap` final Frank-Wolfe gap
-- `infeas` sum of squared, pairwise distances between iterates
+- `dist2` is 1/2 * sum of squared, pairwise distances between iterates
 - `traj_data` vector of trajectory information.
 """
 function alternating_projections(
@@ -252,6 +230,10 @@ function alternating_projections(
     callback=nothing,
     traj_data=[],
     timeout=Inf,
+    proj_method=frank_wolfe,
+    inner_epsilon::Function=t -> 1 / (t^2 + 1),
+    reuse_active_set=false,
+    kwargs...,
 ) where {N}
     return alternating_projections(
         ProductLMO(lmos),
@@ -265,6 +247,10 @@ function alternating_projections(
         callback,
         traj_data,
         timeout,
+        proj_method,
+        inner_epsilon,
+        reuse_active_set,
+        kwargs...,
     )
 end
 
@@ -281,17 +267,21 @@ function alternating_projections(
     callback=nothing,
     traj_data=[],
     timeout=Inf,
+    proj_method=frank_wolfe,
+    inner_epsilon::Function=t -> 1 / (t^2 + 1),
+    reuse_active_set=false,
+    kwargs...,
 ) where {N}
 
     # header and format string for output of the algorithm
-    headers = ["Type", "Iteration", "Dual Gap", "Infeas", "Time", "It/sec"]
+    headers = ["Type", "Iteration", "Dual Gap", "dist2", "Time", "It/sec"]
     format_string = "%6s %13s %14e %14e %14e %14e\n"
-    function format_state(state, infeas)
+    function format_state(state, primal)
         rep = (
             steptype_string[Symbol(state.step_type)],
             string(state.t),
             Float64(state.dual_gap),
-            Float64(infeas),
+            Float64(primal),
             state.time,
             state.t / state.time,
         )
@@ -300,27 +290,61 @@ function alternating_projections(
 
     t = 0
     dual_gap = Inf
-    x = fill(x0, N)
-    v = similar(x)
+    dual_gaps = fill(Inf, N)
+    x = BlockVector(compute_extreme_point.(lmo.lmos, fill(x0, N)))
     step_type = ST_REGULAR
     gradient = similar(x)
-    ndim = ndims(x)
 
-    infeasibility(x) = sum(
-        fast_dot(
-            selectdim(x, ndim, i) - selectdim(x, ndim, j),
-            selectdim(x, ndim, i) - selectdim(x, ndim, j),
-        ) for i in 1:N for j in 1:i-1
-    )
+    if reuse_active_set
+        if proj_method ∉
+           [away_frank_wolfe, blended_pairwise_conditional_gradient, pairwise_frank_wolfe]
+            error("The selected FW method does not support active sets reuse.")
+        end
+        active_sets = [ActiveSet([(1.0, x.blocks[i])]) for i in 1:N]
+    end
 
-    partial_infeasibility(x) =
-        sum(fast_dot(x[mod(i - 2, N)+1] - x[i], x[mod(i - 2, N)+1] - x[i]) for i in 1:N)
+    dist2(x::BlockVector) =
+        0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) -
+        sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1)
 
     function grad!(storage, x)
-        @. storage = [2 * (x[i] - x[mod(i - 2, N)+1]) for i in 1:N]
+        return storage.blocks = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks]
     end
 
-    projection_step(x, i, t) = ProjectionFW(x, lmo.lmos[i]; eps=1 / (t^2 + 1))
+    function projection_step(i, t)
+        xii = x.blocks[mod(i - 2, N)+1] # iterate in previous block
+        f(y) = sum(abs2, y - xii)
+        function grad_proj!(storage, y)
+            return storage .= 2 * (y - xii)
+        end
+
+        if reuse_active_set
+
+            results = proj_method(
+                f,
+                grad_proj!,
+                lmo.lmos[i],
+                active_sets[i];
+                epsilon=inner_epsilon(t),
+                max_iteration=10000,
+                line_search=Adaptive(),
+                kwargs...,
+            )
+            active_sets[i] = results[:active_set]
+        else
+            results = proj_method(
+                f,
+                grad_proj!,
+                lmo.lmos[i],
+                x.blocks[i];
+                epsilon=inner_epsilon(t),
+                max_iteration=10000,
+                line_search=Adaptive(),
+                kwargs...,
+            )
+        end
+        return results[:x], results[:dual_gap]
+    end
 
 
     if trajectory
@@ -372,19 +396,18 @@ function alternating_projections(
         # Projection step:
         for i in 1:N
             # project the previous iterate on the i-th feasible region
-            x[i] = projection_step(x[mod(i - 2, N)+1], i, t)
+            x.blocks[i], dual_gaps[i] = projection_step(i, t)
         end
 
+        dual_gap = sum(dual_gaps)
+
         # Update gradients
         grad!(gradient, x)
 
-        # Update dual gaps
-        v = compute_extreme_point.(lmo.lmos, gradient)
-        dual_gap = fast_dot(x - v, gradient)
 
         # go easy on the memory - only compute if really needed
         if ((mod(t, print_iter) == 0 && verbose) || callback !== nothing)
-            infeas = infeasibility(x)
+            primal = dist2(x)
         end
 
         first_iter = false
@@ -393,12 +416,12 @@ function alternating_projections(
         if callback !== nothing
             state = CallbackState(
                 t,
-                infeas,
-                infeas - dual_gap,
+                primal,
+                primal - dual_gap,
                 dual_gap,
                 tot_time,
                 x,
-                v,
+                nothing,
                 nothing,
                 nothing,
                 nothing,
@@ -408,7 +431,7 @@ function alternating_projections(
                 step_type,
             )
             # @show state
-            if callback(state, infeas) === false
+            if callback(state, primal) === false
                 break
             end
         end
@@ -419,18 +442,18 @@ function alternating_projections(
     # this is important as some variants do not recompute f(x) and the dual_gap regularly but only when reporting
     # hence the final computation.
     step_type = ST_LAST
-    infeas = infeasibility(x)
+    primal = dist2(x)
     grad!(gradient, x)
-    v = compute_extreme_point.(lmo.lmos, gradient)
+    v = compute_extreme_point(lmo, gradient)
     dual_gap = fast_dot(x - v, gradient)
 
     tot_time = (time_ns() - time_start) / 1.0e9
 
     if callback !== nothing
         state = CallbackState(
             t,
-            infeas,
-            infeas - dual_gap,
+            primal,
+            primal - dual_gap,
             dual_gap,
             tot_time,
             x,
@@ -443,9 +466,9 @@ function alternating_projections(
             gradient,
             step_type,
         )
-        callback(state, infeas)
+        callback(state, primal)
     end
 
-    return x, v, dual_gap, infeas, traj_data
+    return x, v, dual_gap, primal, traj_data
 
 end