[WIP] Boosting FW #212

Draft · wants to merge 14 commits into master
2 changes: 1 addition & 1 deletion src/FrankWolfe.jl
@@ -41,7 +41,7 @@ include("fw_algorithms.jl")

# collecting most common data types etc and precompile
# min version req set to 1.5 to prevent stalling of julia 1
-@static if VERSION >= v"1.5"
+@static if VERSION >= v"1.5"
println("Precompiling common signatures. This might take a moment...")
include("precompile.jl")
end
33 changes: 17 additions & 16 deletions src/afw.jl
@@ -32,6 +32,7 @@ function away_frank_wolfe(
callback=nothing,
timeout=Inf,
print_callback=print_callback,
+ kwargs...,
)

# format string for output of the algorithm
@@ -89,7 +90,7 @@ function away_frank_wolfe(

x = compute_active_set_iterate(active_set)
grad!(gradient, x)
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)
phi_value = max(0, fast_dot(x, gradient) - fast_dot(v, gradient))
gamma = 1.0

@@ -128,14 +129,14 @@ function away_frank_wolfe(
if away_steps
if lazy
d, vertex, index, gamma_max, phi_value, away_step_taken, fw_step_taken, tt =
- lazy_afw_step(x, gradient, lmo, active_set, phi_value; K=K)
+ lazy_afw_step(x, gradient, lmo, active_set, phi_value; K=K, kwargs...)
else
d, vertex, index, gamma_max, phi_value, away_step_taken, fw_step_taken, tt =
- afw_step(x, gradient, lmo, active_set)
+ afw_step(x, gradient, lmo, active_set; kwargs...)
end
else
d, vertex, index, gamma_max, phi_value, away_step_taken, fw_step_taken, tt =
- fw_step(x, gradient, lmo)
+ fw_step(x, gradient, lmo; kwargs...)
end

if fw_step_taken || away_step_taken
@@ -215,7 +216,7 @@ function away_frank_wolfe(
if verbose
x = compute_active_set_iterate(active_set)
grad!(gradient, x)
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)
primal = f(x)
dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient)
tt = last
@@ -237,7 +238,7 @@ function away_frank_wolfe(
active_set_cleanup!(active_set)
x = compute_active_set_iterate(active_set)
grad!(gradient, x)
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)
primal = f(x)
dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient)
if verbose
@@ -260,9 +261,9 @@ function away_frank_wolfe(
return x, v, primal, dual_gap, traj_data, active_set
end

- function lazy_afw_step(x, gradient, lmo, active_set, phi; K=2.0)
+ function lazy_afw_step(x, gradient, lmo, active_set, phi; K=2.0, kwargs...)
v_lambda, v, v_loc, a_lambda, a, a_loc = active_set_argminmax(active_set, gradient)
- #Do lazy FW step
+ # Do lazy FW step
grad_dot_lazy_fw_vertex = fast_dot(v, gradient)
grad_dot_x = fast_dot(x, gradient)
grad_dot_a = fast_dot(a, gradient)
@@ -276,7 +277,7 @@ function lazy_afw_step(x, gradient, lmo, active_set, phi; K=2.0)
fw_step_taken = true
index = v_loc
else
- #Do away step, as it promises enough progress.
+ # Do away step, as it promises enough progress.
if grad_dot_a - grad_dot_x > grad_dot_x - grad_dot_lazy_fw_vertex &&
grad_dot_a - grad_dot_x >= phi / K
tt = away
@@ -286,9 +287,9 @@ function lazy_afw_step(x, gradient, lmo, active_set, phi; K=2.0)
away_step_taken = true
fw_step_taken = false
index = a_loc
- #Resort to calling the LMO
+ # Resort to calling the LMO
else
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)
# Real dual gap promises enough progress.
grad_dot_fw_vertex = fast_dot(v, gradient)
dual_gap = grad_dot_x - grad_dot_fw_vertex
@@ -300,7 +301,7 @@ function lazy_afw_step(x, gradient, lmo, active_set, phi; K=2.0)
away_step_taken = false
fw_step_taken = true
index = nothing
- #Lower our expectation for progress.
+ # Lower our expectation for progress.
else
tt = dualstep
phi = min(dual_gap, phi / 2.0)
@@ -316,11 +317,11 @@ function lazy_afw_step(x, gradient, lmo, active_set, phi; K=2.0)
return d, vertex, index, gamma_max, phi, away_step_taken, fw_step_taken, tt
end

- function afw_step(x, gradient, lmo, active_set)
+ function afw_step(x, gradient, lmo, active_set; kwargs...)
local_v_lambda, local_v, local_v_loc, a_lambda, a, a_loc =
active_set_argminmax(active_set, gradient)
away_gap = fast_dot(a, gradient) - fast_dot(x, gradient)
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)
grad_dot_x = fast_dot(x, gradient)
away_gap = fast_dot(a, gradient) - grad_dot_x
dual_gap = grad_dot_x - fast_dot(v, gradient)
@@ -344,8 +345,8 @@ function afw_step(x, gradient, lmo, active_set)
return d, vertex, index, gamma_max, dual_gap, away_step_taken, fw_step_taken, tt
end

- function fw_step(x, gradient, lmo)
- vertex = compute_extreme_point(lmo, gradient)
+ function fw_step(x, gradient, lmo; kwargs...)
+ vertex = compute_extreme_point(lmo, gradient, x=x; kwargs...)
return (
x - vertex,
vertex,
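The common thread in the `src/afw.jl` changes above is that every `compute_extreme_point` call now forwards the current iterate via `x=x` together with any extra keyword arguments. A minimal sketch of a user-defined LMO that can accept these (the type `IterateAwareSimplexLMO` is purely illustrative and not part of this PR; `FrankWolfe.LinearMinimizationOracle` is assumed to be the package's abstract oracle type):

```julia
using FrankWolfe
import FrankWolfe: compute_extreme_point

# Toy probability-simplex LMO that also receives the current iterate `x`
# through the keyword threaded by away_frank_wolfe / afw_step / fw_step above.
struct IterateAwareSimplexLMO <: FrankWolfe.LinearMinimizationOracle end

function compute_extreme_point(::IterateAwareSimplexLMO, direction; x=nothing, kwargs...)
    # standard simplex LMO: put all mass on the most negative entry of `direction`
    v = zero(direction)
    v[argmin(direction)] = 1
    # `x` is available here for iterate-dependent oracles (e.g. a boosting
    # variant combining several LMO calls around x); this sketch ignores it.
    return v
end
```

Unknown keywords forwarded by the algorithms are simply swallowed by `kwargs...`, so oracles that do not need them keep working.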
8 changes: 4 additions & 4 deletions src/blended_cg.jl
@@ -51,7 +51,7 @@ function blended_conditional_gradient(
primal = f(x)
grad!(gradient, x)
# initial gap estimate computation
- vmax = compute_extreme_point(lmo, gradient)
+ vmax = compute_extreme_point(lmo, gradient, x=x; lmo_kwargs...)
phi = fast_dot(gradient, x0 - vmax) / 2
dual_gap = phi
traj_data = []
@@ -243,7 +243,7 @@ function blended_conditional_gradient(
if verbose
x = compute_active_set_iterate(active_set)
grad!(gradient, x)
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; lmo_kwargs...)
primal = f(x)
dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient)
tot_time = (time_ns() - time_start) / 1e9
@@ -267,7 +267,7 @@ function blended_conditional_gradient(
active_set_renormalize!(active_set)
x = compute_active_set_iterate(active_set)
grad!(gradient, x)
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; lmo_kwargs...)
primal = f(x)
#dual_gap = 2phi
dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient)
@@ -1025,7 +1025,7 @@ function lp_separation_oracle(
end
end
# otherwise, call the LMO
- y = compute_extreme_point(lmo, direction; kwargs...)
+ y = compute_extreme_point(lmo, direction; x=x, kwargs...)
# don't return nothing but y, fast_dot(direction, y) / use y for step outside / and update phi as in LCG (lines 402 - 406)
return (y, fast_dot(direction, y))
end
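Note that `src/afw.jl` passes the iterate as `compute_extreme_point(lmo, gradient, x=x; kwargs...)` while `lp_separation_oracle` here uses `compute_extreme_point(lmo, direction; x=x, kwargs...)`; in Julia both call styles are equivalent, since keyword arguments may appear before or after the semicolon at the call site. A small self-contained check:

```julia
# Both keyword-passing styles used in this diff behave identically.
g(direction; x=nothing, kwargs...) = (x, values(kwargs))

kw = (threshold=0.5,)
@assert g([1.0, -2.0], x=[0.5, 0.5]; kw...) == g([1.0, -2.0]; x=[0.5, 0.5], kw...)
```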
28 changes: 22 additions & 6 deletions src/fw_algorithms.jl
@@ -31,6 +31,7 @@ function frank_wolfe(
callback=nothing,
timeout=Inf,
print_callback=print_callback,
+ kwargs...,
)

# format string for output of the algorithm
@@ -131,7 +132,8 @@
end
first_iter = false

- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)
+ #@show typeof(v)
# go easy on the memory - only compute if really needed
if (
(mod(t, print_iter) == 0 && verbose) ||
@@ -159,6 +161,9 @@
step_lim,
one(eltype(x)),
)
+ #@show typeof(x)
+ #@show typeof(d)
+ #@show typeof(x-gamma*d)
if callback !== nothing
state = (
t=t,
@@ -201,7 +206,9 @@
# hence the final computation.

grad!(gradient, x)
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)
+ #@show v
+ #@show typeof(v)
primal = f(x)
dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient)
if verbose
@@ -256,6 +263,7 @@ function lazified_conditional_gradient(
callback=nothing,
timeout=Inf,
print_callback=print_callback,
+ kwargs...,
)

# format string for output of the algorithm
@@ -351,7 +359,14 @@ function lazified_conditional_gradient(
primal = f(x)
end

- v = compute_extreme_point(lmo, gradient, threshold=threshold, greedy=greedy_lazy)
+ v = compute_extreme_point(
+ lmo,
+ gradient,
+ threshold=threshold,
+ greedy=greedy_lazy,
+ x=x;
+ kwargs...,
+ )
tt = lazy
if fast_dot(v, gradient) > threshold
tt = dualstep
@@ -418,7 +433,7 @@ function lazified_conditional_gradient(
# this is important as some variants do not recompute f(x) and the dual_gap regularly but only when reporting
# hence the final computation.
grad!(gradient, x)
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)
primal = f(x)
dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient)

@@ -470,6 +485,7 @@ function stochastic_frank_wolfe(
callback=nothing,
timeout=Inf,
print_callback=print_callback,
+ kwargs...,
)

# format string for output of the algorithm
@@ -568,7 +584,7 @@ function stochastic_frank_wolfe(
end
first_iter = false

- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)

# go easy on the memory - only compute if really needed
if (mod(t, print_iter) == 0 && verbose) ||
@@ -632,7 +648,7 @@ function stochastic_frank_wolfe(
# last computation done with full evaluation for exact gradient

(primal, gradient) = compute_value_gradient(f, x, full_evaluation=true)
- v = compute_extreme_point(lmo, gradient)
+ v = compute_extreme_point(lmo, gradient, x=x; kwargs...)
# @show (gradient, primal)
dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient)
if verbose
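With the trailing `kwargs...` added to `frank_wolfe`, `lazified_conditional_gradient`, and `stochastic_frank_wolfe`, oracle-specific keywords can be passed straight through the solver to the LMO. A usage sketch under stated assumptions (the `oracle_rounds` keyword is hypothetical, and `IterateAwareSimplexLMO` is the illustrative oracle defined in the sketch after `src/afw.jl`):

```julia
using FrankWolfe, LinearAlgebra

n = 100
xp = rand(n)
f(x) = norm(x - xp)^2
grad!(storage, x) = (storage .= 2 .* (x .- xp); nothing)

lmo = IterateAwareSimplexLMO()                      # illustrative oracle from above
x0 = FrankWolfe.compute_extreme_point(lmo, ones(n)) # a feasible starting vertex

# Any keyword not consumed by frank_wolfe itself (here the hypothetical
# `oracle_rounds`) is now forwarded to every compute_extreme_point call.
x, v, primal, dual_gap, traj = FrankWolfe.frank_wolfe(
    f,
    grad!,
    lmo,
    x0;
    max_iteration=500,
    oracle_rounds=3,
)
```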