L inf normball (#246)

* basic documentation structure * docs/build in gitignore now * docs/build/ in .gitignore now * docs/build/ added to .gitignore * dollarsigns replaced * Update ci.yml * Project.toml update * docs/Project.toml added * minor fix * basic structure for l-inf ball * Update src/polytope_oracles.jl * Apply suggestions from code review * add compute-extreme-point * Update src/polytope_oracles.jl Co-authored-by: Mathieu Besançon <[email protected]> * Update src/polytope_oracles.jl Co-authored-by: Mathieu Besançon <[email protected]> * fixes * Update src/polytope_oracles.jl * Update test/lmo.jl * minor fix * add tests * Update test/lmo.jl * Update test/lmo.jl * add comments * Update test/lmo.jl * Update test/lmo.jl * add optimizer * Update test/lmo.jl * add tests * Update src/polytope_oracles.jl Co-authored-by: Mathieu Besançon <[email protected]>
ZIB-IOL · Oct 15, 2021 · 78a3ff9 · 78a3ff9
1 parent 7142140
commit 78a3ff9
Show file tree

Hide file tree

Showing 4 changed files with 65 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -170,7 +170,7 @@ x = Rational{BigInt}[1//100, 1//100, 1//100, 1//100, 1//100, 1//100, 1//100, 1//
 
 Example: `examples/large_scale.jl`
 
-The package is built to scale well, for those conditional gradients variants that can scale well. For exampple, Away-Step Frank-Wolfe and Pairwise Conditional Gradients do in most cases *not scale well* because they need to maintain active sets and maintaining them can be very expensive. Similarly, line search methods might become prohibitive at large sizes. However if we consider scale-friendly variants, e.g., the vanilla Frank-Wolfe algorithm with the agnostic step size rule or short step rule, then these algorithms can scale well to extreme sizes esentially only limited by the amount of memory available. However even for these methods that tend to scale well, allocation of memory itself can be very slow when you need to allocate gigabytes of memory for a single gradient computation. 
+The package is built to scale well, for those conditional gradient variants that can scale well. For example, Away-Step Frank-Wolfe and Pairwise Conditional Gradients do in most cases *not scale well* because they need to maintain active sets and maintaining them can be very expensive. Similarly, line search methods might become prohibitive at large sizes. However, if we consider scale-friendly variants, e.g., the vanilla Frank-Wolfe algorithm with the agnostic step size rule or short step rule, then these algorithms can scale well to extreme sizes, essentially only limited by the amount of memory available. However, even for these methods that tend to scale well, allocation of memory itself can be very slow when you need to allocate gigabytes of memory for a single gradient computation. 
 
 The package is built to support extreme sizes with a special memory efficient emphasis `emphasis=FrankWolfe.memory`, which minimizes expensive memory allocations and performs as many operations in-place as possible.
 

diff --git a/src/norm_oracles.jl b/src/norm_oracles.jl
@@ -5,7 +5,7 @@ import Arpack
 
 LMO with feasible set being a bound on the L-p norm:
 ```
-C = {x ∈ R^n, norm(x, p) ≤ right_side}
+C = {x ∈ R^n, norm(x, p) ≤ right_hand_side}
 ```
 """
 struct LpNormLMO{T,p} <: LinearMinimizationOracle
@@ -28,7 +28,7 @@ function compute_extreme_point(lmo::LpNormLMO{T,2}, direction; kwargs...) where
 end
 
 function compute_extreme_point(lmo::LpNormLMO{T,Inf}, direction; kwargs...) where {T}
-    return -[lmo.right_hand_side * sign(d) for d in direction]
+    return -[lmo.right_hand_side * (1 - 2signbit(d)) for d in direction]
 end
 
 function compute_extreme_point(lmo::LpNormLMO{T,1}, direction; kwargs...) where {T}

diff --git a/src/polytope_oracles.jl b/src/polytope_oracles.jl
@@ -130,6 +130,30 @@ function convert_mathopt(
     return MathOptLMO(optimizer)
 end
 
+
+"""
+    ScaledBoundLInfNormBall(lower_bounds, upper_bounds)
+
+Polytope similar to an L-inf ball with shifted bounds, i.e. general box constraints.
+Lower and upper bounds are passed as abstract vectors, possibly of different vector types (but sharing one element type).
+For the standard L-inf ball, all lower bounds would be -1 and all upper bounds 1.
+"""
+struct ScaledBoundLInfNormBall{T, VT1 <: AbstractVector{T}, VT2 <: AbstractVector{T}} <: LinearMinimizationOracle
+    lower_bounds::VT1  # per-coordinate lower bounds of the box
+    upper_bounds::VT2  # per-coordinate upper bounds; same element type T, container type may differ
+end
+
+"""
+    compute_extreme_point(lmo::ScaledBoundLInfNormBall, direction; kwargs...)
+
+Return a vertex `v` of the box minimizing `dot(direction, v)`: coordinate `i` is
+`lmo.upper_bounds[i]` when that gives a strictly smaller product with
+`direction[i]` than `lmo.lower_bounds[i]`, and `lmo.lower_bounds[i]` otherwise
+(including ties, e.g. `direction[i] == 0`). Extra keyword arguments are ignored.
+"""
+function compute_extreme_point(lmo::ScaledBoundLInfNormBall, direction; kwargs...)
+    # `similar` + `copyto!` yields a writable buffer even when the lower bounds are
+    # stored in an immutable vector such as a range, where the result of `copy`
+    # cannot be assigned into.
+    v = copyto!(similar(lmo.lower_bounds), lmo.lower_bounds)
+    for i in eachindex(direction)
+        # The upper bound is chosen only on a strict improvement; ties keep the lower bound.
+        if direction[i] * lmo.upper_bounds[i] < direction[i] * lmo.lower_bounds[i]
+            v[i] = lmo.upper_bounds[i]
+        end
+    end
+    return v
+end
+
+
 """
     ScaledBoundL1NormBall(lower_bounds, upper_bounds)
 

diff --git a/test/lmo.jl b/test/lmo.jl
@@ -532,11 +532,11 @@ end
     @test vvec ≈ [vinf; v1]
 end
 
-@testset "Scaled norm polytopes" begin
+@testset "Scaled L-1 norm polytopes" begin
     lmo = FrankWolfe.ScaledBoundL1NormBall(-ones(10), ones(10))
     # equivalent to LMO
     lmo_ref = FrankWolfe.LpNormLMO{1}(1)
-    # all coordinates shifted up 
+    # all coordinates shifted up
     lmo_shifted = FrankWolfe.ScaledBoundL1NormBall(zeros(10), 2 * ones(10))
     lmo_scaled = FrankWolfe.ScaledBoundL1NormBall(-2 * ones(10), 2 * ones(10))
     for _ in 1:100
@@ -555,3 +555,39 @@ end
     @test v ≈ vref
     @test norm(v) == 1
 end
+
+
+@testset "Scaled L-inf norm polytopes" begin
+    # tests ScaledBoundLInfNormBall for the standard hypercube, a shifted one, and a scaled one
+    lmo = FrankWolfe.ScaledBoundLInfNormBall(-ones(10), ones(10))
+    # reference oracle: L-inf norm ball of radius 1, i.e. the same hypercube
+    lmo_ref = FrankWolfe.LpNormLMO{Inf}(1)
+    lmo_shifted = FrankWolfe.ScaledBoundLInfNormBall(zeros(10), 2 * ones(10))
+    lmo_scaled = FrankWolfe.ScaledBoundLInfNormBall(-2 * ones(10), 2 * ones(10))
+    bounds = collect(1.0:10)
+    # tests another ScaledBoundLInfNormBall with unequal bounds against a MOI optimizer
+    lmo_scaled_unequally = FrankWolfe.ScaledBoundLInfNormBall(-bounds, bounds)
+    # the same box expressed as variable bounds of an LP model
+    o = GLPK.Optimizer()
+    MOI.set(o, MOI.Silent(), true)
+    x = MOI.add_variables(o, 10)
+    MOI.add_constraint.(o, x, MOI.GreaterThan.(-bounds))
+    MOI.add_constraint.(o, x, MOI.LessThan.(bounds))
+    scaled_unequally_opt = FrankWolfe.MathOptLMO(o)
+    for _ in 1:100
+        d = randn(10)
+        v = FrankWolfe.compute_extreme_point(lmo, d)
+        vref = FrankWolfe.compute_extreme_point(lmo_ref, d)
+        @test v ≈ vref
+        # shifting the box by one shifts the optimal vertex by one
+        vshift = FrankWolfe.compute_extreme_point(lmo_shifted, d)
+        @test v .+ 1 ≈ vshift
+        # doubling the box doubles the optimal vertex
+        v2 = FrankWolfe.compute_extreme_point(lmo_scaled, d)
+        @test v2 ≈ 2v
+        v3 = FrankWolfe.compute_extreme_point(lmo_scaled_unequally, d)
+        # qualified like every other call in this testset
+        v3_test = FrankWolfe.compute_extreme_point(scaled_unequally_opt, d)
+        @test v3 ≈ v3_test
+    end
+    # zero direction: all vertices tie, so the tie-breaking of both oracles must agree
+    d = zeros(10)
+    v = FrankWolfe.compute_extreme_point(lmo, d)
+    vref = FrankWolfe.compute_extreme_point(lmo_ref, d)
+    @test v ≈ vref
+    @test norm(v, Inf) == 1
+end