diff --git a/README.md b/README.md index 66c134438..084a78fca 100644 --- a/README.md +++ b/README.md @@ -170,7 +170,7 @@ x = Rational{BigInt}[1//100, 1//100, 1//100, 1//100, 1//100, 1//100, 1//100, 1// Example: `examples/large_scale.jl` -The package is built to scale well, for those conditional gradients variants that can scale well. For exampple, Away-Step Frank-Wolfe and Pairwise Conditional Gradients do in most cases *not scale well* because they need to maintain active sets and maintaining them can be very expensive. Similarly, line search methods might become prohibitive at large sizes. However if we consider scale-friendly variants, e.g., the vanilla Frank-Wolfe algorithm with the agnostic step size rule or short step rule, then these algorithms can scale well to extreme sizes esentially only limited by the amount of memory available. However even for these methods that tend to scale well, allocation of memory itself can be very slow when you need to allocate gigabytes of memory for a single gradient computation. +The package is built to scale well, for those conditional gradients variants that can scale well. For example, Away-Step Frank-Wolfe and Pairwise Conditional Gradients do in most cases *not scale well* because they need to maintain active sets and maintaining them can be very expensive. Similarly, line search methods might become prohibitive at large sizes. However if we consider scale-friendly variants, e.g., the vanilla Frank-Wolfe algorithm with the agnostic step size rule or short step rule, then these algorithms can scale well to extreme sizes essentially only limited by the amount of memory available. However even for these methods that tend to scale well, allocation of memory itself can be very slow when you need to allocate gigabytes of memory for a single gradient computation. 
The package is build to support extreme sizes with a special memory efficient emphasis `emphasis=FrankWolfe.memory`, which minimizes expensive memory allocations and performs as many operations in-place as possible. diff --git a/src/norm_oracles.jl b/src/norm_oracles.jl index ab44bf778..5061288f5 100644 --- a/src/norm_oracles.jl +++ b/src/norm_oracles.jl @@ -5,7 +5,7 @@ import Arpack LMO with feasible set being a bound on the L-p norm: ``` -C = {x ∈ R^n, norm(x, p) ≤ right_side} +C = {x ∈ R^n, norm(x, p) ≤ right_hand_side} ``` """ struct LpNormLMO{T,p} <: LinearMinimizationOracle @@ -28,7 +28,7 @@ function compute_extreme_point(lmo::LpNormLMO{T,2}, direction; kwargs...) where end function compute_extreme_point(lmo::LpNormLMO{T,Inf}, direction; kwargs...) where {T} - return -[lmo.right_hand_side * sign(d) for d in direction] + return -[lmo.right_hand_side * (1 - 2signbit(d)) for d in direction] end function compute_extreme_point(lmo::LpNormLMO{T,1}, direction; kwargs...) where {T} diff --git a/src/polytope_oracles.jl b/src/polytope_oracles.jl index 439f5905c..da197cf6c 100644 --- a/src/polytope_oracles.jl +++ b/src/polytope_oracles.jl @@ -130,6 +130,30 @@ function convert_mathopt( return MathOptLMO(optimizer) end + +""" + ScaledBoundLInfNormBall(lower_bounds, upper_bounds) + +Polytope similar to a L-inf-ball with shifted bounds or general box constraints. +Lower- and upper-bounds are passed on as abstract vectors, possibly of different types. +For the standard L-inf ball, all lower- and upper-bounds would be -1 and 1. +""" +struct ScaledBoundLInfNormBall{T, VT1 <: AbstractVector{T}, VT2 <: AbstractVector{T}} <: LinearMinimizationOracle + lower_bounds::VT1 + upper_bounds::VT2 +end + +function compute_extreme_point(lmo::ScaledBoundLInfNormBall, direction; kwargs...) 
+ v = copy(lmo.lower_bounds) + for i in eachindex(direction) + if direction[i] * lmo.upper_bounds[i] < direction[i] * lmo.lower_bounds[i] + v[i] = lmo.upper_bounds[i] + end + end + return v +end + + """ ScaledBoundL1NormBall(lower_bounds, upper_bounds) diff --git a/test/lmo.jl b/test/lmo.jl index 7d84cab0a..1d6bc6d00 100644 --- a/test/lmo.jl +++ b/test/lmo.jl @@ -532,11 +532,11 @@ end @test vvec ≈ [vinf; v1] end -@testset "Scaled norm polytopes" begin +@testset "Scaled L-1 norm polytopes" begin lmo = FrankWolfe.ScaledBoundL1NormBall(-ones(10), ones(10)) # equivalent to LMO lmo_ref = FrankWolfe.LpNormLMO{1}(1) - # all coordinates shifted up + # all coordinates shifted up lmo_shifted = FrankWolfe.ScaledBoundL1NormBall(zeros(10), 2 * ones(10)) lmo_scaled = FrankWolfe.ScaledBoundL1NormBall(-2 * ones(10), 2 * ones(10)) for _ in 1:100 @@ -555,3 +555,39 @@ end @test v ≈ vref @test norm(v) == 1 end + + +@testset "Scaled L-inf norm polytopes" begin + # tests ScaledBoundLInfNormBall for the standard hypercube, a shifted one, and a scaled one + lmo = FrankWolfe.ScaledBoundLInfNormBall(-ones(10), ones(10)) + lmo_ref = FrankWolfe.LpNormLMO{Inf}(1) + lmo_shifted = FrankWolfe.ScaledBoundLInfNormBall(zeros(10), 2 * ones(10)) + lmo_scaled = FrankWolfe.ScaledBoundLInfNormBall(-2 * ones(10), 2 * ones(10)) + bounds = collect(1.0:10) + # tests another ScaledBoundLInfNormBall with unequal bounds against a MOI optimizer + lmo_scaled_unequally = FrankWolfe.ScaledBoundLInfNormBall(-bounds, bounds) + o = GLPK.Optimizer() + MOI.set(o, MOI.Silent(), true) + x = MOI.add_variables(o, 10) + MOI.add_constraint.(o, x, MOI.GreaterThan.(-bounds)) + MOI.add_constraint.(o, x, MOI.LessThan.(bounds)) + scaled_unequally_opt = FrankWolfe.MathOptLMO(o) + for _ in 1:100 + d = randn(10) + v = FrankWolfe.compute_extreme_point(lmo, d) + vref = FrankWolfe.compute_extreme_point(lmo_ref, d) + @test v ≈ vref + vshift = FrankWolfe.compute_extreme_point(lmo_shifted, d) + @test v .+ 1 ≈ vshift + v2 = 
FrankWolfe.compute_extreme_point(lmo_scaled, d) + @test v2 ≈ 2v + v3 = FrankWolfe.compute_extreme_point(lmo_scaled_unequally, d) + v3_test = FrankWolfe.compute_extreme_point(scaled_unequally_opt, d) + @test v3 ≈ v3_test + end + d = zeros(10) + v = FrankWolfe.compute_extreme_point(lmo, d) + vref = FrankWolfe.compute_extreme_point(lmo_ref, d) + @test v ≈ vref + @test norm(v, Inf) == 1 +end