rewrote old one as AdaptiveZerothOrder

matbesancon committed Nov 8, 2023
1 parent c438c31 commit df8616c
Showing 12 changed files with 207 additions and 56 deletions.
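In short: the former `FrankWolfe.Adaptive` line search (function-value, i.e. zeroth-order, backtracking test) is renamed `AdaptiveZerothOrder`, and `Adaptive` is reintroduced as a new variant whose backtracking test uses a gradient evaluation instead. Below is a minimal sketch of how callers choose between the two after this commit; the objective, LMO, and starting point are hypothetical illustration, while the constructors and the `frank_wolfe` call pattern follow the tests in this diff:

```julia
using FrankWolfe, LinearAlgebra

# Hypothetical toy problem: minimize ‖x - p‖² over the probability simplex.
n = 100
p = rand(n)
f(x) = norm(x - p)^2
grad!(storage, x) = (storage .= 2 .* (x .- p))

lmo = FrankWolfe.ProbabilitySimplexOracle(1.0)
x0 = collect(FrankWolfe.compute_extreme_point(lmo, zeros(n)))

# formerly `FrankWolfe.Adaptive()`: zeroth-order (function-value) test
x_zo, _ = FrankWolfe.frank_wolfe(
    f, grad!, lmo, x0;
    max_iteration=1000,
    line_search=FrankWolfe.AdaptiveZerothOrder(),
)

# the new `FrankWolfe.Adaptive()`: gradient-based test
x_fo, _ = FrankWolfe.frank_wolfe(
    f, grad!, lmo, x0;
    max_iteration=1000,
    line_search=FrankWolfe.Adaptive(),
)
```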
2 changes: 1 addition & 1 deletion src/alternating_methods.jl
@@ -92,7 +92,7 @@ function ProjectionFW(y, lmo; max_iter=10000, eps=1e-3)
epsilon=eps,
max_iteration=max_iter,
trajectory=true,
line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=true),
line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=false),
)
return x_opt
end
157 changes: 124 additions & 33 deletions src/linesearch.jl
@@ -333,7 +333,7 @@ It is also the fallback when the Lipschitz constant estimation fails due to numerical errors.
`perform_line_search` also has a `should_upgrade` keyword argument controlling
whether a temporary upgrade to `BigFloat` is performed for extended precision.
"""
mutable struct Adaptive{T,TT} <: LineSearchMethod
mutable struct AdaptiveZerothOrder{T,TT} <: LineSearchMethod
eta::T
tau::TT
L_est::T
@@ -343,36 +343,36 @@ mutable struct Adaptive{T,TT} <: LineSearchMethod
relaxed_smoothness::Bool
end

Adaptive(eta::T, tau::TT) where {T,TT} =
Adaptive{T,TT}(eta, tau, T(Inf), T(1e10), T(0.5), true, false)
AdaptiveZerothOrder(eta::T, tau::TT) where {T,TT} =
AdaptiveZerothOrder{T,TT}(eta, tau, T(Inf), T(1e10), T(0.5), true, false)

Adaptive(;
AdaptiveZerothOrder(;
eta=0.9,
tau=2,
L_est=Inf,
max_estimate=1e10,
alpha=0.5,
verbose=true,
relaxed_smoothness=false,
) = Adaptive(eta, tau, L_est, max_estimate, alpha, verbose, relaxed_smoothness)
) = AdaptiveZerothOrder(eta, tau, L_est, max_estimate, alpha, verbose, relaxed_smoothness)

struct AdaptiveWorkspace{XT,BT}
struct AdaptiveZerothOrderWorkspace{XT,BT}
x::XT
xbig::BT
end

build_linesearch_workspace(::Adaptive, x, gradient) = AdaptiveWorkspace(similar(x), big.(x))
build_linesearch_workspace(::AdaptiveZerothOrder, x, gradient) = AdaptiveZerothOrderWorkspace(similar(x), big.(x))

function perform_line_search(
line_search::Adaptive,
line_search::AdaptiveZerothOrder,
t,
f,
grad!,
gradient,
x,
d,
gamma_max,
storage::AdaptiveWorkspace,
storage::AdaptiveZerothOrderWorkspace,
memory_mode::MemoryEmphasis;
should_upgrade::Val=Val{false}(),
)
@@ -401,32 +401,21 @@ function perform_line_search(

gradient_storage = similar(gradient)

# while f(x_storage) - f(x) >
# -γ * α * dot_dir + α^2 * γ^2 * ndir2 * M / 2 + eps(float(γ)) &&
# γ ≥ 100 * eps(float(γ))

# # DEPRECATED / remove in future versions
# # Additional smoothness condition
# if line_search.relaxed_smoothness
# grad!(gradient_storage, x_storage)
# if fast_dot(gradient, d) - fast_dot(gradient_storage, d) <= γ * M * ndir2 + eps(float(γ))
# break
# end
# end

#################
# modified adaptive line search test from:
# S. Pokutta "The Frank-Wolfe algorithm: a short introduction" (2023), preprint
# replaces the original test from:
# Pedregosa, F., Negiar, G., Askari, A., and Jaggi, M. (2020). "Linearly convergent Frank–Wolfe with backtracking line-search", Proceedings of AISTATS.
#################
grad!(gradient_storage, x_storage)
while 0 > fast_dot(gradient_storage, d) && γ ≥ 100 * eps(float(γ))

while f(x_storage) - f(x) >
-γ * α * dot_dir + α^2 * γ^2 * ndir2 * M / 2 + eps(float(γ)) &&
γ ≥ 100 * eps(float(γ))

# Additional smoothness condition
if line_search.relaxed_smoothness
grad!(gradient_storage, x_storage)
if fast_dot(gradient, d) - fast_dot(gradient_storage, d) <= γ * M * ndir2 + eps(float(γ))
break
end
end

M *= line_search.tau
γ = min(max(dot_dir / (M * ndir2), 0), gamma_max)
x_storage = muladd_memory_mode(memory_mode, x_storage, x, γ, d)
grad!(gradient_storage, x_storage)

niter += 1
if M > line_search.max_estimate
@@ -458,7 +447,7 @@ function perform_line_search(
return γ
end
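For orientation, here is the sufficient-decrease test the zeroth-order loop above enforces, written out explicitly. This reconstruction assumes, consistent with the rest of the package, that `muladd_memory_mode(mem, s, x, γ, d)` forms the candidate point s = x − γd; `dot_dir` is ⟨∇f(x), d⟩, `ndir2` is ‖d‖², α is the `alpha` field, and τ the `tau` multiplier:

```latex
% Zeroth-order backtracking: while the test below fails, inflate M <- tau*M
% and recompute gamma; accept (up to eps-safeguards) once
\[
f(x - \gamma d) - f(x) \le -\alpha \gamma \, \langle \nabla f(x), d \rangle
  + \frac{\alpha^2 \gamma^2 M \lVert d \rVert^2}{2},
\qquad
\gamma = \min\left( \max\left( \frac{\langle \nabla f(x), d \rangle}{M \lVert d \rVert^2},\, 0 \right),\, \gamma_{\max} \right).
\]
```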

Base.print(io::IO, ::Adaptive) = print(io, "Adaptive")
Base.print(io::IO, ::AdaptiveZerothOrder) = print(io, "AdaptiveZerothOrder")

function _upgrade_accuracy_adaptive(gradient, direction, storage, ::Val{true})
direction_big = big.(direction)
@@ -473,6 +462,108 @@ function _upgrade_accuracy_adaptive(gradient, direction, storage, ::Val{false})
return (dot_dir, ndir2, storage.x)
end
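The `Val{true}`/`Val{false}` argument above is the standard Julia idiom for selecting a method at compile time, here choosing whether inputs are promoted to `BigFloat`. A self-contained sketch of the idiom, with hypothetical names:

```julia
# Val-dispatch: the type of the second argument picks the method statically.
upgrade(x, ::Val{true})  = big.(x)  # extended-precision copy (BigFloat)
upgrade(x, ::Val{false}) = x        # unchanged

upgrade([1.0, 2.0], Val{true}())   # Vector{BigFloat}
upgrade([1.0, 2.0], Val{false}())  # Vector{Float64}
```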

mutable struct Adaptive{T,TT} <: LineSearchMethod
eta::T
tau::TT
L_est::T
max_estimate::T
verbose::Bool
relaxed_smoothness::Bool
end

Adaptive(eta::T, tau::TT) where {T,TT} =
Adaptive{T,TT}(eta, tau, T(Inf), T(1e10), true, false)

Adaptive(;
eta=0.9,
tau=2,
L_est=Inf,
max_estimate=1e10,
verbose=true,
relaxed_smoothness=false,
) = Adaptive(eta, tau, L_est, max_estimate, verbose, relaxed_smoothness)

struct AdaptiveWorkspace{XT,BT}
x::XT
xbig::BT
gradient_storage::XT
end

build_linesearch_workspace(::Adaptive, x, gradient) = AdaptiveWorkspace(similar(x), big.(x), similar(x))

function perform_line_search(
line_search::Adaptive,
t,
f,
grad!,
gradient,
x,
d,
gamma_max,
storage::AdaptiveWorkspace,
memory_mode::MemoryEmphasis;
should_upgrade::Val=Val{false}(),
)
if norm(d) ≤ length(d) * eps(float(eltype(d)))
if should_upgrade isa Val{true}
return big(zero(promote_type(eltype(d), eltype(gradient))))
else
return zero(promote_type(eltype(d), eltype(gradient)))
end
end
x_storage = storage.x
if !isfinite(line_search.L_est)
epsilon_step = min(1e-3, gamma_max)
gradient_stepsize_estimation = storage.gradient_storage
x_storage = muladd_memory_mode(memory_mode, x_storage, x, epsilon_step, d)
grad!(gradient_stepsize_estimation, x_storage)
line_search.L_est = norm(gradient - gradient_stepsize_estimation) / (epsilon_step * norm(d))
end
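# Clarifying note (added; assumes muladd_memory_mode writes x_storage = x - ε*d):
# the block above seeds L_est with the finite-difference secant estimate
#     L_est ≈ ‖∇f(x) - ∇f(x - ε d)‖ / (ε ‖d‖),  with ε = min(1e-3, γ_max),
# i.e. an observed local Lipschitz constant of the gradient along d.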
M = line_search.eta * line_search.L_est
(dot_dir, ndir2, x_storage) = _upgrade_accuracy_adaptive(gradient, d, storage, should_upgrade)
γ = min(max(dot_dir / (M * ndir2), 0), gamma_max)
x_storage = muladd_memory_mode(memory_mode, x_storage, x, γ, d)
niter = 0
clipping = false
gradient_storage = storage.gradient_storage

grad!(gradient_storage, x_storage)
while 0 > fast_dot(gradient_storage, d) && γ ≥ 100 * eps(float(γ))
M *= line_search.tau
γ = min(max(dot_dir / (M * ndir2), 0), gamma_max)
x_storage = muladd_memory_mode(memory_mode, x_storage, x, γ, d)
grad!(gradient_storage, x_storage)

niter += 1
if M > line_search.max_estimate
# if this occurs, we have hit numerical trouble
# if we are not already using relaxed smoothness, we first retry with it as a more stable fallback
# note that the smoothness estimate is not updated at this iteration.
if !line_search.relaxed_smoothness
linesearch_fallback = deepcopy(line_search)
linesearch_fallback.relaxed_smoothness = true
return perform_line_search(
linesearch_fallback, t, f, grad!, gradient, x, d, gamma_max, storage, memory_mode;
should_upgrade=should_upgrade,
)
end
# if we are already using relaxed smoothness, produce a warning:
# one might see negative progress, cycling, or stalling.
# Potentially upgrade accuracy or use an alternative line search strategy
if line_search.verbose
@warn "Smoothness estimate ran away -> hard clipping. Convergence might not be guaranteed."
end
clipping = true
break
end
end
if !clipping
line_search.L_est = M
end
γ = min(max(dot_dir / (line_search.L_est * ndir2), 0), gamma_max)
return γ
end
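In contrast to the zeroth-order test, the new `Adaptive` backtracks on a first-order condition: M is inflated until the inner product ⟨∇f(x − γd), d⟩ becomes nonnegative, i.e. until the step no longer overshoots the one-dimensional minimizer along d, so each backtracking iteration costs one gradient evaluation instead of one function evaluation. It also reuses the preallocated `storage.gradient_storage` rather than allocating a gradient buffer on every call, as the zeroth-order version above does. Under the same x − γd convention as before:

```latex
% First-order backtracking: inflate M <- tau*M while the step overshoots,
\[
\langle \nabla f(x - \gamma d),\, d \rangle < 0,
\qquad
\gamma = \min\left( \max\left( \frac{\langle \nabla f(x), d \rangle}{M \lVert d \rVert^2},\, 0 \right),\, \gamma_{\max} \right),
\]
% and stop as soon as the inner product is nonnegative.
```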

"""
MonotonicStepSize{F}
4 changes: 2 additions & 2 deletions test/bcg_direction_error.jl
@@ -30,7 +30,7 @@ x, v, primal, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient(
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.Adaptive(L_est=2.0),
line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0),
print_iter=100,
memory_mode=FrankWolfe.InplaceEmphasis(),
verbose=true,
@@ -52,7 +52,7 @@ x, v, primal_cut, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient(
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.Adaptive(L_est=2.0),
line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0),
print_iter=k / 10,
memory_mode=FrankWolfe.InplaceEmphasis(),
verbose=true,
6 changes: 3 additions & 3 deletions test/blended_accelerated.jl
@@ -39,7 +39,7 @@ const L = eigmax(hessian)
copy(x0),
epsilon=target_tolerance,
max_iteration=k,
line_search=FrankWolfe.Adaptive(L_est=L),
line_search=FrankWolfe.AdaptiveZerothOrder(L_est=L),
print_iter=k / 10,
hessian=hessian,
memory_mode=FrankWolfe.InplaceEmphasis(),
@@ -57,7 +57,7 @@ const L = eigmax(hessian)
copy(x0),
epsilon=target_tolerance,
max_iteration=k,
line_search=FrankWolfe.Adaptive(L_est=L),
line_search=FrankWolfe.AdaptiveZerothOrder(L_est=L),
print_iter=k / 10,
hessian=hessian,
memory_mode=FrankWolfe.InplaceEmphasis(),
@@ -75,7 +75,7 @@ const L = eigmax(hessian)
copy(x0),
epsilon=target_tolerance,
max_iteration=k,
line_search=FrankWolfe.Adaptive(L_est=L),
line_search=FrankWolfe.AdaptiveZerothOrder(L_est=L),
print_iter=k / 10,
memory_mode=FrankWolfe.InplaceEmphasis(),
verbose=false,
23 changes: 23 additions & 0 deletions test/oddities.jl
@@ -24,6 +24,17 @@ using Test
) < 1.0e-10

x0 = copy(x00)
@test abs(
FrankWolfe.away_frank_wolfe(
f,
grad!,
lmo,
x0,
max_iteration=1000,
line_search=FrankWolfe.AdaptiveZerothOrder(),
verbose=true,
)[3],
) < 1.0e-10
@test abs(
FrankWolfe.away_frank_wolfe(
f,
@@ -38,6 +49,18 @@ using Test


x0 = copy(x00)
@test abs(
FrankWolfe.away_frank_wolfe(
f,
grad!,
lmo,
x0,
max_iteration=1000,
lazy=true,
line_search=FrankWolfe.AdaptiveZerothOrder(),
verbose=true,
)[3],
) < 1.0e-10
@test abs(
FrankWolfe.away_frank_wolfe(
f,
12 changes: 6 additions & 6 deletions test/pairwise.jl
@@ -23,7 +23,7 @@ end
lmo_prob,
x0,
max_iteration=6000,
line_search=FrankWolfe.Adaptive(),
line_search=FrankWolfe.AdaptiveZerothOrder(),
verbose=false,
epsilon=3e-7,
)
@@ -33,7 +33,7 @@ end
lmo_prob,
x0,
max_iteration=6000,
line_search=FrankWolfe.Adaptive(),
line_search=FrankWolfe.AdaptiveZerothOrder(),
print_iter=100,
verbose=false,
epsilon=3e-7,
@@ -46,7 +46,7 @@ end
lmo_prob,
x0,
max_iteration=6000,
line_search=FrankWolfe.Adaptive(),
line_search=FrankWolfe.AdaptiveZerothOrder(),
verbose=false,
lazy=true,
epsilon=3e-7,
@@ -73,7 +73,7 @@ end
lmo_prob,
x0,
max_iteration=6000,
line_search=FrankWolfe.Adaptive(),
line_search=FrankWolfe.AdaptiveZerothOrder(),
verbose=false,
epsilon=3e-7,
callback=test_callback,
@@ -96,7 +96,7 @@ end
lmo,
x0,
max_iteration=6000,
line_search=FrankWolfe.Adaptive(),
line_search=FrankWolfe.AdaptiveZerothOrder(),
epsilon=3e-7,
verbose=false,
)
@@ -109,7 +109,7 @@ end
lmo,
x0,
max_iteration=6000,
line_search=FrankWolfe.Adaptive(),
line_search=FrankWolfe.AdaptiveZerothOrder(),
epsilon=3e-7,
verbose=false,
recompute_last_vertex=false,
6 changes: 3 additions & 3 deletions test/runtests.jl
@@ -185,7 +185,7 @@ end
lmo_prob,
x0,
max_iteration=1000,
line_search=FrankWolfe.Adaptive(),
line_search=FrankWolfe.AdaptiveZerothOrder(),
verbose=true,
)[3] - 0.2,
) < 1.0e-5
@@ -444,7 +444,7 @@ end
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.Adaptive(),
line_search=FrankWolfe.AdaptiveZerothOrder(),
print_iter=k / 10,
memory_mode=FrankWolfe.InplaceEmphasis(),
verbose=true,
@@ -459,7 +459,7 @@ end
lmo,
x0,
max_iteration=k,
line_search=FrankWolfe.Adaptive(),
line_search=FrankWolfe.AdaptiveZerothOrder(),
print_iter=k / 10,
memory_mode=FrankWolfe.InplaceEmphasis(),
verbose=true,