Skip to content

Commit

Permalink
Merge branch 'master' into BPCG-with-direct-solve
Browse files Browse the repository at this point in the history
  • Loading branch information
pokutta authored Oct 25, 2024
2 parents 6d4280f + 1ac1797 commit 24ea326
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 111 deletions.
37 changes: 37 additions & 0 deletions examples/alternating_projections.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import FrankWolfe
using LinearAlgebra
using Random


include("../examples/plot_utils.jl")

Random.seed!(100)

n = 500
lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n])
lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n])


trajectories = []

methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient]

for (i,m) in enumerate(methods)
if i == 1
x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m)
elseif i== 2
x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true)
else
x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true)
end
push!(trajectories, traj_data)
end



labels = ["FW", "BPCG" ,"BPCG (reuse)"]

plot_trajectories(trajectories, labels, xscalelog=true)



2 changes: 1 addition & 1 deletion src/active_set_quadratic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ function detect_quadratic_function(grad!, x0; test=true)
X[:, i] .-= x0
G[:, i] .= storage .- g0
end
A = G * inv(X)
A = G \ X
b = g0 - A * x0
if test
x_test = randn(T, n)
Expand Down
163 changes: 93 additions & 70 deletions src/alternating_methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ end
Alternating Linear Minimization minimizes the objective `f` over the intersections of the feasible domains specified by `lmos`.
The tuple `x0` defines the initial points for each domain.
Returns a tuple `(x, v, primal, dual_gap, infeas, traj_data)` with:
Returns a tuple `(x, v, primal, dual_gap, dist2, traj_data)` with:
- `x` cartesian product of final iterates
- `v` cartesian product of last vertices of the LMOs
- `primal` primal value `f(x)`
- `dual_gap` final Frank-Wolfe gap
- `infeas` sum of squared, pairwise distances between iterates
- `dist2` is 1/2 of the sum of squared, pairwise distances between iterates
- `traj_data` vector of trajectory information.
"""
function alternating_linear_minimization(
Expand All @@ -49,17 +49,17 @@ function alternating_linear_minimization(
grad!,
lmos::NTuple{N,LinearMinimizationOracle},
x0::Tuple{Vararg{Any,N}};
lambda::Union{Float64, Function}=1.0,
lambda::Union{Float64,Function}=1.0,
verbose=false,
trajectory=false,
callback=nothing,
max_iteration=10000,
print_iter = max_iteration / 10,
print_iter=max_iteration / 10,
memory_mode=InplaceEmphasis(),
line_search::LS=Adaptive(),
epsilon=1e-7,
kwargs...,
) where {N, LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}}
) where {N,LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}}

x0_bc = BlockVector([x0[i] for i in 1:N], [size(x0[i]) for i in 1:N], sum(length, x0))
gradf = similar(x0_bc)
Expand All @@ -74,21 +74,14 @@ function alternating_linear_minimization(
for i in 1:N
grad!(gradf.blocks[i], x.blocks[i])
end
t = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks]
t = [N * b - sum(x.blocks) for b in x.blocks]
return storage.blocks = λ[] * gradf.blocks + t
end
end

function dist2(x::BlockVector)
s = 0
for i=1:N
for j=1:i-1
diff = x.blocks[i] - x.blocks[j]
s += fast_dot(diff, diff)
end
end
return s
end
dist2(x::BlockVector) =
0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) -
sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1)

function build_objective()
λ = Ref(λ0)
Expand Down Expand Up @@ -122,8 +115,11 @@ function alternating_linear_minimization(

num_type = eltype(x0[1])
grad_type = eltype(gradf.blocks[1])
line_search_type = line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search)
println("MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration")
line_search_type =
line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search)
println(
"MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration",
)
println("TYPE: $num_type GRADIENTTYPE: $grad_type")
println("LAMBDA: $lambda")

Expand Down Expand Up @@ -177,7 +173,7 @@ function alternating_linear_minimization(
end

if lambda isa Function
callback = function (state,args...)
callback = function (state, args...)
state.f.λ[] = lambda(state)
state.grad!.λ[] = state.f.λ[]

Expand Down Expand Up @@ -211,33 +207,15 @@ function alternating_linear_minimization(
end


function ProjectionFW(y, lmo; max_iter=10000, eps=1e-3)
f(x) = sum(abs2, x - y)
grad!(storage, x) = storage .= 2 * (x - y)

x0 = FrankWolfe.compute_extreme_point(lmo, y)
x_opt, _ = FrankWolfe.frank_wolfe(
f,
grad!,
lmo,
x0,
epsilon=eps,
max_iteration=max_iter,
trajectory=true,
line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=false),
)
return x_opt
end

"""
alternating_projections(lmos::NTuple{N,LinearMinimizationOracle}, x0; ...) where {N}
Computes a point in the intersection of feasible domains specified by `lmos`.
Returns a tuple `(x, v, dual_gap, infeas, traj_data)` with:
Returns a tuple `(x, v, dual_gap, dist2, traj_data)` with:
- `x` cartesian product of final iterates
- `v` cartesian product of last vertices of the LMOs
- `dual_gap` final Frank-Wolfe gap
- `infeas` sum of squared, pairwise distances between iterates
- `dist2` is 1/2 * sum of squared, pairwise distances between iterates
- `traj_data` vector of trajectory information.
"""
function alternating_projections(
Expand All @@ -252,6 +230,10 @@ function alternating_projections(
callback=nothing,
traj_data=[],
timeout=Inf,
proj_method=frank_wolfe,
inner_epsilon::Function=t -> 1 / (t^2 + 1),
reuse_active_set=false,
kwargs...,
) where {N}
return alternating_projections(
ProductLMO(lmos),
Expand All @@ -265,6 +247,10 @@ function alternating_projections(
callback,
traj_data,
timeout,
proj_method,
inner_epsilon,
reuse_active_set,
kwargs...,
)
end

Expand All @@ -281,17 +267,21 @@ function alternating_projections(
callback=nothing,
traj_data=[],
timeout=Inf,
proj_method=frank_wolfe,
inner_epsilon::Function=t -> 1 / (t^2 + 1),
reuse_active_set=false,
kwargs...,
) where {N}

# header and format string for output of the algorithm
headers = ["Type", "Iteration", "Dual Gap", "Infeas", "Time", "It/sec"]
headers = ["Type", "Iteration", "Dual Gap", "dist2", "Time", "It/sec"]
format_string = "%6s %13s %14e %14e %14e %14e\n"
function format_state(state, infeas)
function format_state(state, primal)
rep = (
steptype_string[Symbol(state.step_type)],
string(state.t),
Float64(state.dual_gap),
Float64(infeas),
Float64(primal),
state.time,
state.t / state.time,
)
Expand All @@ -300,27 +290,61 @@ function alternating_projections(

t = 0
dual_gap = Inf
x = fill(x0, N)
v = similar(x)
dual_gaps = fill(Inf, N)
x = BlockVector(compute_extreme_point.(lmo.lmos, fill(x0, N)))
step_type = ST_REGULAR
gradient = similar(x)
ndim = ndims(x)

infeasibility(x) = sum(
fast_dot(
selectdim(x, ndim, i) - selectdim(x, ndim, j),
selectdim(x, ndim, i) - selectdim(x, ndim, j),
) for i in 1:N for j in 1:i-1
)
if reuse_active_set
if proj_method
[away_frank_wolfe, blended_pairwise_conditional_gradient, pairwise_frank_wolfe]
error("The selected FW method does not support active sets reuse.")
end
active_sets = [ActiveSet([(1.0, x.blocks[i])]) for i in 1:N]
end

partial_infeasibility(x) =
sum(fast_dot(x[mod(i - 2, N)+1] - x[i], x[mod(i - 2, N)+1] - x[i]) for i in 1:N)
dist2(x::BlockVector) =
0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) -
sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1)

function grad!(storage, x)
@. storage = [2 * (x[i] - x[mod(i - 2, N)+1]) for i in 1:N]
return storage.blocks = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks]
end

projection_step(x, i, t) = ProjectionFW(x, lmo.lmos[i]; eps=1 / (t^2 + 1))
function projection_step(i, t)
xii = x.blocks[mod(i - 2, N)+1] # iterate in previous block
f(y) = sum(abs2, y - xii)
function grad_proj!(storage, y)
return storage .= 2 * (y - xii)
end

if reuse_active_set

results = proj_method(
f,
grad_proj!,
lmo.lmos[i],
active_sets[i];
epsilon=inner_epsilon(t),
max_iteration=10000,
line_search=Adaptive(),
kwargs...,
)
active_sets[i] = results[:active_set]
else
results = proj_method(
f,
grad_proj!,
lmo.lmos[i],
x.blocks[i];
epsilon=inner_epsilon(t),
max_iteration=10000,
line_search=Adaptive(),
kwargs...,
)
end
return results[:x], results[:dual_gap]
end


if trajectory
Expand Down Expand Up @@ -372,19 +396,18 @@ function alternating_projections(
# Projection step:
for i in 1:N
# project the previous iterate on the i-th feasible region
x[i] = projection_step(x[mod(i - 2, N)+1], i, t)
x.blocks[i], dual_gaps[i] = projection_step(i, t)
end

dual_gap = sum(dual_gaps)

# Update gradients
grad!(gradient, x)

# Update dual gaps
v = compute_extreme_point.(lmo.lmos, gradient)
dual_gap = fast_dot(x - v, gradient)

# go easy on the memory - only compute if really needed
if ((mod(t, print_iter) == 0 && verbose) || callback !== nothing)
infeas = infeasibility(x)
primal = dist2(x)
end

first_iter = false
Expand All @@ -393,12 +416,12 @@ function alternating_projections(
if callback !== nothing
state = CallbackState(
t,
infeas,
infeas - dual_gap,
primal,
primal - dual_gap,
dual_gap,
tot_time,
x,
v,
nothing,
nothing,
nothing,
nothing,
Expand All @@ -408,7 +431,7 @@ function alternating_projections(
step_type,
)
# @show state
if callback(state, infeas) === false
if callback(state, primal) === false
break
end
end
Expand All @@ -419,18 +442,18 @@ function alternating_projections(
# this is important as some variants do not recompute f(x) and the dual_gap regularly but only when reporting
# hence the final computation.
step_type = ST_LAST
infeas = infeasibility(x)
primal = dist2(x)
grad!(gradient, x)
v = compute_extreme_point.(lmo.lmos, gradient)
v = compute_extreme_point(lmo, gradient)
dual_gap = fast_dot(x - v, gradient)

tot_time = (time_ns() - time_start) / 1.0e9

if callback !== nothing
state = CallbackState(
t,
infeas,
infeas - dual_gap,
primal,
primal - dual_gap,
dual_gap,
tot_time,
x,
Expand All @@ -443,9 +466,9 @@ function alternating_projections(
gradient,
step_type,
)
callback(state, infeas)
callback(state, primal)
end

return x, v, dual_gap, infeas, traj_data
return x, v, dual_gap, primal, traj_data

end
Loading

0 comments on commit 24ea326

Please sign in to comment.