diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index 1b02a686e..7e946aebb 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -37,6 +37,7 @@ include("active_set.jl") include("blended_cg.jl") include("afw.jl") include("fw_algorithms.jl") +include("block_oracles.jl") include("block_coordinate_algorithms.jl") include("alternating_methods.jl") include("pairwise.jl") diff --git a/src/abstract_oracles.jl b/src/abstract_oracles.jl index 2fe2f9e9e..3fb31de32 100644 --- a/src/abstract_oracles.jl +++ b/src/abstract_oracles.jl @@ -256,61 +256,3 @@ function compute_extreme_point( end return v end - -""" - ProductLMO(lmos...) - -Linear minimization oracle over the Cartesian product of multiple LMOs. -""" -struct ProductLMO{N,TL<:NTuple{N,LinearMinimizationOracle}} <: LinearMinimizationOracle - lmos::TL -end - -function ProductLMO{N}(lmos::TL) where {N,TL<:NTuple{N,LinearMinimizationOracle}} - return ProductLMO{N,TL}(lmos) -end - -function ProductLMO(lmos::Vararg{LinearMinimizationOracle,N}) where {N} - return ProductLMO{N}(lmos) -end - -""" - compute_extreme_point(lmo::ProductLMO, direction::Tuple; kwargs...) - -Extreme point computation on Cartesian product, with a direction `(d1, d2, ...)` given as a tuple of directions. -All keyword arguments are passed to all LMOs. -""" -function compute_extreme_point(lmo::ProductLMO, direction::Tuple; kwargs...) - return compute_extreme_point.(lmo.lmos, direction; kwargs...) -end - -""" - compute_extreme_point(lmo::ProductLMO, direction::AbstractArray; direction_indices, storage=similar(direction)) - -Extreme point computation, with a direction array and `direction_indices` provided such that: -`direction[direction_indices[i]]` is passed to the i-th LMO. -If no `direction_indices` are provided, the direction array is sliced along the last dimension and such that: -`direction[:, ... ,:, i]` is passed to the i-th LMO. -The result is stored in the optional `storage` container. - -All keyword arguments are passed to all LMOs. -""" -function compute_extreme_point( - lmo::ProductLMO{N}, - direction::AbstractArray; - storage=similar(direction), - direction_indices=nothing, - kwargs..., -) where {N} - if direction_indices !== nothing - for idx in 1:N - storage[direction_indices[idx]] .= - compute_extreme_point(lmo.lmos[idx], direction[direction_indices[idx]]; kwargs...) - end - else - ndim = ndims(direction) - direction_array = [direction[[idx < ndim ? Colon() : i for idx in 1:ndim]...] for i in 1:N] - storage = cat(compute_extreme_point.(lmo.lmos, direction_array)..., dims=ndim) - end - return storage -end diff --git a/src/block_oracles.jl b/src/block_oracles.jl new file mode 100644 index 000000000..95bdca8d9 --- /dev/null +++ b/src/block_oracles.jl @@ -0,0 +1,260 @@ +mutable struct BlockVector{T, MT <: AbstractArray{T}, ST <: Tuple} <: AbstractVector{T} + blocks::Vector{MT} + block_sizes::Vector{ST} + tot_size::Int +end + +function BlockVector(arrays::AbstractVector{MT}) where {T, MT <: AbstractArray{T}} + block_sizes = size.(arrays) + tot_size = sum(prod, block_sizes) + return BlockVector(arrays, block_sizes, tot_size) +end + +Base.size(arr::BlockVector) = (arr.tot_size, ) + +# returns the corresponding (block_index, index_in_block) for a given flattened index (for the whole block variable) +function _matching_index_index(arr::BlockVector, idx::Integer) + if idx < 1 || idx > length(arr) + throw(BoundsError(arr, idx)) + end + first_idx = 1 + for block_idx in eachindex(arr.block_sizes) + next_first = first_idx + prod(arr.block_sizes[block_idx]) + if next_first <= idx + # continue to next block + first_idx = next_first + else + # index is here + index_in_block = idx - first_idx + 1 + return (block_idx, index_in_block) + end + end + error("unreachable $idx") +end + +function Base.getindex(arr::BlockVector, idx::Integer) + (midx, idx_inner) = _matching_index_index(arr, idx) + return arr.blocks[midx][idx_inner] +end + +function Base.setindex!(arr::BlockVector, v, idx::Integer) + (midx, idx_inner) = _matching_index_index(arr, idx) + arr.blocks[midx][idx_inner] = v + return arr.blocks[midx][idx_inner] +end + + +function Base.copyto!(dest::BlockVector, src::BlockVector) + dest.tot_size = src.tot_size + for midx in eachindex(dest.blocks) + dest.blocks[midx] = copy(src.blocks[midx]) + end + dest.block_sizes = copy(src.block_sizes) + return dest +end + +function Base.similar(src::BlockVector{T1, MT}, ::Type{T}) where {T1, MT, T} + blocks = [similar(src.blocks[i], T) for i in eachindex(src.blocks)] + return BlockVector( + blocks, + src.block_sizes, + src.tot_size, + ) +end + +Base.similar(src::BlockVector{T, MT}) where {T, MT} = similar(src, T) + +function Base.convert(::Type{BlockVector{T, MT}}, bmv::BlockVector) where {T, MT} + cblocks = convert.(MT, bmv.blocks) + return BlockVector( + cblocks, + copy(bmv.block_sizes), + bmv.tot_size, + ) +end + +function Base.:+(v1::BlockVector, v2::BlockVector) + if size(v1) != size(v2) || length(v1.block_sizes) != length(v2.block_sizes) + throw(DimensionMismatch("$(length(v1)) != $(length(v2))")) + end + for i in eachindex(v1.block_sizes) + if v1.block_sizes[i] != v2.block_sizes[i] + throw(DimensionMismatch("$i-th block: $(v1.block_sizes[i]) != $(v2.block_sizes[i])")) + end + end + return BlockVector( + v1.blocks .+ v2.blocks, + copy(v1.block_sizes), + v1.tot_size, + ) +end + +Base.:-(v::BlockVector) = BlockVector( + [-b for b in v.blocks], + v.block_sizes, + v.tot_size, +) + +function Base.:-(v1::BlockVector, v2::BlockVector) + return v1 + (-v2) +end + +function Base.:*(s::Number, v::BlockVector) + return BlockVector( + s .* v.blocks, + copy(v.block_sizes), + v.tot_size, + ) +end + +Base.:*(v::BlockVector, s::Number) = s * v + +function LinearAlgebra.dot(v1::BlockVector{T1}, v2::BlockVector{T2}) where {T1, T2} + if size(v1) != size(v2) || length(v1.block_sizes) != length(v2.block_sizes) + throw(DimensionMismatch("$(length(v1)) != $(length(v2))")) + end + T = promote_type(T1, T2) + d = zero(T) + @inbounds for i in eachindex(v1.block_sizes) + if v1.block_sizes[i] != v2.block_sizes[i] + throw(DimensionMismatch("$i-th block: $(v1.block_sizes[i]) != $(v2.block_sizes[i])")) + end + d += dot(v1.blocks[i], v2.blocks[i]) + end + return d +end + +LinearAlgebra.norm(v::BlockVector) = sqrt(dot(v, v)) + +function Base.isequal(v1::BlockVector, v2::BlockVector) + if v1 === v2 + return true + end + if v1.tot_size != v2.tot_size || v1.block_sizes != v2.block_sizes + return false + end + for bidx in eachindex(v1.blocks) + if !isequal(v1.blocks[bidx], v2.blocks[bidx]) + return false + end + end + return true +end + +""" + ProductLMO(lmos) + +Linear minimization oracle over the Cartesian product of multiple LMOs. +""" +struct ProductLMO{N, LT <: Union{NTuple{N, FrankWolfe.LinearMinimizationOracle}, AbstractVector{<: FrankWolfe.LinearMinimizationOracle}}} <: FrankWolfe.LinearMinimizationOracle + lmos::LT +end + +function ProductLMO(lmos::Vector{LMO}) where {LMO <: FrankWolfe.LinearMinimizationOracle} + return ProductLMO{1, Vector{LMO}}(lmos) +end + +function ProductLMO(lmos::NT) where {N, LMO <: FrankWolfe.LinearMinimizationOracle, NT <: NTuple{N, LMO}} + return ProductLMO{N, NT}(lmos) +end + +function ProductLMO{N}(lmos::TL) where {N,TL<:NTuple{N,LinearMinimizationOracle}} + return ProductLMO{N,TL}(lmos) +end + +function ProductLMO(lmos::Vararg{LinearMinimizationOracle,N}) where {N} + return ProductLMO{N}(lmos) +end + +function FrankWolfe.compute_extreme_point(lmo::ProductLMO, direction::BlockVector; kwargs...) + @assert length(direction.blocks) == length(lmo.lmos) + blocks = [FrankWolfe.compute_extreme_point(lmo.lmos[idx], direction.blocks[idx]; kwargs...) for idx in eachindex(lmo.lmos)] + v = BlockVector(blocks, direction.block_sizes, direction.tot_size) + return v +end + +""" + compute_extreme_point(lmo::ProductLMO, direction::Tuple; kwargs...) + +Extreme point computation on Cartesian product, with a direction `(d1, d2, ...)` given as a tuple of directions. +All keyword arguments are passed to all LMOs. +""" +function compute_extreme_point(lmo::ProductLMO, direction::Tuple; kwargs...) + return compute_extreme_point.(lmo.lmos, direction; kwargs...) +end + +""" + compute_extreme_point(lmo::ProductLMO, direction::AbstractArray; direction_indices, storage=similar(direction)) + +Extreme point computation, with a direction array and `direction_indices` provided such that: +`direction[direction_indices[i]]` is passed to the i-th LMO. +If no `direction_indices` are provided, the direction array is sliced along the last dimension and such that: +`direction[:, ... ,:, i]` is passed to the i-th LMO. +The result is stored in the optional `storage` container. + +All keyword arguments are passed to all LMOs. +""" +function compute_extreme_point( + lmo::ProductLMO{N}, + direction::AbstractArray; + storage=similar(direction), + direction_indices=nothing, + kwargs..., +) where {N} + if direction_indices !== nothing + for idx in 1:N + storage[direction_indices[idx]] .= + compute_extreme_point(lmo.lmos[idx], direction[direction_indices[idx]]; kwargs...) + end + else + ndim = ndims(direction) + direction_array = [direction[[idx < ndim ? Colon() : i for idx in 1:ndim]...] for i in 1:N] + storage = cat(compute_extreme_point.(lmo.lmos, direction_array)..., dims=ndim) + end + return storage +end + +""" +MathOptInterface LMO but returns a vertex respecting the block structure +""" +function FrankWolfe.compute_extreme_point(lmo::FrankWolfe.MathOptLMO, direction::BlockVector) + xs = MOI.get(lmo.o, MOI.ListOfVariableIndices()) + terms = [MOI.ScalarAffineTerm(direction[idx], xs[idx]) for idx in eachindex(xs)] + vec_v = FrankWolfe.compute_extreme_point(lmo::FrankWolfe.MathOptLMO, terms) + v = similar(direction) + copyto!(v, vec_v) + return v +end + +function FrankWolfe.muladd_memory_mode(mem::FrankWolfe.InplaceEmphasis, storage::BlockVector, x::BlockVector, gamma::Real, d::BlockVector) + @inbounds for i in eachindex(x.blocks) + FrankWolfe.muladd_memory_mode(mem, storage.blocks[i], x.blocks[i], gamma, d.blocks[i]) + end + return storage +end + +function FrankWolfe.muladd_memory_mode(mem::FrankWolfe.InplaceEmphasis, x::BlockVector, gamma::Real, d::BlockVector) + @inbounds for i in eachindex(x.blocks) + FrankWolfe.muladd_memory_mode(mem, x.blocks[i], gamma, d.blocks[i]) + end + return x +end + +function FrankWolfe.muladd_memory_mode(mem::FrankWolfe.InplaceEmphasis, d::BlockVector, x::BlockVector, v::BlockVector) + @inbounds for i in eachindex(d.blocks) + FrankWolfe.muladd_memory_mode(mem, d.blocks[i], x.blocks[i], v.blocks[i]) + end + return d +end + +function FrankWolfe.compute_active_set_iterate!(active_set::FrankWolfe.ActiveSet{<:BlockVector}) + @inbounds for i in eachindex(active_set.x.blocks) + @. active_set.x.blocks[i] .= 0 + end + for (λi, ai) in active_set + for i in eachindex(active_set.x.blocks) + FrankWolfe.muladd_memory_mode(FrankWolfe.InplaceEmphasis(), active_set.x.blocks[i], -λi, ai.blocks[i]) + end + end + return active_set.x +end diff --git a/test/blocks.jl b/test/blocks.jl new file mode 100644 index 000000000..50e16df9c --- /dev/null +++ b/test/blocks.jl @@ -0,0 +1,90 @@ +using Test +using Random +using LinearAlgebra +using FrankWolfe +using SparseArrays + +@testset "Block array behaviour" begin + arr = FrankWolfe.BlockVector([ + [ + 1 3 + 4 6.0 + ], + [ + 1 3 1 + 4 1 2 + 1 3 5.0 + ], + ],) + @test length(arr) == 4 + 9 + @test arr[end] == 5.0 + @test arr[1] == 1.0 + @test arr[5] == 1.0 + @test arr[4] == 6.0 + v1 = Vector(arr) + arr2 = FrankWolfe.BlockVector([zeros(2, 2), ones(3, 3)],) + copyto!(arr2, arr) + v2 = Vector(arr2) + @test v1 == v2 + v1[1] *= 2 + @test v1 != v2 + @test size(similar(arr)) == size(arr) + @test typeof(similar(arr)) == typeof(arr) + @test norm(arr) == norm(collect(arr)) + @test dot(arr, arr) == dot(collect(arr), collect(arr)) + + arr3 = FrankWolfe.BlockVector([3 * ones(2, 2), ones(3, 3)],) + @test dot(arr3, arr) ≈ dot(arr, arr3) + @test dot(arr3, arr) ≈ dot(collect(arr3), collect(arr)) + arr4 = 2 * arr3 + @test arr4.blocks[1] == 6 * ones(2, 2) + arr5 = FrankWolfe.BlockVector([6 * ones(2, 2), 2 * ones(3, 3)],) + @test isequal(arr4, arr4) + @test isequal(arr4, arr5) + @test !isequal(arr3, arr4) + @test !isequal(arr2, FrankWolfe.BlockVector([zeros(2, 2)])) + +end + +@testset "FrankWolfe array methods" begin + mem = FrankWolfe.InplaceEmphasis() + arr0 = FrankWolfe.BlockVector([ + [ + 1 3 + 4 6.0 + ], + [ + 1 3 1 + 4 1 2 + 1 3 5.0 + ], + ],) + arr1 = rand() * arr0 + d = similar(arr1) + FrankWolfe.muladd_memory_mode(mem, d, arr1, arr0) + dref = arr1 - arr0 + @test d == dref + gamma = rand() + arr2 = zero(arr0) + FrankWolfe.muladd_memory_mode(mem, arr2, gamma, arr0) + @test arr2 == -gamma * arr0 + arr3 = similar(arr0) + FrankWolfe.muladd_memory_mode(mem, arr3, zero(arr0), gamma, arr0) + @test arr3 == -gamma * arr0 + + active_set = FrankWolfe.ActiveSet( + [0.25, 0.75], + [ + FrankWolfe.BlockVector([ones(100, 1), ones(30, 30)]), + FrankWolfe.BlockVector([3 * ones(100, 1), zeros(30, 30)]), + ], + FrankWolfe.BlockVector([ones(100, 1), ones(30, 30)]), + ) + FrankWolfe.compute_active_set_iterate!(active_set) + x = active_set.x + x_copy = 0 * x + for (λi, ai) in active_set + @. x_copy += λi * ai + end + @test norm(x_copy - x) ≤ 1e-12 +end diff --git a/test/lmo.jl b/test/lmo.jl index 821bcbd5a..7c6b514dd 100644 --- a/test/lmo.jl +++ b/test/lmo.jl @@ -632,6 +632,11 @@ end vvec = FrankWolfe.compute_extreme_point(lmo, [dinf; d1], direction_indices=(1:10, 11:15)) @test vvec ≈ [vinf; v1] + + # Test different constructor for ProductLMO and and direction as BlockVector + lmo2 = FrankWolfe.ProductLMO([FrankWolfe.LpNormLMO{Inf}(3.0), FrankWolfe.LpNormLMO{1}(2.0)]) + v_block = FrankWolfe.compute_extreme_point(lmo2, FrankWolfe.BlockVector([dinf, d1])) + @test FrankWolfe.BlockVector([vinf, v1]) == v_block end @testset "Scaled L-1 norm polytopes" begin @@ -733,6 +738,25 @@ end end end +@testset "MathOpt LMO with BlockVector" begin + o = GLPK.Optimizer() + MOI.set(o, MOI.Silent(), true) + x = MOI.add_variables(o, 10) + y = MOI.add_variables(o, 10) + MOI.add_constraint.(o, x, MOI.GreaterThan.(-ones(10))) + MOI.add_constraint.(o, x, MOI.LessThan.(ones(10))) + MOI.add_constraint.(o, y, MOI.GreaterThan.(-2*ones(10))) + MOI.add_constraint.(o, y, MOI.LessThan.(2*ones(10))) + lmo = FrankWolfe.MathOptLMO(o) + + direction = FrankWolfe.BlockVector([ones(10), -ones(10)]) + + v = FrankWolfe.compute_extreme_point(lmo, direction) + v_ref = FrankWolfe.BlockVector([-ones(10), 2*ones(10)]) + @test v == v_ref + +end + @testset "Inplace LMO correctness" begin V = [-6.0, -6.15703, -5.55986] diff --git a/test/runtests.jl b/test/runtests.jl index 1d0c9c4c9..c106e30f0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -777,6 +777,8 @@ end include("generic-arrays.jl") +include("blocks.jl") + @testset "End-to-end trajectory tests" begin trajectory_testfiles = readdir(joinpath(@__DIR__, "trajectory_tests"), join=true) for file in trajectory_testfiles