deprecation of params and Optimise (continued) (#2526)
CarloLucibello authored Nov 17, 2024
1 parent 6fffc31 commit 1fedc0d
Showing 27 changed files with 374 additions and 444 deletions.
3 changes: 3 additions & 0 deletions .buildkite/pipeline.yml
@@ -14,6 +14,7 @@ steps:
env:
FLUX_TEST_CUDA: "true"
FLUX_TEST_CPU: "false"
FLUX_TEST_ENZYME: "false"
timeout_in_minutes: 60

# - label: "GPU nightly"
@@ -53,6 +54,7 @@ steps:
env:
FLUX_TEST_METAL: "true"
FLUX_TEST_CPU: "false"
FLUX_TEST_ENZYME: "false"
matrix:
setup:
julia:
@@ -82,6 +84,7 @@ steps:
JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"
FLUX_TEST_AMDGPU: "true"
FLUX_TEST_CPU: "false"
FLUX_TEST_ENZYME: "false"
JULIA_NUM_THREADS: 4
env:
SECRET_CODECOV_TOKEN: "fAV/xwuaV0l5oaIYSAXRQIor8h7yHdlrpLUZFwNVnchn7rDk9UZoz0oORG9vlKLc1GK2HhaPRAy+fTkJ3GM/8Y0phHh3ANK8f5UsGm2DUTNsnf6u9izgnwnoRTcsWu+vSO0fyYrxBvBCoJwljL+yZbDFz3oE16DP7HPIzxfQagm+o/kMEszVuoUXhuLXXH0LxT6pXl214qjqs04HfMRmKIIiup48NB6fBLdhGlQz64MdMNHBfgDa/fafB7eNvn0X6pEOxysoy6bDQLUhKelOXgcDx1UsTo34Yiqr+QeJPAeKcO//PWurwQhPoUoHfLad2da9DN4uQk4YQLqAlcIuAA==;U2FsdGVkX1+mRXF2c9soCXT7DYymY3msM+vrpaifiTp8xA+gMpbQ0G63WY3tJ+6V/fJcVnxYoKZVXbjcg8fl4Q=="
1 change: 1 addition & 0 deletions .gitignore
@@ -11,3 +11,4 @@ LocalPreferences.toml
.DS_Store
docs/mymodel.bson
prova.jl
benchmarks/
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,6 +1,6 @@
name = "Flux"
uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c"
version = "0.15-DEV"
version = "0.15.0-DEV"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
1 change: 0 additions & 1 deletion ext/FluxEnzymeExt/FluxEnzymeExt.jl
@@ -2,7 +2,6 @@ module FluxEnzymeExt

using Flux
import Flux.Train: train!, _rule_to_state
import Flux.Optimise
import Optimisers
import Enzyme
using Enzyme: EnzymeRules, Active, Const, Duplicated, autodiff, ReverseWithPrimal
99 changes: 40 additions & 59 deletions src/Flux.jl
@@ -9,14 +9,14 @@ using MacroTools: @forward

@reexport using NNlib
using MLUtils
const stack = MLUtils.stack # now exported by Base
import Optimisers: Optimisers, trainable, destructure # before v0.13, Flux owned these functions
using Optimisers: freeze!, thaw!, adjust!, trainables

using Optimisers: Optimisers, destructure, freeze!, thaw!, adjust!, trainables, update!
import Optimisers: trainable
@reexport using Optimisers

using Random: default_rng
using Zygote, ChainRulesCore
using Zygote: Params, @adjoint, gradient, pullback
using Zygote: @adjoint, gradient, pullback
using Zygote.ForwardDiff: value
export gradient

@@ -31,10 +31,6 @@ export gradient
get_device_type,
DeviceIterator


# Pirate error to catch a common mistake. (Internal function `base` because overloading `update!` is more likely to give ambiguities.)
Optimisers.base(dx::Zygote.Grads) = error("Optimisers.jl cannot be used with Zygote.jl's implicit gradients, `Params` & `Grads`")

export Chain, Dense, Embedding, EmbeddingBag,
Maxout, SkipConnection, Parallel, PairwiseFusion,
RNNCell, LSTMCell, GRUCell, GRUv3Cell,
@@ -55,12 +51,43 @@ export Chain, Dense, Embedding, EmbeddingBag,
Bilinear, Scale,
# utils
outputsize, state, create_bias, @layer,
# from OneHotArrays.jl
onehot, onehotbatch, onecold,
# from Train
setup, train!,
# from Optimisers.jl
destructure, freeze!, thaw!, adjust!, trainables, update!, trainable,
# init
glorot_uniform,
glorot_normal,
kaiming_uniform,
kaiming_normal,
truncated_normal,
lecun_normal,
orthogonal,
sparse_init,
identity_init,
# Losses
binary_focal_loss,
binarycrossentropy,
crossentropy,
dice_coeff_loss,
focal_loss,
hinge_loss,
huber_loss,
kldivergence,
label_smoothing,
logitbinarycrossentropy,
logitcrossentropy,
mae,
mse,
msle,
poisson_loss,
siamese_contrastive_loss,
squared_hinge_loss,
tversky_loss,
))

include("optimise/Optimise.jl")
using .Optimise: Optimise
export ClipValue # this is const defined in deprecations, for ClipGrad

include("train.jl")
using .Train
using .Train: setup
@@ -69,18 +96,6 @@ using Adapt, Functors, OneHotArrays
include("utils.jl")
include("functor.jl")

@compat(public, (
# from OneHotArrays.jl
onehot, onehotbatch, onecold,
# from Functors.jl
functor, @functor, KeyPath, haskeypath, getkeypath,
# from Optimise/Train/Optimisers.jl
setup, update!, destructure, freeze!, adjust!, params, trainable, trainables
))

# Pirate error to catch a common mistake.
Functors.functor(::Type{<:MLUtils.DataLoader}, x) = error("`DataLoader` does not support Functors.jl, thus functions like `Flux.gpu` will not act on its contents.")

include("layers/show.jl")
include("layers/macro.jl")

@@ -97,8 +112,6 @@ include("loading.jl")
include("outputsize.jl")
export @autosize

include("deprecations.jl")

include("losses/Losses.jl")
using .Losses

@@ -110,38 +123,6 @@ include("distributed/backend.jl")
include("distributed/public_api.jl")
export MPIBackend, NCCLBackend, DistributedUtils

@compat(public, (
# init
glorot_uniform,
glorot_normal,
kaiming_uniform,
kaiming_normal,
truncated_normal,
lecun_normal,
orthogonal,
sparse_init,
identity_init,

# Losses
binary_focal_loss,
binarycrossentropy,
crossentropy,
dice_coeff_loss,
focal_loss,
hinge_loss,
huber_loss,
kldivergence,
label_smoothing,
logitbinarycrossentropy,
logitcrossentropy,
mae,
mse,
msle,
poisson_loss,
siamese_contrastive_loss,
squared_hinge_loss,
tversky_loss,
))

include("deprecations.jl")

end # module
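
For reference, the explicit training style that this reorganisation of src/Flux.jl standardises on (Optimisers.jl state via `setup`, explicit `gradient`, then `update!`) looks roughly like the sketch below. This loop is not part of the diff; `model`, `data`, and `loss` are hypothetical placeholders.

    opt_state = Flux.setup(Adam(), model)                 # build optimiser state once, outside the loop
    for (x, y) in data
        grads = Flux.gradient(m -> loss(m, x, y), model)  # explicit gradient with respect to the model itself
        Flux.update!(opt_state, model, grads[1])          # mutate the model and optimiser state in place
    end
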
171 changes: 50 additions & 121 deletions src/deprecations.jl
@@ -16,125 +16,8 @@ GRUCell(in::Integer, out::Integer; kw...) = GRUCell(in => out; kw...)
GRUv3Cell(in::Integer, out::Integer; kw...) = GRUv3Cell(in => out; kw...)


#=
# Valid method in Optimise, old implicit style, is:
train!(loss, ps::Params, data, opt::AbstractOptimiser; cb = () -> ())
# Valid methods in Train, new explict style, are:
train!(loss, model, data, opt) # preferred
train!(loss, model, data, opt::Optimisers.AbstractRule) # if you forget setup
# Provide friendly errors for what happens if you mix these up:
=#
import .Optimise: train!

train!(loss, ps::Params, data, opt; cb=nothing) = error(
"""can't mix implict Params with explict state!
To use `Flux.params(m)` in `train!`, the 4th argument must be from the old `Flux.Optimise` sub-module.
But better to use the new explicit style, in which `m` itself is the 2nd argument.
""")

train!(loss, ps::Params, data, opt::Optimisers.AbstractRule; cb=nothing) = error(
"""can't mix implict Params with explict rule from Optimisers.jl
To use `Flux.params(m)` in `train!`, the 4th argument must be from the old `Flux.Optimise` sub-module.
But better to use the new explicit style, in which `m` itself is the 2nd argument.
""")

train!(loss, model, data, opt::Optimise.AbstractOptimiser; cb=nothing) =
train!(loss, model, data, __old_to_new(opt); cb)

# Next, to use the new `setup` with the still-exported old-style `Adam` etc:
import .Train: setup
setup(rule::Optimise.AbstractOptimiser, model) = setup(__old_to_new(rule), model)
# ... and allow accidental use of `Optimisers.setup` to do the same:
Optimisers.setup(rule::Optimise.AbstractOptimiser, model) = setup(__old_to_new(rule), model)


function __old_to_new(rule)
Base.depwarn("""Optimisers from Flux.Optimise module are deprecated.
Use optimisers from Optimisers.jl instead.""", :__old_to_new)
return _old_to_new(rule)
end

for T in [:Descent, :Adam, :Momentum, :Nesterov,
:AdaGrad, :AdaMax, :AdaDelta, :AMSGrad, :NAdam, :RAdam, :OAdam, :AdaBelief,
# :InvDecay, :ExpDecay,
:SignDecay,
]
@eval function _old_to_new(rule::Optimise.$T)
args = map(f -> getfield(rule, f), fieldnames(Optimisers.$T))
Optimisers.$T(args...)
end
end
_old_to_new(rule::Optimise.Optimiser) = Optimisers.OptimiserChain(map(_old_to_new, rule.os)...)
# const OptimiserChain = Optimise.Optimiser # lets you use new name with implicit params too.
const Optimiser = Optimisers.OptimiserChain
_old_to_new(rule::Optimise.WeightDecay) = Optimisers.WeightDecay(rule.wd) # called lambda now
_old_to_new(rule::Optimise.ClipNorm) = Optimisers.ClipNorm(rule.thresh) # called omega, and there are more fields
_old_to_new(rule::Optimise.ClipValue) = Optimisers.ClipGrad(rule.thresh) # called delta now, and struct name differs
# const ClipGrad = Optimise.ClipValue
const ClipValue = Optimisers.ClipGrad
_old_to_new(rule::Optimise.RMSProp) = Optimisers.RMSProp(rule.eta, rule.rho, rule.epsilon) # RMSProp has no field centred

_old_to_new(rule) = error("Flux.setup does not know how to translate this old-style implicit rule to a new-style Optimisers.jl explicit rule")

# This allows you to mix and match, like Flux.setup(OptimiserChain(Optimisers.SignDecay(), Flux.Descent()), [1,2,3.])
Optimisers.OptimiserChain(rules::Union{Optimisers.AbstractRule, Optimise.AbstractOptimiser}...) =
Optimisers.OptimiserChain(map(_old_to_new, rules))
_old_to_new(rule::Optimisers.AbstractRule) = rule

# Since `update!` should be called in a loop, it makes less sense to call `setup` for you if you forgot.
# But let's make sure that such uses give a helpful error:
import .Optimise: update!

function update!(opt::Optimise.AbstractOptimiser, model, grad)
# This error method requires narrowing the main worker method of Flux.Optimise
# to accept only arrays. Remove if this causes problems!
# update!(opt::Flux.Optimise.AbstractOptimiser, x::AbstractArray, x̄)
error("""Invalid input to `update!`.
* For the implicit style, this needs `update!(::AbstractOptimiser, ::Params, ::Grads)`
* For the explicit style, `update!(state, model, grad)` needs `state = Flux.setup(opt, model)`.
""")
end

# TODO this friendly error should go in Optimisers.jl.
# remove after https://github.com/FluxML/Optimisers.jl/pull/181
function update!(opt::Optimisers.AbstractRule, model, grad)
error("""Invalid input to `update!`.
`update!(state, model, grad)` needs `state = Flux.setup(opt, model)`.
""")
end
function update!(opt::Optimisers.AbstractRule, model::Chain, grad::Tuple)
error("""Invalid input to `update!`.
`update!(state, model, grad)` needs `state = Flux.setup(opt, model)`.
""")
end

# An easy error to make is to pass result of explicit gradient(...), not gradient(...)[1]
# Can't catch every case, but can catch many simple Flux models:

function update!(opt, model::Chain, grads::Tuple)
# Zygote will make a NamedTuple{(:layers,)} for the gradient of Chain, Diffractor a Tangent
@warn """explicit `update!(opt, model, grad)` wants the gradient for the model alone,
not the whole tuple from `gradient(m -> loss(m, x, y), model)`. You probably want `grads[1]`."""
update!(opt, model, grads[1])
end

function update!(opt::Optimise.AbstractOptimiser, model::Chain, grads::Tuple) # ambiguity
update!(opt, model, grads[1]) # calls error case "Invalid input" just above
end

# One more easy error to catch is using explicit gradient with `params(m)`:
#### v0.14 deprecations ###########################

function update!(opt::Optimise.AbstractOptimiser, ::Params, grads::Union{Tuple, NamedTuple})
error("""can't mix implicit Params with explicit gradients!
* For the implicit style, this needs `update(::AbstractOptimiser, ::Params, ::Grads)` with implicit gradient.
* For the explicit style, `update(state, model, grad)` needs the model itself, and `state = Flux.setup(opt, model)`.
""")
end


# v0.14 deprecations
@deprecate default_rng_value() Random.default_rng()


@@ -179,14 +62,14 @@ const FluxCUDAAdaptor = CUDADevice
const FluxAMDGPUAdaptor = AMDGPUDevice
const FluxMetalAdaptor = MetalDevice

# v0.15 deprecations
######## v0.15 deprecations #########################

# Enable these when 0.15 is released, and delete const ClipGrad = Optimise.ClipValue etc:
# Enable these when 0.16 is released, and delete const ClipGrad = Optimise.ClipValue etc:
# Base.@deprecate_binding Optimiser OptimiserChain
# Base.@deprecate_binding ClipValue ClipGrad

# train!(loss::Function, ps::Zygote.Params, data, opt) = throw(ArgumentError(
# """On Flux 0.15, `train!` no longer accepts implicit `Zygote.Params`.
# """On Flux 0.16, `train!` no longer accepts implicit `Zygote.Params`.
# Instead of `train!(loss_xy, Flux.params(model), data, Adam())`
# it now needs `opt = Flux.setup(Adam(), model); train!(loss_mxy, model, data, opt)`
# where `loss_mxy` accepts the model as its first argument.
@@ -197,3 +80,49 @@ function reset!(x)
Base.depwarn("reset!(m) is deprecated. You can remove this call as it is no more needed.", :reset!)
return x
end

function params!(p::Zygote.Params, x, seen = IdSet())
if x isa AbstractArray{<:Number} && Functors.isleaf(x)
return push!(p, x)
elseif x in seen
nothing
else
_check_new_macro(x) # complains if you used @functor not @layer
push!(seen, x)
for child in trainable(x)
params!(p, child, seen)
end
end
end

function params(m...)
Base.depwarn("""
Flux.params(m...) is deprecated. Use `Flux.trainable(model)` for parameter collection
and the explicit `gradient(m -> loss(m, x, y), model)` for gradient computation.
""", :params)
ps = Params()
params!(ps, m)
return ps
end

# Allows caching of the parameters when params is called within gradient() to fix #2040.
# @non_differentiable params(m...) # https://github.com/FluxML/Flux.jl/pull/2054
# That speeds up implicit use, and silently breaks explicit use.
# From @macroexpand Zygote.@non_differentiable params(m...) and https://github.com/FluxML/Zygote.jl/pull/1248
Zygote._pullback(::Zygote.Context{true}, ::typeof(params), m...) = params(m), _ -> nothing

include("optimise/Optimise.jl") ## deprecated Module


# TODO this friendly error should go in Optimisers.jl.
# remove after https://github.com/FluxML/Optimisers.jl/pull/181
function Optimisers.update!(opt::Optimisers.AbstractRule, model, grad)
error("""Invalid input to `update!`.
`update!(state, model, grad)` needs `state = Flux.setup(opt, model)`.
""")
end
function Optimisers.update!(opt::Optimisers.AbstractRule, model::Chain, grad::Tuple)
error("""Invalid input to `update!`.
`update!(state, model, grad)` needs `state = Flux.setup(opt, model)`.
""")
end
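
As a migration note for the `params` deprecation above: a flat collection of trainable arrays can instead be obtained with `trainables`, which this commit imports from Optimisers.jl, and gradients are taken explicitly with `gradient(m -> loss(m, x, y), model)` as the warning text says. A minimal sketch, not part of the diff; the `Dense(2 => 3)` layer is only an illustrative stand-in for a real model.

    using Flux
    model = Dense(2 => 3)
    ps = Flux.trainables(model)   # vector of trainable arrays: the 3×2 weight matrix and the length-3 bias
    sum(length, ps)               # total trainable parameter count, here 6 + 3 = 9
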