remove v0.13 deprecations

reinsert optimisers

add 1.9 CI

drop julia v1.9
CarloLucibello committed Oct 11, 2024
1 parent aa035e9 commit 6b45e21
Showing 14 changed files with 51 additions and 176 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
fail-fast: false
matrix:
version:
# - '1.9' # Uncomment when 1.10 is out. Replace this with the minimum Julia version that your package supports.
- '1.10' # Replace this with the minimum Julia version that your package supports.
- '1'
os: [ubuntu-latest]
arch: [x64]
2 changes: 1 addition & 1 deletion Project.toml
@@ -67,4 +67,4 @@ SpecialFunctions = "2.1.2"
Statistics = "1"
Zygote = "0.6.67"
cuDNN = "1"
julia = "1.9"
julia = "1.10"
4 changes: 0 additions & 4 deletions ext/FluxEnzymeExt/FluxEnzymeExt.jl
@@ -16,10 +16,6 @@ _applyloss(loss, model, d...) = loss(model, d...)

EnzymeRules.inactive(::typeof(Flux.Losses._check_sizes), args...) = true

using Flux: _old_to_new # from src/deprecations.jl
train!(loss, model::Duplicated, data, opt::Optimise.AbstractOptimiser; cb=nothing) =
train!(loss, model, data, _old_to_new(opt); cb)

function train!(loss, model::Duplicated, data, rule::Optimisers.AbstractRule; cb = nothing)
train!(loss, model, data, _rule_to_state(model, rule); cb)
end
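
With the old-style optimiser shim removed above, the only `train!` method left in this extension takes an `Optimisers.AbstractRule` directly. A minimal sketch of how such a call might look, assuming Flux with this extension plus Enzyme and Optimisers.jl are loaded; the model, data and learning rate are invented for illustration:

```julia
using Flux, Enzyme
import Optimisers

model = Chain(Dense(2 => 4, relu), Dense(4 => 1))        # made-up model
data  = [(randn(Float32, 2, 8), randn(Float32, 1, 8))]   # a single made-up (x, y) batch

loss(m, x, y) = Flux.mse(m(x), y)

# Wrap the model so Enzyme has a shadow copy to accumulate gradients into,
# then pass an Optimisers.jl rule straight to train!:
dup = Duplicated(model, Enzyme.make_zero(model))
Flux.train!(loss, dup, data, Optimisers.Adam(0.01))
```
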
95 changes: 0 additions & 95 deletions src/deprecations.jl
@@ -1,47 +1,6 @@

# v0.13 deprecations

function Broadcast.broadcasted(f::Recur, args...)
# This had an explicit @adjoint rule, calling Zygote.∇map(__context__, f, args...), until v0.12
Base.depwarn("""Broadcasting is not safe to use with RNNs, as it does not guarantee an iteration order.
Re-writing this as a comprehension would be better.""", :broadcasted)
map(f, args...) # map isn't really safe either, but
end

@deprecate frequencies(xs) group_counts(xs)

struct Zeros
function Zeros()
Base.depwarn("Flux.Zeros is no more, has ceased to be, is bereft of life, is an ex-boondoggle... please use bias=false instead", :Zeros)
false
end
end
Zeros(args...) = Zeros() # was used both Dense(10, 2, initb = Zeros) and Dense(rand(2,10), Zeros())

function Optimise.update!(x::AbstractArray, x̄)
Base.depwarn("`Flux.Optimise.update!(x, x̄)` was not used internally and has been removed. Please write `x .-= x̄` instead.", :update!)
x .-= x̄
end

function Diagonal(size::Integer...; kw...)
Base.depwarn("Flux.Diagonal is now Flux.Scale, and also allows an activation function.", :Diagonal)
Scale(size...; kw...)
end
function Diagonal(size::Tuple; kw...)
Base.depwarn("Flux.Diagonal is now Flux.Scale, and also allows an activation function.", :Diagonal)
Scale(size...; kw...)
end

# Deprecate this eventually once saving models w/o structure is no more
function loadparams!(m, xs)
Base.depwarn("loadparams! will be deprecated eventually. Use loadmodel! instead.", :loadparams!)
for (p, x) in zip(params(m), xs)
size(p) == size(x) ||
error("Expected param size $(size(p)), got $(size(x))")
copyto!(p, x)
end
end

# Channel notation: Changed to match Conv, but very softly deprecated!
# Perhaps change to @deprecate for v0.15, but there is no plan to remove these.
Dense(in::Integer, out::Integer, σ = identity; kw...) =
@@ -56,32 +15,6 @@ LSTMCell(in::Integer, out::Integer; kw...) = LSTMCell(in => out; kw...)
GRUCell(in::Integer, out::Integer; kw...) = GRUCell(in => out; kw...)
GRUv3Cell(in::Integer, out::Integer; kw...) = GRUv3Cell(in => out; kw...)

# Optimisers with old naming convention
Base.@deprecate_binding ADAM Adam
Base.@deprecate_binding NADAM NAdam
Base.@deprecate_binding ADAMW AdamW
Base.@deprecate_binding RADAM RAdam
Base.@deprecate_binding OADAM OAdam
Base.@deprecate_binding ADAGrad AdaGrad
Base.@deprecate_binding ADADelta AdaDelta

# Remove sub-module Data, while making sure Flux.Data.DataLoader keeps working
Base.@deprecate_binding Data Flux false "Sub-module Flux.Data has been removed. The only thing it contained may be accessed as Flux.DataLoader"

@deprecate paramtype(T,m) _paramtype(T,m) false # internal method, renamed to make this clear

@deprecate rng_from_array() Random.default_rng()

function istraining()
Base.depwarn("Flux.istraining() is deprecated, use NNlib.within_gradient(x) instead", :istraining)
false
end
ChainRulesCore.rrule(::typeof(istraining)) = true, _ -> (NoTangent(),)

function _isactive(m)
Base.depwarn("_isactive(m) is deprecated, use _isactive(m,x)", :_isactive, force=true)
_isactive(m, 1:0)
end

#=
# Valid method in Optimise, old implicit style, is:
@@ -110,7 +43,6 @@ train!(loss, ps::Params, data, opt::Optimisers.AbstractRule; cb=nothing) = error
train!(loss, model, data, opt::Optimise.AbstractOptimiser; cb=nothing) =
train!(loss, model, data, _old_to_new(opt); cb)


# Next, to use the new `setup` with the still-exported old-style `Adam` etc:
import .Train: setup
setup(rule::Optimise.AbstractOptimiser, model) = setup(_old_to_new(rule), model)
@@ -179,33 +111,6 @@ function update!(opt::Optimise.AbstractOptimiser, ::Params, grads::Union{Tuple,
""")
end

"""
trainmode!(m, active)
!!! warning
This two-argument method is deprecated.
Possible values of `active` are:
- `true` for training, or
- `false` for testing, same as [`testmode!`](@ref)`(m)`
- `:auto` or `nothing` for Flux to detect training automatically.
"""
function trainmode!(m, active::Bool)
Base.depwarn("trainmode!(m, active::Bool) is deprecated", :trainmode)
testmode!(m, !active)
end

# Greek-letter keywords deprecated in Flux 0.13
# Arguments (old => new, :function, "β" => "beta")
function _greek_ascii_depwarn(βbeta::Pair, func = :loss, names = "" => "")
Base.depwarn(LazyString("function ", func, " no longer accepts greek-letter keyword ", names.first, """
please use ascii """, names.second, " instead"), func)
βbeta.first
end
_greek_ascii_depwarn(βbeta::Pair{Nothing}, _...) = βbeta.second

ChainRulesCore.@non_differentiable _greek_ascii_depwarn(::Any...)


# v0.14 deprecations
@deprecate default_rng_value() Random.default_rng()
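
With these shims gone, the v0.13 spellings now error instead of emitting a depwarn. A rough migration sketch covering the bindings deleted above; layer sizes, batch sizes and learning rates are placeholders, not taken from this commit:

```julia
using Flux

# Optimiser names: the old all-caps bindings (ADAM, NADAM, ADAMW, ...) are gone.
opt = Adam(0.001)                    # was ADAM(0.001)

# The Flux.Data alias is gone; DataLoader lives directly in Flux.
x, y = rand(Float32, 4, 64), rand(Float32, 1, 64)
loader = Flux.DataLoader((x, y), batchsize = 16)   # was Flux.Data.DataLoader

# Flux.Zeros and Flux.Diagonal are gone:
layer = Dense(4 => 2; bias = false)  # instead of initb = Zeros or Zeros()
scale = Flux.Scale(4, relu)          # instead of Flux.Diagonal(4), now with an activation

# loadparams! is gone; copy weights between structurally identical models instead:
m1, m2 = Dense(4 => 2), Dense(4 => 2)
Flux.loadmodel!(m1, m2)              # was Flux.loadparams!(m1, Flux.params(m2))
```
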
22 changes: 8 additions & 14 deletions src/layers/normalise.jl
@@ -191,10 +191,9 @@ struct LayerNorm{F,D,T,N}
affine::Bool
end

function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, eps::Real=1f-5, ϵ=nothing)
ε = _greek_ascii_depwarn(ϵ => eps, :LayerNorm, "ϵ" => "eps")
function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, eps::Real=1f-5)
diag = affine ? Scale(size..., λ) : λ!=identity ? Base.Fix1(broadcast, λ) : identity
return LayerNorm(λ, diag, ε, size, affine)
return LayerNorm(λ, diag, eps, size, affine)
end
LayerNorm(size::Integer...; kw...) = LayerNorm(Int.(size); kw...)
LayerNorm(size_act...; kw...) = LayerNorm(Int.(size_act[1:end-1]), size_act[end]; kw...)
@@ -328,17 +327,16 @@ end
function BatchNorm(chs::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine::Bool=true, track_stats::Bool=true, active::Union{Bool,Nothing}=nothing,
eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)
eps::Real=1f-5, momentum::Real=0.1f0)

ε = _greek_ascii_depwarn(ϵ => eps, :BatchNorm, "ϵ" => "eps")

β = affine ? initβ(chs) : nothing
γ = affine ? initγ(chs) : nothing
μ = track_stats ? zeros32(chs) : nothing
σ² = track_stats ? ones32(chs) : nothing

return BatchNorm(λ, β, γ,
μ, σ², ε, momentum,
μ, σ², eps, momentum,
affine, track_stats,
active, chs)
end
@@ -421,17 +419,15 @@ end
function InstanceNorm(chs::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine::Bool=false, track_stats::Bool=false, active::Union{Bool,Nothing}=nothing,
eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)

ε = _greek_ascii_depwarn(ϵ => eps, :InstanceNorm, "ϵ" => "eps")
eps::Real=1f-5, momentum::Real=0.1f0)

β = affine ? initβ(chs) : nothing
γ = affine ? initγ(chs) : nothing
μ = track_stats ? zeros32(chs) : nothing
σ² = track_stats ? ones32(chs) : nothing

return InstanceNorm(λ, β, γ,
μ, σ², ε, momentum,
μ, σ², eps, momentum,
affine, track_stats,
active, chs)
end
@@ -520,9 +516,7 @@ end
function GroupNorm(chs::Int, G::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine::Bool=true, active::Union{Bool,Nothing}=nothing,
eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)

ε = _greek_ascii_depwarn(ϵ => eps, :GroupNorm, "ϵ" => "eps")
eps::Real=1f-5, momentum::Real=0.1f0)

chs % G == 0 || error("The number of groups ($(G)) must divide the number of channels ($chs)")

@@ -535,7 +529,7 @@ function GroupNorm(chs::Int, G::Int, λ=identity;
return GroupNorm(G, λ,
β, γ,
μ, σ²,
ε, momentum,
eps, momentum,
affine, track_stats,
active, chs)
end
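
After this change the four norm layers accept only the ASCII `eps` keyword; `ϵ` is no longer rerouted through `_greek_ascii_depwarn`. A small sketch of the surviving constructors, with channel counts and input sizes chosen arbitrarily:

```julia
using Flux

ln = LayerNorm(8; eps = 1f-5)                      # LayerNorm(8; ϵ = 1f-5) now errors
bn = BatchNorm(16, relu; eps = 1f-5, momentum = 0.1f0)
gn = GroupNorm(16, 4; eps = 1f-5)

x = randn(Float32, 16, 10)
bn(x)   # same (16, 10) output as before; only the keyword spelling changed
```
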
5 changes: 2 additions & 3 deletions src/layers/stateless.jl
@@ -34,11 +34,10 @@ julia> isapprox(std(y; dims=1, corrected=false), ones(1, 10), atol=1e-5)
true
```
"""
@inline function normalise(x::AbstractArray; dims=ndims(x), eps=ofeltype(x, 1e-5), ϵ=nothing)
ε = _greek_ascii_depwarn(ϵ => eps, :InstanceNorm, "ϵ" => "eps")
@inline function normalise(x::AbstractArray; dims=ndims(x), eps=ofeltype(x, 1e-5))
μ = mean(x, dims=dims)
σ = std(x, dims=dims, mean=μ, corrected=false)
return @. (x - μ) / (σ + ε)
return @. (x - μ) / (σ + eps)
end

"""
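
`Flux.normalise` follows the same pattern: only `eps` remains. A short sketch mirroring the doctest shown in this hunk, with array sizes and tolerance chosen for illustration:

```julia
using Flux, Statistics

x = randn(Float32, 5, 10)
y = Flux.normalise(x; dims = 1, eps = 1f-5)        # the ϵ keyword is gone

# Each column should now have roughly zero mean and unit (uncorrected) std:
isapprox(std(y; dims = 1, corrected = false), ones(1, 10); atol = 1e-4)
```
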
2 changes: 1 addition & 1 deletion src/losses/Losses.jl
@@ -4,7 +4,7 @@ using Statistics
using Zygote
using Zygote: @adjoint
using ChainRulesCore
using ..Flux: ofeltype, epseltype, _greek_ascii_depwarn
using ..Flux: ofeltype, epseltype
using NNlib: logsoftmax, logσ, ctc_loss, ctc_alpha, ∇ctc_loss
import Base.Broadcast: broadcasted

52 changes: 21 additions & 31 deletions src/losses/functions.jl
@@ -66,10 +66,9 @@ julia> Flux.msle(Float32[0.9, 1.8, 2.7], 1:3)
0.011100831f0
```
"""
function msle(ŷ, y; agg = mean, eps::Real = epseltype(ŷ), ϵ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :msle, "ϵ" => "eps")
function msle(ŷ, y; agg = mean, eps::Real = epseltype(ŷ))
_check_sizes(ŷ, y)
agg((log.((ŷ .+ ϵ) ./ (y .+ ϵ))) .^2 )
agg((log.((ŷ .+ eps) ./ (y .+ eps))) .^2 )
end

function _huber_metric(abs_error, δ)
@@ -101,9 +100,8 @@ julia> Flux.huber_loss(ŷ, 1:3, delta=0.05) # changes behaviour as |ŷ - y| >
0.003750000000000005
```
"""
function huber_loss(ŷ, y; agg = mean, delta::Real = 1, δ = nothing)
delta_tmp = _greek_ascii_depwarn(δ => delta, :huber_loss, "δ" => "delta")
δ = ofeltype(ŷ, delta_tmp)
function huber_loss(ŷ, y; agg = mean, delta::Real = 1)
δ = ofeltype(ŷ, delta)
_check_sizes(ŷ, y)
abs_error = abs.(ŷ .- y)

@@ -230,10 +228,9 @@ julia> Flux.crossentropy(y_model, y_smooth)
1.5776052f0
```
"""
function crossentropy(ŷ, y; dims = 1, agg = mean, eps::Real = epseltype(ŷ), ϵ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :crossentropy, "ϵ" => "eps")
function crossentropy(ŷ, y; dims = 1, agg = mean, eps::Real = epseltype(ŷ))
_check_sizes(ŷ, y)
agg(.-sum(xlogy.(y, ŷ .+ ϵ); dims = dims))
agg(.-sum(xlogy.(y, ŷ .+ eps); dims = dims))
end

"""
@@ -319,10 +316,9 @@ julia> Flux.crossentropy(y_prob, y_hot)
0.43989f0
```
"""
function binarycrossentropy(ŷ, y; agg = mean, eps::Real = epseltype(ŷ), ϵ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :binarycrossentropy, "ϵ" => "eps")
function binarycrossentropy(ŷ, y; agg = mean, eps::Real = epseltype(ŷ))
_check_sizes(ŷ, y)
agg(@.(-xlogy(y, ŷ + ϵ) - xlogy(1 - y, 1 - ŷ + ϵ)))
agg(@.(-xlogy(y, ŷ + eps) - xlogy(1 - y, 1 - ŷ + eps)))
end

"""
@@ -390,11 +386,10 @@ julia> Flux.kldivergence(p1, p2; eps = 0) # about 17.3 with the regulator
Inf
```
"""
function kldivergence(ŷ, y; dims = 1, agg = mean, eps::Real = epseltype(ŷ), ϵ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :kldivergence, "ϵ" => "eps")
function kldivergence(ŷ, y; dims = 1, agg = mean, eps::Real = epseltype(ŷ))
_check_sizes(ŷ, y)
entropy = agg(sum(xlogx.(y); dims = dims))
cross_entropy = crossentropy(ŷ, y; dims, agg, eps=ϵ)
cross_entropy = crossentropy(ŷ, y; dims, agg, eps)
return entropy + cross_entropy
end

@@ -531,13 +526,12 @@ Calculated as:
"""
function tversky_loss(ŷ, y; beta::Real = 0.7, β = nothing)
beta_temp = _greek_ascii_depwarn(β => beta, :tversky_loss, "β" => "beta")
β = ofeltype(ŷ, beta_temp)
_check_sizes(ŷ, y)
#TODO add agg
num = sum(y .* ŷ) + 1
den = sum(y .* ŷ + β * (1 .- y) .* ŷ + (1 - β) * y .* (1 .- ŷ)) + 1
1 - num / den
β = ofeltype(ŷ, beta)
_check_sizes(ŷ, y)
#TODO add agg
num = sum(y .* ŷ) + 1
den = sum(y .* ŷ + β * (1 .- y) .* ŷ + (1 - β) * y .* (1 .- ŷ)) + 1
1 - num / den
end

"""
@@ -568,12 +562,10 @@ julia> Flux.binary_focal_loss(ŷ, y) ≈ 0.0728675615927385
true
```
"""
function binary_focal_loss(ŷ, y; agg=mean, gamma=2, eps::Real=epseltype(ŷ), ϵ = nothing, γ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :binary_focal_loss, "ϵ" => "eps")
gamma_temp = _greek_ascii_depwarn(γ => gamma, :binary_focal_loss, "γ" => "gamma")
γ = gamma_temp isa Integer ? gamma_temp : ofeltype(ŷ, gamma_temp)
function binary_focal_loss(ŷ, y; agg=mean, gamma=2, eps::Real=epseltype(ŷ))
γ = gamma isa Integer ? gamma : ofeltype(ŷ, gamma)
_check_sizes(ŷ, y)
ŷϵ = ŷ .+ ϵ
ŷϵ = ŷ .+ eps
p_t = y .* ŷϵ + (1 .- y) .* (1 .- ŷϵ)
ce = .-log.(p_t)
weight = (1 .- p_t) .^ γ
@@ -616,11 +608,9 @@ See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels
"""
function focal_loss(ŷ, y; dims=1, agg=mean, gamma=2, eps::Real=epseltype(ŷ), ϵ=nothing, γ=nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :focal_loss, "ϵ" => "eps")
gamma_temp = _greek_ascii_depwarn(γ => gamma, :focal_loss, "γ" => "gamma")
γ = gamma_temp isa Integer ? gamma_temp : ofeltype(ŷ, gamma_temp)
γ = gamma isa Integer ? gamma : ofeltype(ŷ, gamma)
_check_sizes(ŷ, y)
ŷϵ =.+ ϵ
ŷϵ =.+ eps
agg(sum(@. -y * (1 - ŷϵ)^γ * log(ŷϵ); dims))
end

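
The loss functions likewise keep only their ASCII keywords (`eps`, `delta`, `gamma`, `beta`). A hedged sketch of the surviving call forms, using throwaway arrays rather than anything from this commit:

```julia
using Flux

ŷ = softmax(randn(Float32, 3, 5))                  # made-up predictions
y = Flux.onehotbatch(rand(1:3, 5), 1:3)            # made-up targets

Flux.crossentropy(ŷ, y; eps = 1f-7)                # was ϵ = 1f-7
Flux.focal_loss(ŷ, y; gamma = 2)                   # was γ = 2
Flux.huber_loss(randn(Float32, 4), randn(Float32, 4); delta = 0.5)    # was δ = 0.5
Flux.tversky_loss(rand(Float32, 3, 5), rand(Bool, 3, 5); beta = 0.7)  # was β = 0.7
```
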
2 changes: 1 addition & 1 deletion test/layers/normalisation.jl
@@ -446,7 +446,7 @@ end
@test Zygote.hessian_reverse(summ1, [1.0,2.0,3.0]) == zeros(3, 3)

m2 = Chain(BatchNorm(3), sum)
@test_broken Zygote.hessian_reverse(m2, Float32[1 2; 3 4; 5 6]) == zeros(Float32, 6, 6)
@test Zygote.hessian_reverse(m2, Float32[1 2; 3 4; 5 6]) == zeros(Float32, 6, 6) broken = VERSION >= v"1.11"
end

@testset "ForwardDiff" begin
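
The replacement line relies on the `broken` keyword of `@test`, so the expectation of failure can be conditioned on the Julia version. A standalone illustration of that keyword; the condition below is deliberately false, so the test simply passes:

```julia
using Test

@test_broken 1 + 1 == 3                  # unconditionally expected to fail

# Same idea, but only marked broken when the condition holds:
@test 1 + 1 == 2 broken = VERSION >= v"1.99"
```
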
2 changes: 1 addition & 1 deletion test/layers/show.jl
@@ -76,7 +76,7 @@ end

# Bug when no children, https://github.com/FluxML/Flux.jl/issues/2208
struct NoFields end
Flux.@functor NoFields
Flux.@layer NoFields

@testset "show with no fields" begin
str = repr("text/plain", Chain(Dense(1=>1), Dense(1=>1), NoFields()))
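
`Flux.@layer` is the current way to register a custom type as a layer, replacing `Flux.@functor`. A sketch of how a user-defined wrapper might opt in; the `MyBlock` type is invented for this example, and `trainables` is assumed to be available in this Flux version:

```julia
using Flux

struct MyBlock                 # stand-in for a user-defined layer
    dense::Dense
end
MyBlock(n::Integer) = MyBlock(Dense(n => n, relu))
(b::MyBlock)(x) = b.dense(x)

Flux.@layer MyBlock            # replaces the older `Flux.@functor MyBlock`

m = MyBlock(3)
m(randn(Float32, 3, 2))        # still callable as before
Flux.trainables(m)             # its parameters are now visible to Flux / Optimisers
```
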