remove v0.13 deprecations

reinsert optimisers

add 1.9 CI

drop julia v1.9
CarloLucibello committed Oct 11, 2024
1 parent aa035e9 commit 6b45e21
Showing 14 changed files with 51 additions and 176 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
fail-fast: false
matrix:
version:
# - '1.9' # Uncomment when 1.10 is out. Replace this with the minimum Julia version that your package supports.
- '1.10' # Replace this with the minimum Julia version that your package supports.
- '1'
os: [ubuntu-latest]
arch: [x64]
2 changes: 1 addition & 1 deletion Project.toml
@@ -67,4 +67,4 @@ SpecialFunctions = "2.1.2"
Statistics = "1"
Zygote = "0.6.67"
cuDNN = "1"
julia = "1.9"
julia = "1.10"
4 changes: 0 additions & 4 deletions ext/FluxEnzymeExt/FluxEnzymeExt.jl
@@ -16,10 +16,6 @@ _applyloss(loss, model, d...) = loss(model, d...)

EnzymeRules.inactive(::typeof(Flux.Losses._check_sizes), args...) = true

using Flux: _old_to_new # from src/deprecations.jl
train!(loss, model::Duplicated, data, opt::Optimise.AbstractOptimiser; cb=nothing) =
train!(loss, model, data, _old_to_new(opt); cb)

function train!(loss, model::Duplicated, data, rule::Optimisers.AbstractRule; cb = nothing)
train!(loss, model, data, _rule_to_state(model, rule); cb)
end
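
With the old-style optimiser shim removed above, the only `train!` method left in this extension takes an `Optimisers.AbstractRule` directly. A minimal sketch of how such a call might look, assuming Flux with this extension plus Enzyme and Optimisers.jl are loaded; the model, data and learning rate are invented for illustration:

```julia
using Flux, Enzyme
import Optimisers

model = Chain(Dense(2 => 4, relu), Dense(4 => 1))        # made-up model
data  = [(randn(Float32, 2, 8), randn(Float32, 1, 8))]   # a single made-up (x, y) batch

loss(m, x, y) = Flux.mse(m(x), y)

# Wrap the model so Enzyme has a shadow copy to accumulate gradients into,
# then pass an Optimisers.jl rule straight to train!:
dup = Duplicated(model, Enzyme.make_zero(model))
Flux.train!(loss, dup, data, Optimisers.Adam(0.01))
```
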
95 changes: 0 additions & 95 deletions src/deprecations.jl
@@ -1,47 +1,6 @@

# v0.13 deprecations

function Broadcast.broadcasted(f::Recur, args...)
# This had an explicit @adjoint rule, calling Zygote.∇map(__context__, f, args...), until v0.12
Base.depwarn("""Broadcasting is not safe to use with RNNs, as it does not guarantee an iteration order.
Re-writing this as a comprehension would be better.""", :broadcasted)
map(f, args...) # map isn't really safe either, but
end

@deprecate frequencies(xs) group_counts(xs)

struct Zeros
function Zeros()
Base.depwarn("Flux.Zeros is no more, has ceased to be, is bereft of life, is an ex-boondoggle... please use bias=false instead", :Zeros)
false
end
end
Zeros(args...) = Zeros() # was used both Dense(10, 2, initb = Zeros) and Dense(rand(2,10), Zeros())

function Optimise.update!(x::AbstractArray, x̄)
Base.depwarn("`Flux.Optimise.update!(x, x̄)` was not used internally and has been removed. Please write `x .-= x̄` instead.", :update!)
x .-= x̄
end

function Diagonal(size::Integer...; kw...)
Base.depwarn("Flux.Diagonal is now Flux.Scale, and also allows an activation function.", :Diagonal)
Scale(size...; kw...)
end
function Diagonal(size::Tuple; kw...)
Base.depwarn("Flux.Diagonal is now Flux.Scale, and also allows an activation function.", :Diagonal)
Scale(size...; kw...)
end

# Deprecate this eventually once saving models w/o structure is no more
function loadparams!(m, xs)
Base.depwarn("loadparams! will be deprecated eventually. Use loadmodel! instead.", :loadparams!)
for (p, x) in zip(params(m), xs)
size(p) == size(x) ||
error("Expected param size $(size(p)), got $(size(x))")
copyto!(p, x)
end
end

# Channel notation: Changed to match Conv, but very softly deprecated!
# Perhaps change to @deprecate for v0.15, but there is no plan to remove these.
Dense(in::Integer, out::Integer, σ = identity; kw...) =
@@ -56,32 +15,6 @@ LSTMCell(in::Integer, out::Integer; kw...) = LSTMCell(in => out; kw...)
GRUCell(in::Integer, out::Integer; kw...) = GRUCell(in => out; kw...)
GRUv3Cell(in::Integer, out::Integer; kw...) = GRUv3Cell(in => out; kw...)

# Optimisers with old naming convention
Base.@deprecate_binding ADAM Adam
Base.@deprecate_binding NADAM NAdam
Base.@deprecate_binding ADAMW AdamW
Base.@deprecate_binding RADAM RAdam
Base.@deprecate_binding OADAM OAdam
Base.@deprecate_binding ADAGrad AdaGrad
Base.@deprecate_binding ADADelta AdaDelta

# Remove sub-module Data, while making sure Flux.Data.DataLoader keeps working
Base.@deprecate_binding Data Flux false "Sub-module Flux.Data has been removed. The only thing it contained may be accessed as Flux.DataLoader"

@deprecate paramtype(T,m) _paramtype(T,m) false # internal method, renamed to make this clear

@deprecate rng_from_array() Random.default_rng()

function istraining()
Base.depwarn("Flux.istraining() is deprecated, use NNlib.within_gradient(x) instead", :istraining)
false
end
ChainRulesCore.rrule(::typeof(istraining)) = true, _ -> (NoTangent(),)

function _isactive(m)
Base.depwarn("_isactive(m) is deprecated, use _isactive(m,x)", :_isactive, force=true)
_isactive(m, 1:0)
end

#=
# Valid method in Optimise, old implicit style, is:
@@ -110,7 +43,6 @@ train!(loss, ps::Params, data, opt::Optimisers.AbstractRule; cb=nothing) = error
train!(loss, model, data, opt::Optimise.AbstractOptimiser; cb=nothing) =
train!(loss, model, data, _old_to_new(opt); cb)


# Next, to use the new `setup` with the still-exported old-style `Adam` etc:
import .Train: setup
setup(rule::Optimise.AbstractOptimiser, model) = setup(_old_to_new(rule), model)
@@ -179,33 +111,6 @@ function update!(opt::Optimise.AbstractOptimiser, ::Params, grads::Union{Tuple,
""")
end

"""
trainmode!(m, active)
!!! warning
This two-argument method is deprecated.
Possible values of `active` are:
- `true` for training, or
- `false` for testing, same as [`testmode!`](@ref)`(m)`
- `:auto` or `nothing` for Flux to detect training automatically.
"""
function trainmode!(m, active::Bool)
Base.depwarn("trainmode!(m, active::Bool) is deprecated", :trainmode)
testmode!(m, !active)
end

# Greek-letter keywords deprecated in Flux 0.13
# Arguments (old => new, :function, "β" => "beta")
function _greek_ascii_depwarn(βbeta::Pair, func = :loss, names = "" => "")
Base.depwarn(LazyString("function ", func, " no longer accepts greek-letter keyword ", names.first, """
please use ascii """, names.second, " instead"), func)
βbeta.first
end
_greek_ascii_depwarn(βbeta::Pair{Nothing}, _...) = βbeta.second

ChainRulesCore.@non_differentiable _greek_ascii_depwarn(::Any...)


# v0.14 deprecations
@deprecate default_rng_value() Random.default_rng()
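
With these shims gone, the v0.13 spellings now error instead of emitting a depwarn. A rough migration sketch covering the bindings deleted above; layer sizes, batch sizes and learning rates are placeholders, not taken from this commit:

```julia
using Flux

# Optimiser names: the old all-caps bindings (ADAM, NADAM, ADAMW, ...) are gone.
opt = Adam(0.001)                    # was ADAM(0.001)

# The Flux.Data alias is gone; DataLoader lives directly in Flux.
x, y = rand(Float32, 4, 64), rand(Float32, 1, 64)
loader = Flux.DataLoader((x, y), batchsize = 16)   # was Flux.Data.DataLoader

# Flux.Zeros and Flux.Diagonal are gone:
layer = Dense(4 => 2; bias = false)  # instead of initb = Zeros or Zeros()
scale = Flux.Scale(4, relu)          # instead of Flux.Diagonal(4), now with an activation

# loadparams! is gone; copy weights between structurally identical models instead:
m1, m2 = Dense(4 => 2), Dense(4 => 2)
Flux.loadmodel!(m1, m2)              # was Flux.loadparams!(m1, Flux.params(m2))
```
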
22 changes: 8 additions & 14 deletions src/layers/normalise.jl
@@ -191,10 +191,9 @@ struct LayerNorm{F,D,T,N}
affine::Bool
end

function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, eps::Real=1f-5, ϵ=nothing)
ε = _greek_ascii_depwarn(ϵ => eps, :LayerNorm, "ϵ" => "eps")
function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, eps::Real=1f-5)
diag = affine ? Scale(size..., λ) : λ!=identity ? Base.Fix1(broadcast, λ) : identity
return LayerNorm(λ, diag, ε, size, affine)
return LayerNorm(λ, diag, eps, size, affine)
end
LayerNorm(size::Integer...; kw...) = LayerNorm(Int.(size); kw...)
LayerNorm(size_act...; kw...) = LayerNorm(Int.(size_act[1:end-1]), size_act[end]; kw...)
@@ -328,17 +327,16 @@ end
function BatchNorm(chs::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine::Bool=true, track_stats::Bool=true, active::Union{Bool,Nothing}=nothing,
eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)
eps::Real=1f-5, momentum::Real=0.1f0)

ε = _greek_ascii_depwarn(ϵ => eps, :BatchNorm, "ϵ" => "eps")

β = affine ? initβ(chs) : nothing
γ = affine ? initγ(chs) : nothing
μ = track_stats ? zeros32(chs) : nothing
σ² = track_stats ? ones32(chs) : nothing

return BatchNorm(λ, β, γ,
μ, σ², ε, momentum,
μ, σ², eps, momentum,
affine, track_stats,
active, chs)
end
@@ -421,17 +419,15 @@ end
function InstanceNorm(chs::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine::Bool=false, track_stats::Bool=false, active::Union{Bool,Nothing}=nothing,
eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)

ε = _greek_ascii_depwarn(ϵ => eps, :InstanceNorm, "ϵ" => "eps")
eps::Real=1f-5, momentum::Real=0.1f0)

β = affine ? initβ(chs) : nothing
γ = affine ? initγ(chs) : nothing
μ = track_stats ? zeros32(chs) : nothing
σ² = track_stats ? ones32(chs) : nothing

return InstanceNorm(λ, β, γ,
μ, σ², ε, momentum,
μ, σ², eps, momentum,
affine, track_stats,
active, chs)
end
@@ -520,9 +516,7 @@ end
function GroupNorm(chs::Int, G::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine::Bool=true, active::Union{Bool,Nothing}=nothing,
eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)

ε = _greek_ascii_depwarn(ϵ => eps, :GroupNorm, "ϵ" => "eps")
eps::Real=1f-5, momentum::Real=0.1f0)

chs % G == 0 || error("The number of groups ($(G)) must divide the number of channels ($chs)")

@@ -535,7 +529,7 @@ function GroupNorm(chs::Int, G::Int, λ=identity;
return GroupNorm(G, λ,
β, γ,
μ, σ²,
ε, momentum,
eps, momentum,
affine, track_stats,
active, chs)
end
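
After this change the four norm layers accept only the ASCII `eps` keyword; `ϵ` is no longer rerouted through `_greek_ascii_depwarn`. A small sketch of the surviving constructors, with channel counts and input sizes chosen arbitrarily:

```julia
using Flux

ln = LayerNorm(8; eps = 1f-5)                      # LayerNorm(8; ϵ = 1f-5) now errors
bn = BatchNorm(16, relu; eps = 1f-5, momentum = 0.1f0)
gn = GroupNorm(16, 4; eps = 1f-5)

x = randn(Float32, 16, 10)
bn(x)   # same (16, 10) output as before; only the keyword spelling changed
```
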
5 changes: 2 additions & 3 deletions src/layers/stateless.jl
@@ -34,11 +34,10 @@ julia> isapprox(std(y; dims=1, corrected=false), ones(1, 10), atol=1e-5)
true
```
"""
@inline function normalise(x::AbstractArray; dims=ndims(x), eps=ofeltype(x, 1e-5), ϵ=nothing)
ε = _greek_ascii_depwarn(ϵ => eps, :InstanceNorm, "ϵ" => "eps")
@inline function normalise(x::AbstractArray; dims=ndims(x), eps=ofeltype(x, 1e-5))
μ = mean(x, dims=dims)
σ = std(x, dims=dims, mean=μ, corrected=false)
return @. (x - μ) / (σ + ε)
return @. (x - μ) / (σ + eps)
end

"""
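
`Flux.normalise` follows the same pattern: only `eps` remains. A short sketch mirroring the doctest shown in this hunk, with array sizes and tolerance chosen for illustration:

```julia
using Flux, Statistics

x = randn(Float32, 5, 10)
y = Flux.normalise(x; dims = 1, eps = 1f-5)        # the ϵ keyword is gone

# Each column should now have roughly zero mean and unit (uncorrected) std:
isapprox(std(y; dims = 1, corrected = false), ones(1, 10); atol = 1e-4)
```
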
2 changes: 1 addition & 1 deletion src/losses/Losses.jl
@@ -4,7 +4,7 @@ using Statistics
using Zygote
using Zygote: @adjoint
using ChainRulesCore
using ..Flux: ofeltype, epseltype, _greek_ascii_depwarn
using ..Flux: ofeltype, epseltype
using NNlib: logsoftmax, logσ, ctc_loss, ctc_alpha, ∇ctc_loss
import Base.Broadcast: broadcasted

52 changes: 21 additions & 31 deletions src/losses/functions.jl
@@ -66,10 +66,9 @@ julia> Flux.msle(Float32[0.9, 1.8, 2.7], 1:3)
0.011100831f0
```
"""
function msle(ŷ, y; agg = mean, eps::Real = epseltype(ŷ), ϵ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :msle, "ϵ" => "eps")
function msle(ŷ, y; agg = mean, eps::Real = epseltype(ŷ))
_check_sizes(ŷ, y)
agg((log.((ŷ .+ ϵ) ./ (y .+ ϵ))) .^2 )
agg((log.((ŷ .+ eps) ./ (y .+ eps))) .^2 )
end

function _huber_metric(abs_error, δ)
@@ -101,9 +100,8 @@ julia> Flux.huber_loss(ŷ, 1:3, delta=0.05) # changes behaviour as |ŷ - y| >
0.003750000000000005
```
"""
function huber_loss(ŷ, y; agg = mean, delta::Real = 1, δ = nothing)
delta_tmp = _greek_ascii_depwarn(δ => delta, :huber_loss, "δ" => "delta")
δ = ofeltype(ŷ, delta_tmp)
function huber_loss(ŷ, y; agg = mean, delta::Real = 1)
δ = ofeltype(ŷ, delta)
_check_sizes(ŷ, y)
abs_error = abs.(ŷ .- y)

@@ -230,10 +228,9 @@ julia> Flux.crossentropy(y_model, y_smooth)
1.5776052f0
```
"""
function crossentropy(ŷ, y; dims = 1, agg = mean, eps::Real = epseltype(ŷ), ϵ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :crossentropy, "ϵ" => "eps")
function crossentropy(ŷ, y; dims = 1, agg = mean, eps::Real = epseltype(ŷ))
_check_sizes(ŷ, y)
agg(.-sum(xlogy.(y, ŷ .+ ϵ); dims = dims))
agg(.-sum(xlogy.(y, ŷ .+ eps); dims = dims))
end

"""
@@ -319,10 +316,9 @@ julia> Flux.crossentropy(y_prob, y_hot)
0.43989f0
```
"""
function binarycrossentropy(ŷ, y; agg = mean, eps::Real = epseltype(ŷ), ϵ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :binarycrossentropy, "ϵ" => "eps")
function binarycrossentropy(ŷ, y; agg = mean, eps::Real = epseltype(ŷ))
_check_sizes(ŷ, y)
agg(@.(-xlogy(y, ŷ + ϵ) - xlogy(1 - y, 1 - ŷ + ϵ)))
agg(@.(-xlogy(y, ŷ + eps) - xlogy(1 - y, 1 - ŷ + eps)))
end

"""
@@ -390,11 +386,10 @@ julia> Flux.kldivergence(p1, p2; eps = 0) # about 17.3 with the regulator
Inf
```
"""
function kldivergence(ŷ, y; dims = 1, agg = mean, eps::Real = epseltype(ŷ), ϵ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :kldivergence, "ϵ" => "eps")
function kldivergence(ŷ, y; dims = 1, agg = mean, eps::Real = epseltype(ŷ))
_check_sizes(ŷ, y)
entropy = agg(sum(xlogx.(y); dims = dims))
cross_entropy = crossentropy(ŷ, y; dims, agg, eps=ϵ)
cross_entropy = crossentropy(ŷ, y; dims, agg, eps)
return entropy + cross_entropy
end

@@ -531,13 +526,12 @@ Calculated as:
"""
function tversky_loss(ŷ, y; beta::Real = 0.7, β = nothing)
beta_temp = _greek_ascii_depwarn(β => beta, :tversky_loss, "β" => "beta")
β = ofeltype(ŷ, beta_temp)
_check_sizes(ŷ, y)
#TODO add agg
num = sum(y .* ŷ) + 1
den = sum(y .* ŷ + β * (1 .- y) .* ŷ + (1 - β) * y .* (1 .- ŷ)) + 1
1 - num / den
β = ofeltype(ŷ, beta)
_check_sizes(ŷ, y)
#TODO add agg
num = sum(y .* ŷ) + 1
den = sum(y .* ŷ + β * (1 .- y) .* ŷ + (1 - β) * y .* (1 .- ŷ)) + 1
1 - num / den
end

"""
@@ -568,12 +562,10 @@ julia> Flux.binary_focal_loss(ŷ, y) ≈ 0.0728675615927385
true
```
"""
function binary_focal_loss(ŷ, y; agg=mean, gamma=2, eps::Real=epseltype(ŷ), ϵ = nothing, γ = nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :binary_focal_loss, "ϵ" => "eps")
gamma_temp = _greek_ascii_depwarn(γ => gamma, :binary_focal_loss, "γ" => "gamma")
γ = gamma_temp isa Integer ? gamma_temp : ofeltype(ŷ, gamma_temp)
function binary_focal_loss(ŷ, y; agg=mean, gamma=2, eps::Real=epseltype(ŷ))
γ = gamma isa Integer ? gamma : ofeltype(ŷ, gamma)
_check_sizes(ŷ, y)
ŷϵ = ŷ .+ ϵ
ŷϵ = ŷ .+ eps
p_t = y .* ŷϵ + (1 .- y) .* (1 .- ŷϵ)
ce = .-log.(p_t)
weight = (1 .- p_t) .^ γ
@@ -616,11 +608,9 @@ See also: [`Losses.binary_focal_loss`](@ref) for binary (not one-hot) labels
"""
function focal_loss(ŷ, y; dims=1, agg=mean, gamma=2, eps::Real=epseltype(ŷ), ϵ=nothing, γ=nothing)
ϵ = _greek_ascii_depwarn(ϵ => eps, :focal_loss, "ϵ" => "eps")
gamma_temp = _greek_ascii_depwarn(γ => gamma, :focal_loss, "γ" => "gamma")
γ = gamma_temp isa Integer ? gamma_temp : ofeltype(ŷ, gamma_temp)
γ = gamma isa Integer ? gamma : ofeltype(ŷ, gamma)
_check_sizes(ŷ, y)
ŷϵ =.+ ϵ
ŷϵ =.+ eps
agg(sum(@. -y * (1 - ŷϵ)^γ * log(ŷϵ); dims))
end

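
The loss functions likewise keep only their ASCII keywords (`eps`, `delta`, `gamma`, `beta`). A hedged sketch of the surviving call forms, using throwaway arrays rather than anything from this commit:

```julia
using Flux

ŷ = softmax(randn(Float32, 3, 5))                  # made-up predictions
y = Flux.onehotbatch(rand(1:3, 5), 1:3)            # made-up targets

Flux.crossentropy(ŷ, y; eps = 1f-7)                # was ϵ = 1f-7
Flux.focal_loss(ŷ, y; gamma = 2)                   # was γ = 2
Flux.huber_loss(randn(Float32, 4), randn(Float32, 4); delta = 0.5)    # was δ = 0.5
Flux.tversky_loss(rand(Float32, 3, 5), rand(Bool, 3, 5); beta = 0.7)  # was β = 0.7
```
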
2 changes: 1 addition & 1 deletion test/layers/normalisation.jl
@@ -446,7 +446,7 @@ end
@test Zygote.hessian_reverse(summ1, [1.0,2.0,3.0]) == zeros(3, 3)

m2 = Chain(BatchNorm(3), sum)
@test_broken Zygote.hessian_reverse(m2, Float32[1 2; 3 4; 5 6]) == zeros(Float32, 6, 6)
@test Zygote.hessian_reverse(m2, Float32[1 2; 3 4; 5 6]) == zeros(Float32, 6, 6) broken = VERSION >= v"1.11"
end

@testset "ForwardDiff" begin
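
The replacement line relies on the `broken` keyword of `@test`, so the expectation of failure can be conditioned on the Julia version. A standalone illustration of that keyword; the condition below is deliberately false, so the test simply passes:

```julia
using Test

@test_broken 1 + 1 == 3                  # unconditionally expected to fail

# Same idea, but only marked broken when the condition holds:
@test 1 + 1 == 2 broken = VERSION >= v"1.99"
```
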
2 changes: 1 addition & 1 deletion test/layers/show.jl
@@ -76,7 +76,7 @@ end

# Bug when no children, https://github.com/FluxML/Flux.jl/issues/2208
struct NoFields end
Flux.@functor NoFields
Flux.@layer NoFields

@testset "show with no fields" begin
str = repr("text/plain", Chain(Dense(1=>1), Dense(1=>1), NoFields()))
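
`Flux.@layer` is the current way to register a custom type as a layer, replacing `Flux.@functor`. A sketch of how a user-defined wrapper might opt in; the `MyBlock` type is invented for this example, and `trainables` is assumed to be available in this Flux version:

```julia
using Flux

struct MyBlock                 # stand-in for a user-defined layer
    dense::Dense
end
MyBlock(n::Integer) = MyBlock(Dense(n => n, relu))
(b::MyBlock)(x) = b.dense(x)

Flux.@layer MyBlock            # replaces the older `Flux.@functor MyBlock`

m = MyBlock(3)
m(randn(Float32, 3, 2))        # still callable as before
Flux.trainables(m)             # its parameters are now visible to Flux / Optimisers
```
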