changes for functors 0.5
CarloLucibello committed Nov 23, 2024
1 parent 0702302 commit ba5f33e
Showing 10 changed files with 25 additions and 67 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
@@ -12,6 +12,8 @@ See also [github's page](https://github.com/FluxML/Flux.jl/releases) for a compl
Now Flux re-exports the optimisers from Optimisers.jl. Most users will be unaffected by this change.
The module is still available for now, but will be removed in a future release.
* Most Flux layers will [re-use memory via `NNlib.bias_act!`](https://github.com/FluxML/Flux.jl/pull/2327), when possible.
* `Flux.params` has been deprecated. Use Zygote's explicit differentiation instead, e.g.
  `gradient(m -> loss(m, x, y), model)`, or use `Flux.trainables(model)` to get the trainable parameters (see the sketch below).
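
A minimal sketch of the explicit style (the model, data, and loss below are hypothetical, chosen only to illustrate the migration):

```julia
using Flux

# A toy model, data, and loss function.
model = Dense(3 => 2)
x, y = rand(Float32, 3, 5), rand(Float32, 2, 5)
loss(m, x, y) = Flux.mse(m(x), y)

# Explicit-mode gradient: differentiate with respect to the model itself.
grads = Flux.gradient(m -> loss(m, x, y), model)[1]

# Collect the trainable parameter arrays, replacing the old `Flux.params(model)`.
ps = Flux.trainables(model)
```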

## v0.14.22
* Data movement between devices is now provided by [MLDataDevices.jl](https://github.com/LuxDL/MLDataDevices.jl).
5 changes: 4 additions & 1 deletion docs/src/reference/models/functors.md
@@ -4,14 +4,17 @@ CollapsedDocStrings = true

# Recursive transformations from Functors.jl

Flux models are deeply nested structures, and [Functors.jl](https://github.com/FluxML/Functors.jl) provides tools needed to explore such objects, apply functions to the parameters they contain, and re-build them.
Flux models are deeply nested structures, and [Functors.jl](https://github.com/FluxML/Functors.jl) provides tools needed to explore such objects, apply functions to the parameters they contain (e.g. for moving them to the GPU), and re-build them.

!!! compat "Flux ≤ 0.14"
All layers were previously defined with the `Functors.@functor` macro.
This still works, but it is recommended that you use the new [`Flux.@layer`](@ref Flux.@layer) macro instead.
Both allow [`Flux.setup`](@ref Flux.setup) to see the parameters inside, and [`gpu`](@ref) to move them to the GPU, but [`Flux.@layer`](@ref Flux.@layer) also overloads printing,
and offers a way to define `trainable` at the same time.

!!! compat "Functors 0.5"
With Functors.jl v0.5, which is required by Flux v0.15 and later, every custom type is a functor by default. This means that applying `Flux.@layer` to a type is no longer strictly necessary, but it is still recommended for additional features like pretty-printing and `trainable` (see the sketch below).
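
As a minimal sketch of what this means in practice (the `Affine` struct below is a hypothetical example, not part of Flux):

```julia
using Flux

# A plain two-field struct, with neither `Flux.@functor` nor `Flux.@layer` applied.
struct Affine
    W
    b
end
(a::Affine)(x) = a.W * x .+ a.b

a = Affine(rand(Float32, 2, 3), zeros(Float32, 2))

# With Functors.jl v0.5 the struct is already a functor, so Flux can see inside it:
Flux.trainables(a)   # a vector containing the arrays `a.W` and `a.b`
Flux.f64(a)          # recursively converts the parameters to Float64

# `Flux.@layer` is still recommended, e.g. for pretty-printing and `trainable` control:
Flux.@layer Affine
```

After the `@layer` line, `a` should print with a parameter count, like Flux's built-in layers.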

`Functors.jl` has its own [notes on basic usage](https://fluxml.ai/Functors.jl/stable/#Basic-Usage-and-Implementation) with more details. Additionally, the [Advanced Model Building and Customisation](@ref man-advanced) page covers the use cases of `Functors` in greater detail.

```@docs
1 change: 0 additions & 1 deletion perf/recurrent.jl
@@ -3,7 +3,6 @@
struct RNNWrapper{T}
rnn::T
end
Flux.@functor RNNWrapper

# Need to specialize for RNNWrapper.
fw(r::RNNWrapper, X::Vector{<:AbstractArray}) = begin
28 changes: 10 additions & 18 deletions src/deprecations.jl
@@ -116,25 +116,17 @@ function Optimisers.update!(opt::Optimisers.AbstractRule, model::Chain, grad::Tu
end


"""
@functor MyLayer
Flux used to require the use of `Functors.@functor` to mark any new layer-like struct.
This allowed it to explore inside the struct, and update any trainable parameters within.
Flux v0.15 removes this requirement. This is because Functors v0.5 changed its behaviour
to be opt-out instead of opt-in. Arbitrary structs will now be explored without special marking.
Hence calling `@functor` is no longer required.
Calling `Flux.@layer MyLayer` is, however, still recommended. This adds various convenience methods
for your layer type, such as pretty printing.
"""
macro functor(ex)
Base.depwarn("""The macro `@functor` is deprecated.
Most likely, you should write `Flux.@layer MyLayer` which will add various convenience methods for your type,
such as pretty-printing, and use with Adapt.jl.
However, this is not required. Flux.jl v0.15 uses Functors.jl v0.5, which makes exploration of most nested `struct`s
opt-out instead of opt-in... so Flux will automatically see inside any custom struct definitions.
""", Symbol("@functor")
_layer_macro(ex)
Base.depwarn("""The macro `Flux.@functor` is deprecated.
Most likely, you should write `Flux.@layer MyLayer` which will add
various convenience methods for your type, such as pretty-printing, and use with Adapt.jl.
However, this is not strictly required: Flux.jl v0.15 uses Functors.jl v0.5,
which makes exploration of most nested `struct`s opt-out instead of opt-in...
so Flux will automatically see inside any custom struct definitions to take care of things
like moving data to the GPU.
""", Symbol("@functor"))

return :(Functors.functor($(esc(ex))))
end

### v0.16 deprecations ####################
10 changes: 0 additions & 10 deletions src/functor.jl
@@ -1,9 +1,3 @@
import Adapt: adapt, adapt_storage
using LinearAlgebra: Cholesky
using Zygote: IdSet
import Functors: Functors, @functor, functor, fmap, isleaf
using SparseArrays: AbstractSparseArray

"""
testmode!(model, [mode]) -> model
@@ -222,10 +216,6 @@ Chain(
"""
f16(m) = _paramtype(Float16, m)

# Functors for certain Julia data structures -- PIRACY, should move to Functors.jl
@functor Cholesky
trainable(c::Cholesky) = ()


"""
gpu(data::DataLoader)
37 changes: 6 additions & 31 deletions src/layers/macro.jl
@@ -4,12 +4,8 @@
@layer :expand Chain
@layer BatchNorm trainable=(β,γ)
This macro replaces most uses of `@functor`. Its basic purpose is the same:
When you define a new layer, this tells Flux to explore inside it
to see the parameters it trains, and also to move them to the GPU, change precision, etc.
Like `@functor`, this assumes your struct has the default constructor, to enable re-building.
If you define an inner constructor (i.e. a function within the `struct` block) things may break.
This macro adds convenience functionality to a custom type to serve
as a neural network layer, module, or entire model.
The keyword `trainable` allows you to limit this exploration, instead of visiting all `fieldnames(T)`.
Note that it is never necessary to tell Flux to ignore non-array objects such as functions or sizes.
@@ -30,15 +26,9 @@ julia> struct Trio; a; b; c end
julia> tri = Trio(Dense([1.1 2.2], [0.0], tanh), Dense(hcat(3.3), false), Dropout(0.4))
Trio(Dense(2 => 1, tanh), Dense(1 => 1; bias=false), Dropout(0.4))
julia> Flux.destructure(tri) # parameters are not yet visible to Flux
(Bool[], Restructure(Trio, ..., 0))
julia> Flux.@layer :expand Trio
julia> Flux.destructure(tri) # now gpu, params, train!, etc will see inside too
([1.1, 2.2, 0.0, 3.3], Restructure(Trio, ..., 4))
julia> tri # and layer is printed like Chain
julia> tri # now the layer is printed like Chain
Trio(
Dense(2 => 1, tanh), # 3 parameters
Dense(1 => 1; bias=false), # 1 parameters
@@ -67,7 +57,7 @@ function _layer_macro(exs...)
exs
end

push!(out.args, _macro_functor(esc(type)))
push!(out.args, _macro_adapt(esc(type)))

for j in 1:length(rest)
ex = rest[j]
@@ -86,28 +76,13 @@ function _layer_macro(exs...)
return out
end


# @layer's code for Functors & Adapt
# Unlike @functor, _default_functor doesn't need to eval anything

function _macro_functor(type)
quote
Functors.functor(::Type{T}, x) where {T<:$type} = $_default_functor(T, x)
Adapt.adapt_structure(to, layer::$type) = $fmap($adapt(to), layer)
end
end

function _macro_functor(type, fields)
Meta.isexpr(fields, :tuple) || error("expected a tuple of field names")
symbols = Tuple(map(_noquotenode, fields.args))
# @layer's code for Adapt
function _macro_adapt(type)
quote
Functors.functor(::Type{T}, x) where {T<:$type} = $_custom_functor(T, x, Val($symbols))
Adapt.adapt_structure(to, layer::$type) = $fmap($adapt(to), layer)
end
end
_macro_functor(type, field::Union{Symbol,QuoteNode}) = _macro_functor(type, :(($field,))) # lets you forget a comma


# @layer's code for Optimisers.trainable, and perhaps anything else,
# with the pattern that keywords mean function names & what fields they pick.

6 changes: 3 additions & 3 deletions src/layers/show.jl
@@ -20,13 +20,13 @@ function _macro_big_show(ex)
end

function _big_show(io::IO, obj, indent::Int=0, name=nothing)
pre, post = _show_pre_post(obj)
children = _show_children(obj)
if all(_show_leaflike, children)
# This check may not be useful anymore: it tries to infer when to stop the recursion by looking for grandkids,
# but once all layers use @layer, they stop the recursion by defining a method for _big_show.
_layer_show(io, obj, indent, name)
else
pre, post = _show_pre_post(obj)
println(io, " "^indent, isnothing(name) ? "" : "$name = ", pre)
if obj isa Chain{<:NamedTuple} || obj isa NamedTuple
# then we insert names -- can this be done more generically?
@@ -66,7 +66,7 @@ _show_pre_post(obj) = string(nameof(typeof(obj)), "("), ")"
_show_pre_post(::AbstractVector) = "[", "]"
_show_pre_post(::NamedTuple) = "(;", ")"

_show_leaflike(x) = isleaf(x) # mostly follow Functors, except for:
_show_leaflike(x) = Functors.isleaf(x) # mostly follow Functors, except for:

# note the covariance of tuple, using <:T causes warning or error
_show_leaflike(::Tuple{Vararg{Number}}) = true # e.g. stride of Conv
@@ -146,7 +146,7 @@ function _big_finale(io::IO, m)
end

_childarray_sum(f, x::AbstractArray{<:Number}) = f(x)
_childarray_sum(f, x) = isleaf(x) ? 0 : sum(y -> _childarray_sum(f, y), Functors.children(x),
_childarray_sum(f, x) = Functors.isleaf(x) ? 0 : sum(y -> _childarray_sum(f, y), Functors.children(x),
init=0)

# utility functions
1 change: 0 additions & 1 deletion test/ext_enzyme/enzyme.jl
@@ -76,7 +76,6 @@ end
end
SimpleDense(in::Integer, out::Integer; σ=identity) = SimpleDense(randn(Float32, out, in), zeros(Float32, out), σ)
(m::SimpleDense)(x) = m.σ.(m.weight * x .+ m.bias)
@functor SimpleDense

model = SimpleDense(2, 4)
x = randn(Float32, 2)
1 change: 0 additions & 1 deletion test/layers/basic.jl
@@ -293,7 +293,6 @@ using Flux: activations
x
end
(l::L1)(x) = l.x * x
Flux.@functor L1
Base.:*(a::AbstractArray, b::Input) = a * b.x

par = Parallel(+, L1(rand(Float32, 3,3)), L1(rand(Float32, 3,3)))
1 change: 0 additions & 1 deletion test/utils.jl
@@ -623,7 +623,6 @@ end
paths::T
end
Split(paths...) = Split(paths)
Flux.@functor Split
(m::Split)(x::AbstractArray) = map(f -> f(x), m.paths)

n_input, n_batch, n_shared = 5, 13, 11