Commit
misc stuff for v0.15 release (#2534)
Co-authored-by: Michael Abbott <[email protected]>
CarloLucibello and mcabbott authored Dec 4, 2024
1 parent bde153e commit 4fea1f9
Showing 39 changed files with 238 additions and 246 deletions.
35 changes: 5 additions & 30 deletions .buildkite/pipeline.yml
@@ -1,5 +1,5 @@
steps:
- label: "CUDA GPU with julia v1"
- label: "CUDA - Julia 1"
plugins:
- JuliaCI/julia#v1:
version: "1"
@@ -17,17 +17,7 @@ steps:
FLUX_TEST_ENZYME: "false"
timeout_in_minutes: 60

# - label: "GPU nightly"
# plugins:
# - JuliaCI/julia#v1:
# version: "nightly"
# - JuliaCI/julia-test#v1: ~
# agents:
# queue: "juliagpu"
# cuda: "*"
# timeout_in_minutes: 60

- label: "Metal with julia v1"
- label: "Metal - Julia 1"
plugins:
- JuliaCI/julia#v1:
version: "1"
@@ -41,32 +31,18 @@ steps:
queue: "juliaecosystem"
os: "macos"
arch: "aarch64"
commands: |
julia --project -e '
# make sure the 1.8-era Manifest works on this Julia version
using Pkg
Pkg.resolve()'
commands: |
printf "[Flux]\ngpu_backend = \"Metal\"\n" > LocalPreferences.toml
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 60
env:
FLUX_TEST_METAL: "true"
FLUX_TEST_CPU: "false"
FLUX_TEST_ENZYME: "false"
matrix:
setup:
julia:
# - "1.9"
- "1"
# - "nightly"

- label: "AMD GPU with Julia 1"
- label: "AMDGPU - Julia 1"
plugins:
- JuliaCI/julia#v1:
version: "1"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
dirs:
- src
@@ -75,8 +51,6 @@ steps:
queue: "juliagpu"
rocm: "*"
rocmgpu: "*"
commands: |
printf "[Flux]\ngpu_backend = \"AMDGPU\"\n" > LocalPreferences.toml
timeout_in_minutes: 60
env:
JULIA_AMDGPU_CORE_MUST_LOAD: "1"
@@ -86,5 +60,6 @@ steps:
FLUX_TEST_CPU: "false"
FLUX_TEST_ENZYME: "false"
JULIA_NUM_THREADS: 4

env:
SECRET_CODECOV_TOKEN: "fAV/xwuaV0l5oaIYSAXRQIor8h7yHdlrpLUZFwNVnchn7rDk9UZoz0oORG9vlKLc1GK2HhaPRAy+fTkJ3GM/8Y0phHh3ANK8f5UsGm2DUTNsnf6u9izgnwnoRTcsWu+vSO0fyYrxBvBCoJwljL+yZbDFz3oE16DP7HPIzxfQagm+o/kMEszVuoUXhuLXXH0LxT6pXl214qjqs04HfMRmKIIiup48NB6fBLdhGlQz64MdMNHBfgDa/fafB7eNvn0X6pEOxysoy6bDQLUhKelOXgcDx1UsTo34Yiqr+QeJPAeKcO//PWurwQhPoUoHfLad2da9DN4uQk4YQLqAlcIuAA==;U2FsdGVkX1+mRXF2c9soCXT7DYymY3msM+vrpaifiTp8xA+gMpbQ0G63WY3tJ+6V/fJcVnxYoKZVXbjcg8fl4Q=="
11 changes: 1 addition & 10 deletions .github/workflows/ci.yml
@@ -52,18 +52,9 @@ jobs:
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- name: "Run test without coverage report"
uses: julia-actions/julia-runtest@v1
if: matrix.version != '1' || matrix.os != 'ubuntu-latest'
with:
coverage: false
- name: "Run test with coverage report"
uses: julia-actions/julia-runtest@v1
if: matrix.version == '1' && matrix.os == 'ubuntu-latest'
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
if: matrix.version == '1' && matrix.os == 'ubuntu-latest'
- uses: codecov/codecov-action@v5
if: matrix.version == '1' && matrix.os == 'ubuntu-latest'
with:
files: lcov.info

4 changes: 2 additions & 2 deletions .github/workflows/pr_comment.yml
@@ -8,7 +8,7 @@ jobs:
steps:
- name: Create PR comment
if: github.event_name == 'pull_request' && github.repository == github.event.pull_request.head.repo.full_name && github.event.label.name == 'documentation' # if this is a pull request build AND the pull request is NOT made from a fork
uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6
uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b
with:
message: 'Once the build has completed, you can preview any updated documentation at this URL: https://fluxml.ai/Flux.jl/previews/PR${{ github.event.number }}/ in ~20 minutes'
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
github-token: ${{ secrets.GITHUB_TOKEN }}
5 changes: 3 additions & 2 deletions NEWS.md
@@ -13,10 +13,11 @@ See also [github's page](https://github.com/FluxML/Flux.jl/releases) for a compl
The module is still available for now, but will be removed in a future release.
* Most Flux layers will [re-use memory via `NNlib.bias_act!`](https://github.com/FluxML/Flux.jl/pull/2327), when possible.
* Further support for Enzyme.jl, via methods of `Flux.gradient(loss, Duplicated(model))`.
Flux now owns & exports `gradient`, but without `Duplicated` this still defaults to calling Zygote.jl.
Flux now owns & exports `gradient` and `withgradient`, but without `Duplicated` this still defaults to calling Zygote.jl.
* `Flux.params` has been deprecated. Use Zygote's explicit differentiation instead,
  `gradient(m -> loss(m, x, y), model)`, or use `Flux.trainables(model)` to get the trainable parameters; a short sketch of these calls follows this list.
* Flux now requires Functors.jl v0.5. This new release of Functors assumes all types to be functors by default. Therefore, applying `@layer` or `@functor` to a type is no longer strictly necessary for Flux's models. However, it is still recommended to use `@layer Model` for additional functionality like pretty printing.
* Flux now requires Functors.jl v0.5. This new release of Functors assumes all types to be functors by default. Therefore, applying `Flux.@layer` or `Functors.@functor` to a type is no longer strictly necessary for Flux's models. However, it is still recommended to use `@layer Model` for additional functionality like pretty printing.
* `@layer Model` now behaves the same as `@layer :expand Model`, which means that the model is expanded into its sublayers (if there are any) when printed. To force compact printing, use `@layer :noexpand Model`.
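
For illustration, here is a minimal sketch of the calls mentioned above. The layer sizes, random data, and `mse` loss are arbitrary choices for this example, not part of the release notes:

```julia
using Flux, Enzyme

model = Chain(Dense(2 => 3, relu), Dense(3 => 1))
x, y = rand(Float32, 2, 8), rand(Float32, 1, 8)
loss(m) = Flux.mse(m(x), y)

grad_zygote = Flux.gradient(loss, model)              # default path, still calls Zygote.jl
grad_enzyme = Flux.gradient(loss, Duplicated(model))  # opts in to Enzyme.jl
ps = Flux.trainables(model)                           # replaces Flux.params(model)
```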

## v0.14.22
* Data movement between devices is now provided by [MLDataDevices.jl](https://github.com/LuxDL/MLDataDevices.jl).
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,6 +1,6 @@
name = "Flux"
uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c"
version = "0.15.0-DEV"
version = "0.15.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
26 changes: 15 additions & 11 deletions docs/make.jl
@@ -2,11 +2,25 @@ using Documenter, Flux, NNlib, Functors, MLUtils, BSON, Optimisers,
OneHotArrays, Zygote, ChainRulesCore, Plots, MLDatasets, Statistics,
DataFrames, JLD2, MLDataDevices


DocMeta.setdocmeta!(Flux, :DocTestSetup, :(using Flux); recursive = true)

makedocs(
## This should be
## modules = [Flux], checkdocs = :all,
## but we get errors.
modules = [Flux, NNlib, Functors, MLUtils, Zygote, OneHotArrays, Optimisers, ChainRulesCore, MLDataDevices],
sitename = "Flux",
doctest = false, # done later
checkdocs = :none, # :all, :exports, :none
# checkdocs_ignored_modules = [NNlib, Functors, MLUtils, Zygote, OneHotArrays, Optimisers, ChainRulesCore, MLDataDevices],
warnonly = [:cross_references],
format = Documenter.HTML(
sidebar_sitename = false,
analytics = "UA-36890222-9",
assets = ["assets/flux.css"],
prettyurls = get(ENV, "CI", nothing) == "true"
),
pages = [
"Welcome" => "index.md",
"Guide" => [
@@ -58,17 +72,7 @@ makedocs(
"Deep Convolutional GAN" => "tutorials/2021-10-08-dcgan-mnist.md",
=#
],
],
format = Documenter.HTML(
sidebar_sitename = false,
analytics = "UA-36890222-9",
assets = ["assets/flux.css"],
prettyurls = get(ENV, "CI", nothing) == "true"
),
doctest = false, # done later
checkdocs = :none, # :exports # Do not check if all functions appear in the docs
# since it considers all packages
warnonly = [:cross_references]
]
)

doctest(Flux) # only test Flux modules
29 changes: 20 additions & 9 deletions docs/src/guide/gpu.md
@@ -3,7 +3,9 @@
Most work on neural networks involves the use of GPUs, as they can typically perform the required computation much faster.
This page describes how Flux co-operates with various other packages, which talk to GPU hardware.

## Basic GPU use: from `Array` to `CuArray` with `cu`
For those in a hurry, see the [quickstart](@ref man-quickstart) page. Or do `using CUDA` and then call `gpu` on both the model and the data.

## Basic GPU use: from `Array` to `CuArray`

Julia's GPU packages work with special array types, in place of the built-in `Array`.
The most used is `CuArray` provided by [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), for GPUs made by NVIDIA.
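
As a minimal sketch (assuming a working CUDA installation), `cu` converts an ordinary `Array` to a `CuArray`, and operations on it then run on the GPU:

```julia
using CUDA

W = cu(rand(Float32, 3, 4))   # a CuArray{Float32} living in GPU memory
x = cu(rand(Float32, 4))
y = W * x                     # the matrix-vector product runs on the GPU
Array(y)                      # copy the result back to an ordinary Array
```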
Expand Down Expand Up @@ -119,7 +121,7 @@ model = Chain(...) |> device
The reason they work on Flux models is that `Flux.@layer Layer` defines methods of `Adapt.adapt_structure(to, lay::Layer)`.
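
As a rough sketch of what such a method looks like (the struct and field names here are placeholders, not Flux internals):

```julia
using Adapt

struct Layer{W}
    weight::W
end

# Roughly the kind of method `Flux.@layer Layer` generates: adapt each field,
# then rebuild the struct around the moved arrays.
Adapt.adapt_structure(to, l::Layer) = Layer(Adapt.adapt(to, l.weight))
```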


## Automatic GPU choice with `gpu`
## Automatic GPU choice with `gpu` and `gpu_device`

Flux also provides a more automatic way of choosing which GPU (or none) to use. This is the function `gpu`:
* By default it does nothing.
@@ -131,19 +133,28 @@ Flux also provides a more automatic way of choosing which GPU (or none) to use.
For the most part, this means that a script which says `model |> gpu` and `data |> gpu` will just work.
It should always run, and if a GPU package is loaded (and finds the correct hardware) then that will be used.

The function `gpu` uses a lower-level function called `get_device()` from [MLDataDevices.jl](https://github.com/LuxDL/MLDataDevices.jl),
which checks what to do & then returns some device object. In fact, the entire implementation is just this:
The function `gpu` uses a lower-level function called [`gpu_device`](@ref) from MLDataDevices.jl,
which checks what to do and then returns some device object. In fact, the entire implementation is just this:

```julia
gpu(x) = gpu_device()(x)
cpu(x) = cpu_device()(x)
```

Automatic backend selection through `gpu` is not type-stable. That doesn't matter if you do it once, or once per large batch -- it costs a few microseconds. But it might matter if you do it within some loop.

To avoid this, you can first obtain a "device object" with `to_device = gpu_device()`, once, and then use this as the function to transfer data. Something like this:

```julia
to_device = gpu_device()
gpu_model = model |> to_device

for epoch in 1:num_epochs
    for (x, y) in dataloader
        x_gpu, y_gpu = (x, y) |> to_device
        # training code...
    end
end
```

## Manually selecting devices

I thought there was a whole `Flux.gpu_backend!` and Preferences.jl story we had to tell??

Finally, setting a backend preference with [`gpu_backend!`](@ref) gives type stability to the whole pipeline.
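
A minimal sketch of setting that preference (this writes to `LocalPreferences.toml` and takes effect after restarting Julia; `"CUDA"` is just one possible backend name):

```julia
using Flux

Flux.gpu_backend!("CUDA")   # persist the preferred backend for this project
```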

## Transferring Training Data

@@ -408,7 +419,7 @@ julia> set_preferences!("Flux", "FluxDistributedMPICUDAAware" => true)

By default, Flux will run the checks on your system to see if it can support GPU functionality. You can check if Flux identified a valid GPU setup by typing the following:

```julia
```julia-repl
julia> using CUDA
julia> CUDA.functional()
@@ -417,7 +428,7 @@ true

For AMD GPU:

```julia
```julia-repl
julia> using AMDGPU
julia> AMDGPU.functional()
@@ -429,7 +440,7 @@ true

For Metal GPU:

```julia
```julia-repl
julia> using Metal
julia> Metal.functional()
5 changes: 0 additions & 5 deletions docs/src/guide/models/basics.md
@@ -13,11 +13,6 @@ julia> df(x) = gradient(f, x)[1]; # df/dx = 6x + 2
julia> df(2)
14.0
julia> d2f(x) = gradient(df, x)[1]; # d²f/dx² = 6
julia> d2f(2)
6.0
```

When a function has many parameters, we can get gradients of each one at the same time:
4 changes: 3 additions & 1 deletion docs/src/guide/models/custom_layers.md
@@ -109,7 +109,9 @@ Join(combine, paths...) = Join(combine, paths)
```
Notice again that we parameterized the type of the `combine` and `paths` fields. In addition to the performance considerations of concrete types, this allows either field to be `Vector`s, `Tuple`s, or one of each - we don't need to pay attention to which.

The next step is to use [`Flux.@layer`](@ref) to make our struct behave like a Flux layer. This is important so that calling `Flux.setup` on a `Join` maps over the underlying trainable arrays on each path.
The next step is to use [`Flux.@layer`](@ref) to make our struct behave like a Flux layer.
In Flux < v0.15 this was important so that calling `Flux.setup` on a `Join` would map over the underlying trainable arrays on each path. Since Flux v0.15 this is no longer necessary, because Functors.jl now traverses custom types automatically. However, [`Flux.@layer`](@ref) is still recommended for pretty printing and other niceties.

```julia
Flux.@layer Join
```
2 changes: 1 addition & 1 deletion docs/src/guide/models/quickstart.md
@@ -67,7 +67,7 @@ plot(p_true, p_raw, p_done, layout=(1,3), size=(1000,330))
```

```@raw html
<img align="right" width="300px" src="../../assets/quickstart/loss.png">
<img align="right" width="300px" src="../../../assets/quickstart/loss.png">
```

Here's the loss during training:
2 changes: 1 addition & 1 deletion docs/src/guide/training/training.md
@@ -159,7 +159,7 @@ first(data) isa Tuple{AbstractMatrix, AbstractVector} # true
Here each iteration will use one matrix `x` (an image, perhaps) and one vector `y`.
It is very common to instead train on *batches* of such inputs (or *mini-batches*,
the two words mean the same thing) both for efficiency and for better results.
This can be easily done using the [`DataLoader`](@ref Flux.Data.DataLoader):
This can be easily done using the [`DataLoader`](@ref Flux.DataLoader):

```julia
data = Flux.DataLoader((X, Y), batchsize=32)
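
# A sketch of how the loader might then be consumed: each iteration yields a
# tuple (x, y) holding up to 32 observations along the last dimension
# (the final batch may be smaller).
for (x, y) in data
    # training code for one mini-batch...
end
```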
4 changes: 2 additions & 2 deletions docs/src/index.md
@@ -8,14 +8,14 @@ Flux is a library for machine learning. It comes "batteries-included" with many

### Installation

Download [Julia 1.9](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt.
Download [Julia 1.10](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt.
For Nvidia GPU support, you will also need to install the `CUDA` and the `cuDNN` packages. For AMD GPU support, install the `AMDGPU` package. For acceleration on Apple Silicon, install the `Metal` package.
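
For reference, a hedged sketch of the same installation steps using the Pkg API (install only the backend matching your hardware):

```julia
using Pkg

Pkg.add("Flux")              # same as typing `] add Flux` at the REPL prompt

# Optional GPU backends -- pick the one matching your hardware:
Pkg.add(["CUDA", "cuDNN"])   # NVIDIA
# Pkg.add("AMDGPU")          # AMD
# Pkg.add("Metal")           # Apple Silicon
```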

### Learning Flux

The **[quick start](@ref man-quickstart)** page trains a simple neural network.

This rest of the **guide** provides a from-scratch introduction to Flux's take on models and how they work, starting with [fitting a line](@ref man-overview). Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts.
The rest of the **guide** provides a from-scratch introduction to Flux's take on models and how they work, starting with [fitting a line](@ref man-overview). Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts.

There are some **tutorials** about building particular models. The **[model zoo](https://github.com/FluxML/model-zoo/)** has starting points for many other common ones. And finally, the **[ecosystem page](ecosystem.md)** lists packages which define Flux models.

7 changes: 6 additions & 1 deletion docs/src/reference/data/mldatadevices.md
@@ -1,6 +1,11 @@
```@meta
CurrentModule = MLDataDevices
CollapsedDocStrings = true
```

# Transferring data across devices

Flux relies on the [MLDataDevices.jl](https://github.com/LuxDL/MLDataDevices.jl/blob/main/src/public.jl) package to manage devices and transfer data across them. You don't have to explicitly use the package, as Flux re-exports the necessary functions and types.
Flux relies on the MLDataDevices.jl package to manage devices and transfer data across them. You don't have to explicitly use the package, as Flux re-exports the necessary functions and types.

```@docs
MLDataDevices.cpu_device
6 changes: 6 additions & 0 deletions docs/src/reference/data/mlutils.md
@@ -1,3 +1,8 @@
```@meta
CurrentModule = Flux
CollapsedDocStrings = true
```

# Working with Data, using MLUtils.jl

Flux re-exports the `DataLoader` type and utility functions for working with
@@ -25,6 +30,7 @@ MLUtils.chunk
MLUtils.eachobs
MLUtils.fill_like
MLUtils.filterobs
Flux.flatten
MLUtils.flatten
MLUtils.getobs
MLUtils.getobs!
6 changes: 5 additions & 1 deletion docs/src/reference/data/onehot.md
@@ -1,3 +1,7 @@
```@meta
CollapsedDocStrings = true
```

# One-Hot Encoding with OneHotArrays.jl

It's common to encode categorical variables (like `true`, `false` or `cat`, `dog`) in "one-of-k" or ["one-hot"](https://en.wikipedia.org/wiki/One-hot) form. [OneHotArrays.jl](https://github.com/FluxML/OneHotArrays.jl) provides the `onehot` function to make this easy.
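
As a small sketch of the round trip (`onehot`, `onehotbatch` and `onecold` are all re-exported by Flux):

```julia
using Flux

v = Flux.onehot(:b, [:a, :b, :c])                 # OneHotVector, true only in position 2
M = Flux.onehotbatch([:b, :a, :b], [:a, :b, :c])  # 3×3 OneHotMatrix, one column per label
Flux.onecold(M, [:a, :b, :c])                     # recovers [:b, :a, :b]
```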
@@ -51,7 +55,7 @@ julia> onecold(ans, [:a, :b, :c])

Note that these operations returned `OneHotVector` and `OneHotMatrix` rather than `Array`s. `OneHotVector`s behave like normal vectors but avoid any unnecessary cost compared to using an integer index directly. For example, multiplying a matrix with a one-hot vector simply slices out the relevant row of the matrix under the hood.

### Function listing
## Function listing

```@docs
OneHotArrays.onehot