Skip to content

Commit

Permalink
Update to Turing v0.35 (#559)
Browse files Browse the repository at this point in the history
* Add readme note on Julia version

* Bump Turing to 0.35

* Update minimum supported Julia version

* Remove unnecessary version qualifier

* Remove Tracker and replace with Mooncake, except in BNN doc

* Use Mooncake in BNN doc (#521)

* Fix BNN doc to work with Mooncake
  • Loading branch information
penelopeysm authored Nov 26, 2024
1 parent ba5e408 commit d42a408
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 18 deletions.
39 changes: 38 additions & 1 deletion Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.10.6"
manifest_format = "2.0"
project_hash = "52677c2802b9702303d49a2f40fdefa34939b429"
project_hash = "53e97ef537fe2e98eecbe367cd36c4bc6a201e3e"

[[deps.ADTypes]]
git-tree-sha1 = "72af59f5b8f09faee36b4ec48e014a79210f2f4f"
Expand Down Expand Up @@ -708,6 +708,12 @@ git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272"
uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
version = "1.15.1"

[[deps.DiffTests]]
deps = ["LinearAlgebra", "SparseArrays", "Statistics"]
git-tree-sha1 = "b92beb1933df01bf4915d3a05e54c2a0aad312c7"
uuid = "de460e47-3fe3-5279-bb4a-814414816d5d"
version = "0.1.2"

[[deps.DifferentialEquations]]
deps = ["BoundaryValueDiffEq", "DelayDiffEq", "DiffEqBase", "DiffEqCallbacks", "DiffEqNoiseProcess", "JumpProcesses", "LinearAlgebra", "LinearSolve", "NonlinearSolve", "OrdinaryDiffEq", "Random", "RecursiveArrayTools", "Reexport", "SciMLBase", "SteadyStateDiffEq", "StochasticDiffEq", "Sundials"]
git-tree-sha1 = "d55af9d6b51c54f81ae30d1a463206d32cc4c24a"
Expand Down Expand Up @@ -2109,6 +2115,11 @@ git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d"
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
version = "1.2.0"

[[deps.MistyClosures]]
git-tree-sha1 = "1142aefd845c608f3c70e4c202c4aae725cab67b"
uuid = "dbe65cb8-6be2-42dd-bbc5-4196aaced4f4"
version = "2.0.0"

[[deps.Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"

Expand All @@ -2118,6 +2129,32 @@ git-tree-sha1 = "2c140d60d7cb82badf06d8783800d0bcd1a7daa2"
uuid = "78c3b35d-d492-501b-9361-3d52fe80e533"
version = "0.8.1"

[[deps.Mooncake]]
deps = ["ADTypes", "ChainRules", "ChainRulesCore", "DiffRules", "DiffTests", "ExprTools", "FunctionWrappers", "Graphs", "InteractiveUtils", "LinearAlgebra", "MistyClosures", "Random", "Setfield", "Test"]
git-tree-sha1 = "9fa3c8c11fe5286badb9c24e8d2420b3a07f9a8c"
uuid = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
version = "0.4.50"

[deps.Mooncake.extensions]
MooncakeAllocCheckExt = "AllocCheck"
MooncakeCUDAExt = "CUDA"
MooncakeDynamicPPLExt = "DynamicPPL"
MooncakeJETExt = "JET"
MooncakeLuxLibExt = "LuxLib"
MooncakeLuxLibSLEEFPiratesExtension = ["LuxLib", "SLEEFPirates"]
MooncakeNNlibExt = "NNlib"
MooncakeSpecialFunctionsExt = "SpecialFunctions"

[deps.Mooncake.weakdeps]
AllocCheck = "9b6a8646-10ed-4001-bbdc-1d2f46dfbb1a"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"

[[deps.MozillaCACerts_jll]]
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
version = "2023.1.10"
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73"
MicroCanonicalHMC = "234d2aa0-2291-45f7-9047-6fa6f316b0a8"
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
OptimizationNLopt = "4e6fcdb7-1186-4e1f-a706-475e75c168bb"
Expand All @@ -48,7 +49,6 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
TuringBenchmarking = "0db1332d-5c25-4deb-809f-459bc696f94f"
UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
Expand Down
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ This repository is part of [Turing.jl's](https://turinglang.org/) website (i.e.
- The `master` branch contains the quarto source
- The `gh-pages` branch contains the `html` version of these documents compiled from the `master` branch.

> [!NOTE]
> Due to [an incompatibility between Bijectors.jl and
> Enzyme.jl](https://github.com/TuringLang/Bijectors.jl/pull/341), the docs
> currently must be built with Julia 1.10. You can do this either by making
> Julia 1.10 your default Julia version (`juliaup default 1.10`), or by
> explicitly specifying Julia 1.10 when running Quarto:
>
> ```bash
> QUARTO_JULIA=$(julia +1.10 -e "println(Sys.BINDIR)")/julia quarto render
> ```
## Local development
To get started with the docs website locally, you'll need to have [Quarto](https://quarto.org/docs/download/) installed.
Expand Down Expand Up @@ -79,6 +90,12 @@ If you find that Quarto's execution is failing with errors that aren't reproduci
quarto render /path/to/index.qmd --execute-daemon-restart
```
And also, kill any stray Quarto processes that are still running (sometimes they keep running in the background):

```bash
pkill -9 -f quarto
```

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
6 changes: 3 additions & 3 deletions tutorials/03-bayesian-neural-network/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ using Turing
using FillArrays
using Lux
using Plots
using Tracker
import Mooncake
using Functors
using LinearAlgebra
Expand Down Expand Up @@ -189,7 +189,7 @@ const nn = StatefulLuxLayer{true}(nn_initial, nothing, st)
parameters ~ MvNormal(zeros(nparameters), Diagonal(abs2.(sigma .* ones(nparameters))))
# Forward NN to make predictions
preds = Lux.apply(nn, xs, vector_to_parameters(parameters, ps))
preds = Lux.apply(nn, xs, f32(vector_to_parameters(parameters, ps)))
# Observe each prediction.
for i in eachindex(ts)
Expand All @@ -208,7 +208,7 @@ setprogress!(false)
```{julia}
# Perform inference.
N = 2_000
ch = sample(bayes_nn(reduce(hcat, xs), ts), NUTS(; adtype=AutoTracker()), N);
ch = sample(bayes_nn(reduce(hcat, xs), ts), NUTS(; adtype=AutoMooncake(; config=nothing)), N);
```

Now we extract the parameter samples from the sampled chain as `θ` (this is of size `2000 x 20` where `2000` is the number of iterations and `20` is the number of parameters).
Expand Down
2 changes: 1 addition & 1 deletion tutorials/10-bayesian-differential-equations/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ More theoretical details on these methods can be found at: https://docs.sciml.ai
While these sensitivity analysis methods may seem complicated, using them is dead simple.
Here is a version of the Lotka-Volterra model using adjoint sensitivities.

All we have to do is switch the AD backend to one of the adjoint-compatible backends (ReverseDiff, Tracker, or Zygote)!
All we have to do is switch the AD backend to one of the adjoint-compatible backends (ReverseDiff or Zygote)!
Notice that on this model adjoints are slower.
This is because adjoints have a higher overhead on small parameter models and therefore we suggest using these methods only for models with around 100 parameters or more.
For more details, see https://arxiv.org/abs/1812.01892.
Expand Down
2 changes: 1 addition & 1 deletion tutorials/docs-00-getting-started/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Pkg.instantiate();

To use Turing, you need to install Julia first and then install Turing.

You will need to install Julia 1.7 or greater, which you can get from [the official Julia website](http://julialang.org/downloads/).
You will need to install Julia 1.10 or greater, which you can get from [the official Julia website](http://julialang.org/downloads/).

Turing is officially registered in the [Julia General package registry](https://github.com/JuliaRegistries/General), which means that you can install a stable version of Turing by running the following in the Julia REPL:

Expand Down
8 changes: 4 additions & 4 deletions tutorials/docs-10-using-turing-autodiff/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@ Pkg.instantiate();

## Switching AD Modes

Turing currently supports four automatic differentiation (AD) backends for sampling: [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl) for forward-mode AD; and [ReverseDiff](https://github.com/JuliaDiff/ReverseDiff.jl), [Zygote](https://github.com/FluxML/Zygote.jl), and [Tracker](https://github.com/FluxML/Tracker.jl) for reverse-mode AD.
While `Tracker` is still available, its use is discouraged due to a lack of active maintenance.
`ForwardDiff` is automatically imported by Turing. To utilize `Zygote` or `ReverseDiff` for AD, users must explicitly import them with `using Zygote` or `using ReverseDiff`, alongside `using Turing`.
Turing currently supports four automatic differentiation (AD) backends for sampling: [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl) for forward-mode AD; and [Mooncake](https://github.com/compintell/Mooncake.jl), [ReverseDiff](https://github.com/JuliaDiff/ReverseDiff.jl), and [Zygote](https://github.com/FluxML/Zygote.jl) for reverse-mode AD.
`ForwardDiff` is automatically imported by Turing. To utilize `Mooncake`, `Zygote`, or `ReverseDiff` for AD, users must explicitly import them with `import Mooncake`, `import Zygote` or `import ReverseDiff`, alongside `using Turing`.

As of Turing version v0.30, the global configuration flag for the AD backend has been removed in favour of [`ADTypes.jl`](https://github.com/SciML/ADTypes.jl), allowing users to specify the AD backend for individual samplers independently.
Users can pass the `adtype` keyword argument to the sampler constructor to select the desired AD backend, with the default being `AutoForwardDiff(; chunksize=0)`.
Expand Down Expand Up @@ -69,7 +68,8 @@ Generally, reverse-mode AD, for instance `ReverseDiff`, is faster when sampling
If the differentiation method is not specified in this way, Turing will fall back to the sampler's default AD backend.
Currently, this is `ForwardDiff`.

The most reliable way to ensure you are using the fastest AD that works for your problem is to benchmark them using `TuringBenchmarking`:
The most reliable way to ensure you are using the fastest AD that works for your problem is to benchmark them using [`TuringBenchmarking`](https://github.com/TuringLang/TuringBenchmarking.jl):

```{julia}
using TuringBenchmarking
benchmark_model(gdemo(1.5, 2), adbackends=[AutoForwardDiff(), AutoReverseDiff()])
Expand Down
4 changes: 2 additions & 2 deletions tutorials/docs-12-using-turing-guide/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ The `chains` variable now contains a `Chains` object which can be indexed by cha

#### Multithreaded sampling

If you wish to perform multithreaded sampling and are running Julia 1.3 or greater, you can call `sample` with the following signature:
If you wish to perform multithreaded sampling, you can call `sample` with the following signature:

```{julia}
#| eval: false
Expand Down Expand Up @@ -514,7 +514,7 @@ ForwardDiff (Turing's default AD backend) uses forward-mode chunk-wise AD. The c

#### AD Backend

Turing supports four automatic differentiation (AD) packages in the back end during sampling. The default AD backend is [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl) for forward-mode AD. Three reverse-mode AD backends are also supported, namely [Tracker](https://github.com/FluxML/Tracker.jl), [Zygote](https://github.com/FluxML/Zygote.jl) and [ReverseDiff](https://github.com/JuliaDiff/ReverseDiff.jl). `Zygote` and `ReverseDiff` are supported optionally if explicitly loaded by the user with `using Zygote` or `using ReverseDiff` next to `using Turing`.
Turing supports four automatic differentiation (AD) packages in the back end during sampling. The default AD backend is [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl) for forward-mode AD. Three reverse-mode AD backends are also supported, namely [Mooncake](https://github.com/compintell/Mooncake.jl), [Zygote](https://github.com/FluxML/Zygote.jl) and [ReverseDiff](https://github.com/JuliaDiff/ReverseDiff.jl). `Mooncake`, `Zygote`, and `ReverseDiff` also require the user to explicitly load them using `import Mooncake`, `import Zygote`, or `import ReverseDiff` next to `using Turing`.

For more information on Turing's automatic differentiation backend, please see the [Automatic Differentiation]({{<meta using-turing-autodiff>}}) article.

Expand Down
11 changes: 6 additions & 5 deletions tutorials/docs-13-using-turing-performance-tips/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,20 @@ end
## Choose your AD backend

Automatic differentiation (AD) makes it possible to use modern, efficient gradient-based samplers like NUTS and HMC, and that means a good AD system is incredibly important. Turing currently
supports several AD backends, including [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl) (the default), [Zygote](https://github.com/FluxML/Zygote.jl),
[ReverseDiff](https://github.com/JuliaDiff/ReverseDiff.jl), and [Tracker](https://github.com/FluxML/Tracker.jl). Experimental support is also available for
[Tapir](https://github.com/withbayes/Tapir.jl).
supports several AD backends, including [ForwardDiff](https://github.com/JuliaDiff/ForwardDiff.jl) (the default),
[Mooncake](https://github.com/compintell/Mooncake.jl),
[Zygote](https://github.com/FluxML/Zygote.jl), and
[ReverseDiff](https://github.com/JuliaDiff/ReverseDiff.jl).

For many common types of models, the default ForwardDiff backend performs great, and there is no need to worry about changing it. However, if you need more speed, you can try
different backends via the standard [ADTypes](https://github.com/SciML/ADTypes.jl) interface by passing an `AbstractADType` to the sampler with the optional `adtype` argument, e.g.
`NUTS(adtype = AutoZygote())`. See [Automatic Differentiation]({{<meta using-turing-autodiff>}}) for details. Generally, `adtype = AutoForwardDiff()` is likely to be the fastest and most reliable for models with
few parameters (say, fewer than 20 or so), while reverse-mode backends such as `AutoZygote()` or `AutoReverseDiff()` will perform better for models with many parameters or linear algebra
operations. If in doubt, it's easy to try a few different backends to see how they compare.

### Special care for Zygote and Tracker
### Special care for Zygote

Note that Zygote and Tracker will not perform well if your model contains `for`-loops, due to the way reverse-mode AD is implemented in these packages. Zygote also cannot differentiate code
Note that Zygote will not perform well if your model contains `for`-loops, due to the way reverse-mode AD is implemented in this package. Zygote also cannot differentiate code
that contains mutating operations. If you can't implement your model without `for`-loops or mutation, `ReverseDiff` will be a better, more performant option. In general, though,
vectorized operations are still likely to perform best.

Expand Down

0 comments on commit d42a408

Please sign in to comment.