fix: update to new reactant changes #1140

Merged
merged 6 commits on Dec 30, 2024
8 changes: 4 additions & 4 deletions Project.toml
@@ -1,7 +1,7 @@
name = "Lux"
uuid = "b2108857-7c20-44ae-9111-449ecde12c47"
authors = ["Avik Pal <[email protected]> and contributors"]
version = "1.4.2"
version = "1.4.3"

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -83,7 +83,7 @@ Adapt = "4.1"
ArgCheck = "2.3"
ArrayInterface = "7.17.1"
CUDA = "5.3.2"
ChainRulesCore = "1.24"
ChainRulesCore = "1.25"
Compat = "4.16"
ComponentArrays = "0.15.18"
ConcreteStructs = "0.2.3"
@@ -106,11 +106,11 @@ MPI = "0.20.19"
MacroTools = "0.5.13"
Markdown = "1.10"
NCCL = "0.1.1"
NNlib = "0.9.24"
NNlib = "0.9.26"
Optimisers = "0.4.1"
Preferences = "1.4.3"
Random = "1.10"
Reactant = "0.2.8"
Reactant = "0.2.12"
Reexport = "1.2.2"
ReverseDiff = "1.15"
SIMDTypes = "0.1"
6 changes: 3 additions & 3 deletions docs/Project.toml
@@ -33,7 +33,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
[compat]
ADTypes = "1.10"
Adapt = "4"
ChainRulesCore = "1.24"
ChainRulesCore = "1.25"
ComponentArrays = "0.15.18"
Documenter = "1.4"
DocumenterVitepress = "0.1.3"
@@ -51,12 +51,12 @@ LuxCore = "1.2"
LuxLib = "1.3.4"
LuxTestUtils = "1.5"
MLDataDevices = "1.6"
NNlib = "0.9.24"
NNlib = "0.9.26"
Optimisers = "0.4.1"
Pkg = "1.10"
Printf = "1.10"
Random = "1.10"
Reactant = "0.2.8"
Reactant = "0.2.12"
StableRNGs = "1"
StaticArrays = "1"
WeightInitializers = "1"
2 changes: 1 addition & 1 deletion docs/make.jl
@@ -29,7 +29,7 @@ pages = [
"tutorials/intermediate/1_NeuralODE.md",
"tutorials/intermediate/2_BayesianNN.md",
"tutorials/intermediate/3_HyperNet.md",
"tutorials/intermediate/4_PINN2DPDE.md"
"tutorials/intermediate/4_PINN2DPDE.md",
],
"Advanced" => [
"tutorials/advanced/1_GravitationalWaveForm.md"
13 changes: 11 additions & 2 deletions ext/LuxReactantExt/LuxReactantExt.jl
@@ -2,13 +2,22 @@ module LuxReactantExt

using Enzyme: Enzyme, Const, Duplicated, Active
using Optimisers: Optimisers
-using Reactant: Reactant, @compile, TracedRArray, TracedRNumber
+using Reactant: Reactant, @compile, AnyTracedRArray, TracedRArray, TracedRNumber
using Setfield: @set!
using Static: False

-using Lux: Lux, LuxOps, Training
+using Lux: Lux, LuxOps, Training, Utils
using Lux.Training: TrainingBackendCache, ReactantBackend

Lux.is_extension_loaded(::Val{:Reactant}) = true

Utils.to_rarray(x; kwargs...) = Reactant.to_rarray(x; kwargs...)

function Utils.promote_to(::Type{T}, x::Number) where {T <: Number}
x isa Reactant.TracedType && return x
return Reactant.ConcreteRNumber{T}(x)
end

include("patches.jl")
include("training.jl")

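For orientation (not part of the diff): the two new `Utils` overloads give Lux a backend-agnostic way to hand values to Reactant. A minimal sketch of their semantics, assuming Lux with the Reactant extension loaded; `Utils` is an internal Lux module, used here only for illustration:

```julia
# Illustrative only — shows the behaviour of the new overloads defined above.
using Lux, Reactant

x_ra = Lux.Utils.to_rarray(rand(Float32, 4, 4))  # forwards to Reactant.to_rarray
ϵ_ra = Lux.Utils.promote_to(Float32, 1f-5)       # wraps into Reactant.ConcreteRNumber{Float32}
# Already-traced values (e.g. inside a traced/compiled region) are returned
# unchanged by promote_to, so scalars are only wrapped once.
```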
3 changes: 3 additions & 0 deletions ext/LuxReactantExt/patches.jl
@@ -1 +1,4 @@
Utils.vec(x::AnyTracedRArray) = Reactant.TracedUtils.materialize_traced_array(vec(x))

# XXX: Use PoolDims once EnzymeJAX supports stablehlo.reduce_window adjoint
Lux.calculate_pool_dims(g::Lux.GlobalPoolMode, ::TracedRArray) = g
86 changes: 56 additions & 30 deletions ext/LuxReactantExt/training.jl
@@ -1,3 +1,28 @@
mutable struct StatsAndNewStateWrapper
stats::Any
st::Any
end

function wrapped_objective_function(
fn::F, model, ps, st, data, cache::StatsAndNewStateWrapper
) where {F}
loss, stₙ, stats = fn(model, ps, st, data)
cache.stats = stats
cache.st = stₙ
return loss
end

function compute_gradients_internal(objective_function::F, model, data, ps, st) where {F}
stats_wrapper = StatsAndNewStateWrapper(nothing, nothing)
res = Enzyme.gradient(
Enzyme.set_abi(Enzyme.ReverseWithPrimal, Reactant.ReactantABI),
Const(wrapped_objective_function), Const(objective_function),
Const(model), ps, Const(st), Const(data), Const(stats_wrapper)
)
loss, dps = res.val, res.derivs[3]
return dps, loss, stats_wrapper.stats, stats_wrapper.st
end

function Lux.Training.compute_gradients_impl(
backend::ReactantBackend, objective_function::F,
data, ts::Training.TrainState) where {F}
@@ -22,18 +47,33 @@ function Lux.Training.compute_gradients_impl(::ReactantBackend, obj_fn::F, data,
return grads, loss, stats, ts
end

function compute_gradients_internal(objective_function::F, model, data, ps, st) where {F}
dps = Enzyme.make_zero(ps)
_, (loss, stₙ, stats) = Enzyme.autodiff(
Enzyme.ReverseWithPrimal, Const(objective_function), Active, Const(model),
Duplicated(ps, dps), Const(st), Const(data))
return dps, loss, stats, stₙ
end

for inplace in ("!", "")
fname = Symbol(:single_train_step_impl, inplace)
internal_fn = Symbol(:compute_gradients_internal_and_step, inplace)
apply_gradients_fn = Symbol(:apply_gradients, inplace)
update_fn = Symbol(:update, inplace)

# Ideally users never hit this dispatch but it is still good to have as a fallback
@eval function Lux.Training.$(apply_gradients_fn)(
ts::Training.TrainState{<:TrainingBackendCache{ReactantBackend}}, grads
)
if hasfield(typeof(ts.cache.extras), :update_function)
update_function = ts.cache.extras.update_function
else
update_function = @compile Optimisers.$(update_fn)(
ts.optimizer_state, ts.parameters, grads)
@set! ts.cache.extras = merge(ts.cache.extras, (; update_function))
end

opt_state, ps = update_function(ts.optimizer_state, ts.parameters, grads)
@set! ts.parameters = ps
@set! ts.optimizer_state = opt_state
@set! ts.step = ts.step + 1
return ts
end

# XXX: Should we add a check to ensure the inputs to this function are the same as
# those used in the compiled function? We can re-trigger the compilation with a warning
@eval function Lux.Training.$(fname)(backend::ReactantBackend, objective_function::F,
data, ts::Training.TrainState) where {F}
compiled_grad_and_step_function = @compile $(internal_fn)(
@@ -68,27 +108,13 @@ for inplace in ("!", "")

return grads, loss, stats, ts
end
end

function compute_gradients_internal_and_step(objective_function::F, model, data, ps,
st, opt_state) where {F}
dps = Enzyme.make_zero(ps)
_, (loss, stₙ, stats) = Enzyme.autodiff(
Enzyme.set_abi(Enzyme.ReverseWithPrimal, Reactant.ReactantABI),
Const(objective_function), Active, Const(model),
Duplicated(ps, dps), Const(st), Const(data))
opt_state, ps = Optimisers.update(opt_state, ps, dps)
return dps, ps, loss, stats, stₙ, opt_state
end

function compute_gradients_internal_and_step!(objective_function::F, model, data, ps,
st, opt_state) where {F}
dps = Enzyme.make_zero(ps)
_, (loss, stₙ, stats) = Enzyme.autodiff(
Enzyme.set_abi(Enzyme.ReverseWithPrimal, Reactant.ReactantABI),
Const(objective_function), Active, Const(model),
Duplicated(ps, dps), Const(st), Const(data))
# XXX: Inplace updates not actually inplace
opt_state, ps = Optimisers.update!(opt_state, ps, dps)
return dps, ps, loss, stats, stₙ, opt_state
# XXX: Inplace version not actually inplace
@eval function $(internal_fn)(
objective_function::F, model, data, ps, st, opt_state) where {F}
dps, loss, stats, stₙ = compute_gradients_internal(
objective_function, model, data, ps, st)
opt_state, ps = Optimisers.$(update_fn)(opt_state, ps, dps)
return dps, ps, loss, stats, stₙ, opt_state
end
end
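As context for the rewritten gradient path, here is a hedged sketch of the user-facing loop that reaches this compiled step. Only `Training.TrainState` and `Training.single_train_step!` (whose `(grads, loss, stats, ts)` return shape appears above) come from the diff; the model, loss, optimiser, and device setup below are illustrative assumptions.

```julia
# Illustrative training setup; the first step hits the @compile path in this
# extension and caches the compiled gradient-and-step function for reuse.
using Lux, Reactant, Enzyme, Optimisers, Random
using ADTypes: AutoEnzyme               # assumed import; may also be re-exported by Lux
using MLDataDevices: reactant_device    # assumed import; may also be re-exported by Lux

rng = Random.default_rng()
dev = reactant_device()

model = Chain(Dense(2 => 8, tanh), Dense(8 => 1))
ps, st = Lux.setup(rng, model) |> dev
x = rand(rng, Float32, 2, 32) |> dev
y = rand(rng, Float32, 1, 32) |> dev

ts = Training.TrainState(model, ps, st, Adam(0.01f0))

# First call compiles; subsequent calls reuse the cached compiled function.
grads, loss, stats, ts = Training.single_train_step!(AutoEnzyme(), MSELoss(), (x, y), ts)
grads, loss, stats, ts = Training.single_train_step!(AutoEnzyme(), MSELoss(), (x, y), ts)
```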
4 changes: 2 additions & 2 deletions lib/LuxLib/Project.toml
@@ -1,7 +1,7 @@
name = "LuxLib"
uuid = "82251201-b29d-42c6-8e01-566dec8acb11"
authors = ["Avik Pal <[email protected]> and contributors"]
version = "1.3.10"
version = "1.3.11"

[deps]
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -77,7 +77,7 @@ LuxCore = "1.2"
MKL = "0.7"
MLDataDevices = "1.6"
Markdown = "1.10"
NNlib = "0.9.24"
NNlib = "0.9.26"
Octavian = "0.3.28"
Preferences = "1.4.3"
Polyester = "0.7.15"
2 changes: 1 addition & 1 deletion lib/LuxLib/ext/LuxLibTrackerExt.jl
@@ -97,7 +97,7 @@ for RM in (:TrackedVector, :Nothing, :AbstractVector),
Utils.is_tracked(RM, RV, S, B, XT) || continue

@eval Tracker.@grad_from_chainrules LuxLib.Impl.batchnorm_cudnn(
-γ::$S, β::$B, x::$XT, rμ::$RM, rσ²::$RV, m::Real, ϵ::Real, training::StaticBool)
+γ::$S, β::$B, x::$XT, rμ::$RM, rσ²::$RV, m, ϵ, training::StaticBool)
end

# Utils extensions
2 changes: 1 addition & 1 deletion lib/LuxLib/ext/LuxLibcuDNNExt/LuxLibcuDNNExt.jl
@@ -21,7 +21,7 @@ include("batchnorm.jl")
function Impl.batchnorm(x::Union{<:CuArray{T, 2}, <:CuArray{T, 4}, <:CuArray{T, 5}},
γ::Optional{<:CuVector{T}}, β::Optional{<:CuVector{T}},
rμ::Optional{<:CuVector{T}}, rσ²::Optional{<:CuVector{T}},
-training::StaticBool, σ::F, m::Real, ϵ::Real) where {T <: cuDNNFloat, F}
+training::StaticBool, σ::F, m, ϵ) where {T <: cuDNNFloat, F}
rμₙ, rσ²ₙ = Impl.get_batchnorm_statistics(x, rμ, rσ², training)
y = Impl.batchnorm_cudnn(γ, β, x, rμₙ, rσ²ₙ, m, ϵ, training)[1]
return Impl.activation!!(σ, y), safe_vec(rμₙ), safe_vec(rσ²ₙ)
2 changes: 1 addition & 1 deletion lib/LuxLib/src/api/batchnorm.jl
@@ -37,7 +37,7 @@ mean and variance.
function batchnorm(x::AbstractArray{T, N}, γ::Optional{<:AbstractVector},
β::Optional{<:AbstractVector}, rμ::Optional{<:AbstractVector},
rσ²::Optional{<:AbstractVector}, training::TrainingType, act::F=identity,
-momentum::Real=0.1f0, epsilon::Real=default_epsilon(x)) where {F, T, N}
+momentum=0.1f0, epsilon=default_epsilon(x)) where {F, T, N}
σ = select_fastest_activation(act, x, γ, β, rμ, rσ²)
y, rμ, rσ² = batchnorm_impl(
x, γ, β, rμ, rσ², static_training_mode(training, x, γ, β, rμ, rσ²),
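The `momentum` and `epsilon` arguments here, as in the groupnorm, instancenorm, and layernorm signatures below and the Tracker/cuDNN extensions above, drop their `::Real` annotations so that Reactant scalars can flow through. A minimal sketch of the motivation, assuming the Reactant 0.2.x type hierarchy:

```julia
# Assumption: Reactant's scalar wrappers subtype Number but not Real, so the old
# `epsilon::Real` annotation would reject them.
using Reactant

ϵ = Reactant.ConcreteRNumber{Float32}(1f-5)  # same constructor used in LuxReactantExt above
ϵ isa Number  # true
ϵ isa Real    # false — hence the relaxed signatures
```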
4 changes: 2 additions & 2 deletions lib/LuxLib/src/api/groupnorm.jl
@@ -1,6 +1,6 @@
@doc doc"""
groupnorm(x, scale, bias, groups::Int, σ::F=identity,
-epsilon::Real=eps(eltype(x)) ^ (5 // 7))
+epsilon=eps(eltype(x)) ^ (5 // 7))

Group Normalization. For details see [1].

@@ -30,7 +30,7 @@ The normalized array is returned.
"""
function groupnorm(x::AbstractArray{<:Real, N}, scale::Optional{<:AbstractVector},
bias::Optional{<:AbstractVector}, groups::Int, σ::F=identity,
-epsilon::Real=default_epsilon(x)) where {F, N}
+epsilon=default_epsilon(x)) where {F, N}
assert_valid_groupnorm_arguments(x, scale, bias, groups)
return groupnorm_impl(
x, scale, bias, groups, select_fastest_activation(σ, x, scale, bias), epsilon)
4 changes: 2 additions & 2 deletions lib/LuxLib/src/api/instancenorm.jl
@@ -36,15 +36,15 @@ mean and variance.
"""
function instancenorm(x::AbstractArray, γ::Optional{<:AbstractVector},
β::Optional{<:AbstractVector}, training::TrainingType,
-σ::F=identity, epsilon::Real=default_epsilon(x)) where {F}
+σ::F=identity, epsilon=default_epsilon(x)) where {F}
# This API is kept for legacy purposes when we didn't support passing running stats
return instancenorm(x, γ, β, nothing, nothing, training, σ, nothing, epsilon)
end

function instancenorm(x::AbstractArray, γ::Optional{<:AbstractVector},
β::Optional{<:AbstractVector}, rμ::Optional{<:AbstractVector},
rσ²::Optional{<:AbstractVector}, training::TrainingType, σ::F=identity,
-momentum::Optional{<:Real}=0.1f0, epsilon::Real=default_epsilon(x)) where {F}
+momentum::Optional{<:Real}=0.1f0, epsilon=default_epsilon(x)) where {F}
assert_valid_instancenorm_arguments(x)

y, rμₙ, rσ²ₙ = instancenorm_impl(
2 changes: 1 addition & 1 deletion lib/LuxLib/src/api/layernorm.jl
@@ -36,7 +36,7 @@ Normalized Array of same size as `x`.
"""
function layernorm(x::AbstractArray{xT, N}, scale::Optional{<:AbstractArray},
bias::Optional{<:AbstractArray}, σ::F=identity, dims=1:(N - 1),
-epsilon::Real=default_epsilon(x)) where {F, xT, N}
+epsilon=default_epsilon(x)) where {F, xT, N}
return layernorm_impl(
x, scale, bias, select_fastest_activation(σ, x, scale, bias), dims, epsilon)
end