diff --git a/src/compiler/interface.jl b/src/compiler/interface.jl index d5428e97e..f429102f6 100644 --- a/src/compiler/interface.jl +++ b/src/compiler/interface.jl @@ -4,11 +4,11 @@ using Core: Typeof import Base: copy!, IdSet import Base.Broadcast: broadcasted, materialize! -mutable struct Context <: AContext +mutable struct Context{I} <: AContext cache::Union{IdDict{Any,Any},Nothing} end -Context() = Context(nothing) +Context() = Context{false}(nothing) cache(cx::Context) = cx.cache === nothing ? (cx.cache = IdDict()) : cx.cache @@ -36,10 +36,28 @@ _pullback(f, args...) = _pullback(Context(), f, args...) tailmemaybe(::Nothing) = nothing tailmemaybe(x::Tuple) = Base.tail(x) -function pullback(f, args...) - y, back = _pullback(f, args...) +@inline pullback(f, args...) = pullback(f, Context(), args...) +function pullback(f, cx::AContext, args...) + y, back = _pullback(cx, f, args...) y, Δ -> tailmemaybe(back(Δ)) end +function pullback(cx::Context, f, args...) + ChainRulesCore.ignore_derivatives() do + @warn """ + Incorrect argument order for pullback, please use: + + pullback(f, __context__::Context, args) + + instead of: + + pullback(__context__::Context, f, args) + + This is usually caused by a call to pullback in a higher-order @adjoint. + The above warning will become an error in Zygote 0.7. + """ + end + return pullback(f, cx, args...) +end sensitivity(y::Number) = one(y) sensitivity(y::Complex) = error("Output is complex, so the gradient is not defined.") @@ -334,21 +352,21 @@ function Base.map(f, gs1::Grads, gss::ADictOrGrads...) end function Base.map!(f, gsout::Grads, gss::ADictOrGrads...) - all(issetequal(gsout.params, keys(gs)) for gs in gss) || + all(issetequal(gsout.params, keys(gs)) for gs in gss) || throw(ArgumentError("map! expects Grads objects with the same Params.")) for p in gsout.params - gsout[p] = f((_getformap(gs, p) for gs in gss)...) + gsout[p] = f((_getformap(gs, p) for gs in gss)...) end return gsout end function _getformap(gs, p) g = gs[p] - isnothing(g) ? fill!(similar(p), 0) : g + isnothing(g) ? fill!(similar(p), 0) : g end function pullback(f, ps::Params) - cx = Context() + cx = Context{true}(nothing) y, back = _pullback(cx, f) y, function (Δ) for p in ps diff --git a/src/lib/array.jl b/src/lib/array.jl index bbe13669d..feca8cf1c 100644 --- a/src/lib/array.jl +++ b/src/lib/array.jl @@ -306,7 +306,7 @@ end @adjoint function sum(f, xs::AbstractArray{<:AbstractArray}; kws...) @assert !haskey(kws, :init) # TODO add init support (julia 1.6) - return pullback(__context__, (f, xs) -> sum(f.(xs); kws...), f, xs) + return pullback((f, xs) -> sum(f.(xs); kws...), __context__, f, xs) end @adjoint function sum(xs::AbstractArray{Bool}; dims = :) @@ -315,7 +315,7 @@ end function _pullback(cx::AContext, ::typeof(prod), f, xs::AbstractArray) - y, back = pullback(cx, ((f, xs) -> prod(f.(xs))), f, xs) + y, back = pullback((f, xs) -> prod(f.(xs)), cx, f, xs) y, ȳ -> (nothing, back(ȳ)...) end diff --git a/src/lib/broadcast.jl b/src/lib/broadcast.jl index 6dbfdb829..c8a55df78 100644 --- a/src/lib/broadcast.jl +++ b/src/lib/broadcast.jl @@ -30,6 +30,10 @@ using Base.Broadcast: Broadcasted, AbstractArrayStyle, broadcasted, materialize # Utilities # ========= +# ChainRules already marks this non-differentiable, +# But inference can still give up because of the Zygote -> CR wrapper layer +@nograd Broadcast.combine_styles + accum_sum(xs; dims = :) = reduce(accum, xs, dims = dims) # Work around reducedim_init issue @@ -82,16 +86,16 @@ _minus(::Nothing) = nothing @adjoint broadcasted(::typeof(*), x::Numeric, y::Numeric) = x.*y, Δ -> (nothing, unbroadcast(x, Δ .* conj.(y)), unbroadcast(y, Δ .* conj.(x))) @adjoint broadcasted(::typeof(*), x::Number, y::AbstractArray{<:Number}) = - _pullback(*, x, y) # this uses dot(y,Δ) instead of sum(Δ .* conj.(y)) + _pullback(__context__, *, x, y) # this uses dot(y,Δ) instead of sum(Δ .* conj.(y)) @adjoint broadcasted(::typeof(*), x::AbstractArray{<:Number}, y::Number) = - _pullback(*, x, y) + _pullback(__context__, *, x, y) @adjoint function broadcasted(::typeof(/), x::Numeric, y::Numeric) res = x ./ y res, Δ -> (nothing, unbroadcast(x, Δ ./ conj.(y)), unbroadcast(y, .-Δ .* conj.(res ./ y))) end @adjoint broadcasted(::typeof(/), x::AbstractArray{<:Number}, y::Number) = - _pullback(/, x, y) + _pullback(__context__, /, x, y) @adjoint function broadcasted(::typeof(Base.literal_pow), ::typeof(^), x::Numeric, exp::Val{p}) where p y = Base.literal_pow.(^, x, exp) @@ -284,7 +288,7 @@ end # Not the ChainRules.rrule which will use the Zygote.Context and thus not be GPU compatible @adjoint function sum(f, xs::CUDA.AbstractGPUArray; kws...) @assert !haskey(kws, :init) # TODO add init support (julia 1.6) - return pullback(__context__, (f, xs) -> sum(f.(xs); kws...), f, xs) + return pullback((f, xs) -> sum(f.(xs); kws...), __context__, f, xs) end @adjoint function Base.convert(::Type{T}, xs::Array) where {T<:CUDA.AbstractGPUArray} diff --git a/src/lib/lib.jl b/src/lib/lib.jl index f11a74214..f7af468b4 100644 --- a/src/lib/lib.jl +++ b/src/lib/lib.jl @@ -21,7 +21,7 @@ accum(x, y) = accum(x, y, zs...) = accum(accum(x, y), zs...) -accum(x::Tuple, ys::Tuple...) = accum.(x, ys...) +accum(x::Tuple, ys::Tuple...) = map(accum, x, ys...) accum(x::AbstractArray, ys::AbstractArray...) = accum.(x, ys...) @generated function accum(x::NamedTuple, y::NamedTuple) @@ -50,6 +50,7 @@ end @adjoint Base.typeassert(x, T) = Base.typeassert(x, T), Δ -> (Δ, nothing) +accum_param(::Context{false}, _, Δ) = Δ @generated function accum_param(cx::Context, x, Δ) isbitstype(x) && return :(Δ) quote diff --git a/test/compiler.jl b/test/compiler.jl index bc37d271e..c5ddf1f38 100644 --- a/test/compiler.jl +++ b/test/compiler.jl @@ -1,5 +1,5 @@ using Zygote, Test -using Zygote: pullback, @adjoint +using Zygote: pullback, @adjoint, Context macro test_inferred(ex) :(let res = nothing @@ -160,13 +160,18 @@ end @testset "inference for `getproperty`" begin Gaussian = _Gaussian(:getproperty) g = Gaussian(randn(3), randn(3, 3)) - y, back = @inferred pullback(x -> x.m, g) - @test y == getfield(g, :m) - # This type instability is due to the handling of non-bitstypes in `accum_param` + y_explicit, back_explicit = @inferred pullback(x -> x.m, g) + y_implicit, back_implicit = @inferred pullback(x -> x.m, Context{true}(nothing), g) + @test y_explicit == y_implicit == getfield(g, :m) + + ∇args = ((m = [1.0, 0.0, 0.0], P = nothing),) if VERSION > v"1.7-" - @test Base.return_types(back, Tuple{Vector{Float64}}) == Any[Union{Tuple{Nothing}, typeof(((m = [1.0, 0.0, 0.0], P = nothing),))}] + # This type instability is due to the handling of non-bitstypes in `accum_param` + @test Base.return_types(back_implicit, Tuple{Vector{Float64}}) == Any[Union{Tuple{Nothing}, typeof(∇args)}] + # But the same should infer if implicit parameters are disabled + @test Base.return_types(back_explicit, Tuple{Vector{Float64}}) == Any[typeof(∇args)] end - @test back([1., 0, 0]) == ((m = [1.0, 0.0, 0.0], P = nothing),) + @test back_explicit([1., 0, 0]) == back_implicit([1., 0, 0]) == ∇args Base.getproperty(g::Gaussian, s::Symbol) = 2getfield(g, s) y, back = pullback(x -> x.m, g)