
Commit

perf: avoid double function call in ReverseDiff `value_and_gradient` (#729)

* perf: avoid double function call in ReverseDiff `value_and_gradient`

* Fixes

* Fix hessian

* Separate completely

* Replace DR.gradient(result) with grad
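
In user-facing terms, the call path this commit speeds up looks roughly like the sketch below; `f`, `x`, and the backend settings are illustrative placeholders, while `prepare_gradient`, `value_and_gradient`, and `AutoReverseDiff` are the DifferentiationInterface / ADTypes names already used in the diff. Before this change, the ReverseDiff path evaluated `f(x)` once to seed the result and again during the reverse pass; now the value is read from the single taped pass.

using DifferentiationInterface, ReverseDiff

f(x) = sum(abs2, x)                     # illustrative function
x = rand(3)                             # illustrative input
backend = AutoReverseDiff()             # or AutoReverseDiff(; compile=true)
prep = prepare_gradient(f, backend, x)  # optional preparation step
y, grad = value_and_gradient(f, prep, backend, x)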
gdalle authored Feb 16, 2025
1 parent 98e6e5f commit 3417ff5
Showing 3 changed files with 55 additions and 79 deletions.
2 changes: 1 addition & 1 deletion DifferentiationInterface/Project.toml
@@ -1,7 +1,7 @@
 name = "DifferentiationInterface"
 uuid = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 authors = ["Guillaume Dalle", "Adrian Hill"]
-version = "0.6.40"
+version = "0.6.41"

 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -87,23 +87,26 @@ end
 function DI.value_and_gradient!(
     f, grad, prep::ReverseDiffGradientPrep, ::AutoReverseDiff{compile}, x
 ) where {compile}
-    y = f(x) # TODO: ReverseDiff#251
-    result = DiffResult(y, (grad,))
+    result = MutableDiffResult(zero(eltype(x)), (grad,)) # ReverseDiff#251
     if compile
         result = gradient!(result, prep.tape, x)
     else
         result = gradient!(result, f, x, prep.config)
     end
-    y = DR.value(result)
-    grad === DR.gradient(result) || copyto!(grad, DR.gradient(result))
-    return y, grad
+    return DR.value(result), grad # ReverseDiff#269
 end

 function DI.value_and_gradient(
-    f, prep::ReverseDiffGradientPrep, backend::AutoReverseDiff, x
-)
-    grad = similar(x)
-    return DI.value_and_gradient!(f, grad, prep, backend, x)
+    f, prep::ReverseDiffGradientPrep, backend::AutoReverseDiff{compile}, x
+) where {compile}
+    # GradientResult tries to mutate an SArray
+    result = MutableDiffResult(zero(eltype(x)), (similar(x),))
+    if compile
+        result = gradient!(result, prep.tape, x)
+    else
+        result = gradient!(result, f, x, prep.config)
+    end
+    return DR.value(result), DR.gradient(result)
 end

 function DI.gradient!(
@@ -144,23 +147,19 @@ function DI.value_and_gradient!(
     contexts::Vararg{DI.Context,C},
 ) where {C}
     fc = DI.with_contexts(f, contexts...)
-    y = fc(x) # TODO: ReverseDiff#251
-    result = DiffResult(y, (grad,))
+    result = MutableDiffResult(zero(eltype(x)), (grad,)) # ReverseDiff#251
     result = gradient!(result, fc, x, prep.config)
-    y = DR.value(result)
-    grad === DR.gradient(result) || copyto!(grad, DR.gradient(result))
-    return y, grad
+    return DR.value(result), grad # ReverseDiff#269
 end

 function DI.value_and_gradient(
-    f,
-    prep::ReverseDiffGradientPrep,
-    backend::AutoReverseDiff,
-    x,
-    contexts::Vararg{DI.Context,C},
+    f, prep::ReverseDiffGradientPrep, ::AutoReverseDiff, x, contexts::Vararg{DI.Context,C}
 ) where {C}
-    grad = similar(x)
-    return DI.value_and_gradient!(f, grad, prep, backend, x, contexts...)
+    fc = DI.with_contexts(f, contexts...)
+    # GradientResult tries to mutate an SArray
+    result = MutableDiffResult(zero(eltype(x)), (similar(x),))
+    result = gradient!(result, fc, x, prep.config)
+    return DR.value(result), DR.gradient(result)
 end

 function DI.gradient!(
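
The two hunks above lean on the DiffResults API (`DR` is the `DiffResults` module in this extension): preallocating a `MutableDiffResult` lets a single reverse pass record both the primal value and the gradient, which is what removes the separate `f(x)` call. Below is a standalone sketch of that pattern, not part of the commit, using ReverseDiff and DiffResults directly with an illustrative `f` and `x`.

using ReverseDiff, DiffResults

f(x) = sum(abs2, x)   # illustrative function
x = rand(3)
grad = similar(x)

# One preallocated result holds the value slot and the gradient buffer,
# so a single call to gradient! fills both.
result = DiffResults.MutableDiffResult(zero(eltype(x)), (grad,))
result = ReverseDiff.gradient!(result, f, x)

DiffResults.value(result)     # primal value f(x)
DiffResults.gradient(result)  # gradient of f at x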
@@ -310,31 +309,23 @@ end

 ### Without contexts

-@kwdef struct ReverseDiffHessianPrep{GC,HC,GT,HT} <: DI.HessianPrep
-    gradient_config::GC
+@kwdef struct ReverseDiffHessianPrep{G<:ReverseDiffGradientPrep,HC,HT} <: DI.HessianPrep
+    gradient_prep::G
     hessian_config::HC
-    gradient_tape::GT
     hessian_tape::HT
 end

-function DI.prepare_hessian(f, ::AutoReverseDiff{compile}, x) where {compile}
+function DI.prepare_hessian(f, backend::AutoReverseDiff{compile}, x) where {compile}
+    gradient_prep = DI.prepare_gradient(f, backend, x)
     if compile
-        gradient_tape = ReverseDiff.compile(GradientTape(f, x))
         hessian_tape = ReverseDiff.compile(HessianTape(f, x))
         return ReverseDiffHessianPrep(;
-            gradient_config=nothing,
-            hessian_config=nothing,
-            gradient_tape=gradient_tape,
-            hessian_tape=hessian_tape,
+            gradient_prep, hessian_config=nothing, hessian_tape=hessian_tape
         )
     else
-        gradient_config = GradientConfig(x)
         hessian_config = HessianConfig(x)
         return ReverseDiffHessianPrep(;
-            gradient_config=gradient_config,
-            hessian_config=hessian_config,
-            gradient_tape=nothing,
-            hessian_tape=nothing,
+            gradient_prep, hessian_config=hessian_config, hessian_tape=nothing
         )
     end
 end
@@ -360,47 +351,32 @@ function DI.hessian(
 end

 function DI.value_gradient_and_hessian!(
-    f, grad, hess, prep::ReverseDiffHessianPrep, ::AutoReverseDiff{compile}, x
+    f, grad, hess, prep::ReverseDiffHessianPrep, backend::AutoReverseDiff{compile}, x
 ) where {compile}
-    y = f(x) # TODO: ReverseDiff#251
-    result = DiffResult(y, (grad, hess))
-    if compile
-        result = hessian!(result, prep.hessian_tape, x)
-        grad = gradient!(grad, prep.gradient_tape, x) # TODO: ReverseDiff#251
-    else
-        result = hessian!(result, f, x) # TODO: add prep.hessian_config
-        grad = gradient!(grad, f, x, prep.gradient_config) # TODO: ReverseDiff#251
-    end
-    # grad === DR.gradient(result) || copyto!(grad, DR.gradient(result))
-    hess === DR.hessian(result) || copyto!(hess, DR.hessian(result))
+    y = f(x)
+    DI.gradient!(f, grad, prep.gradient_prep, backend, x)
+    DI.hessian!(f, hess, prep, backend, x)
     return y, grad, hess
 end

 function DI.value_gradient_and_hessian(
-    f, prep::ReverseDiffHessianPrep, ::AutoReverseDiff{compile}, x
+    f, prep::ReverseDiffHessianPrep, backend::AutoReverseDiff{compile}, x
 ) where {compile}
-    y = f(x) # TODO: remove once ReverseDiff#251 is fixed
-    result = DiffResult(y, (similar(x), similar(x, length(x), length(x))))
-    if compile
-        result = hessian!(result, prep.hessian_tape, x)
-    else
-        result = hessian!(result, f, x) # todo: add prep.hessian_config
-    end
-    return (y, DR.gradient(result), DR.hessian(result))
+    y = f(x)
+    grad = DI.gradient(f, prep.gradient_prep, backend, x)
+    hess = DI.hessian(f, prep, backend, x)
+    return y, grad, hess
 end

 ### With contexts

 function DI.prepare_hessian(
-    f, ::AutoReverseDiff, x, contexts::Vararg{DI.Context,C}
+    f, backend::AutoReverseDiff, x, contexts::Vararg{DI.Context,C}
 ) where {C}
-    gradient_config = GradientConfig(x)
+    gradient_prep = DI.prepare_gradient(f, backend, x, contexts...)
     hessian_config = HessianConfig(x)
     return ReverseDiffHessianPrep(;
-        gradient_config=gradient_config,
-        hessian_config=hessian_config,
-        gradient_tape=nothing,
-        hessian_tape=nothing,
+        gradient_prep, hessian_config=hessian_config, hessian_tape=nothing
     )
 end

@@ -428,27 +404,25 @@ function DI.value_gradient_and_hessian!(
     grad,
     hess,
     prep::ReverseDiffHessianPrep,
-    ::AutoReverseDiff,
+    backend::AutoReverseDiff,
     x,
     contexts::Vararg{DI.Context,C},
 ) where {C}
-    fc = DI.with_contexts(f, contexts...)
-    y = fc(x) # TODO: ReverseDiff#251
-    result = DiffResult(y, (grad, hess))
-    result = hessian!(result, fc, x) # TODO: add prep.hessian_config
-    y = DR.value(result)
-    # grad === DR.gradient(result) || copyto!(grad, DR.gradient(result))
-    grad = gradient!(grad, fc, x, prep.gradient_config) # TODO: ReverseDiff#251
-    hess === DR.hessian(result) || copyto!(hess, DR.hessian(result))
+    y = f(x, map(DI.unwrap, contexts)...)
+    DI.gradient!(f, grad, prep.gradient_prep, backend, x, contexts...)
+    DI.hessian!(f, hess, prep, backend, x, contexts...)
     return y, grad, hess
 end

 function DI.value_gradient_and_hessian(
-    f, prep::ReverseDiffHessianPrep, ::AutoReverseDiff, x, contexts::Vararg{DI.Context,C}
+    f,
+    prep::ReverseDiffHessianPrep,
+    backend::AutoReverseDiff,
+    x,
+    contexts::Vararg{DI.Context,C},
 ) where {C}
-    fc = DI.with_contexts(f, contexts...)
-    y = fc(x) # TODO: ReverseDiff#251
-    result = HessianResult(x)
-    result = hessian!(result, fc, x) # TODO: add prep.hessian_config
-    return (DR.value(result), DR.gradient(result), DR.hessian(result))
+    y = f(x, map(DI.unwrap, contexts)...)
+    grad = DI.gradient(f, prep.gradient_prep, backend, x, contexts...)
+    hess = DI.hessian(f, prep, backend, x, contexts...)
+    return y, grad, hess
 end
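
For completeness, here is a hedged sketch of the user-facing Hessian call affected by the restructuring above; `f`, `x`, and the backend settings are illustrative placeholders. With the new `ReverseDiffHessianPrep`, preparation builds a full gradient prep that `value_gradient_and_hessian` then reuses, instead of carrying separate gradient configs and tapes.

using DifferentiationInterface, ReverseDiff

f(x) = sum(abs2, x)                        # illustrative function
x = rand(3)
backend = AutoReverseDiff(; compile=true)
prep = prepare_hessian(f, backend, x)      # also prepares the inner gradient
y, grad, hess = value_gradient_and_hessian(f, prep, backend, x)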
4 changes: 3 additions & 1 deletion DifferentiationInterface/test/Back/ReverseDiff/test.jl
@@ -29,7 +29,9 @@ test_differentiation(
     logging=LOGGING,
 );

-test_differentiation(backends, static_scenarios(); logging=LOGGING);
+test_differentiation(
+    backends, static_scenarios(; include_constantified=true); logging=LOGGING
+);

 ## Sparse


2 comments on commit 3417ff5

@gdalle (Member, Author) commented on 3417ff5, Feb 16, 2025


@JuliaRegistrator register subdir=DifferentiationInterface

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/125225

Tip: Release Notes

Did you know you can add release notes too? Just add Markdown-formatted text underneath the comment after the text
"Release notes:". It will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. For example:

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here, just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or it can be done manually through the GitHub interface, or via:

git tag -a DifferentiationInterface-v0.6.41 -m "<description of version>" 3417ff540f0df3a795c185b33df615dfc4334848
git push origin DifferentiationInterface-v0.6.41
