Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise when in place operations occur on leafs requiring grad #1458

Merged
merged 19 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 7 additions & 12 deletions thunder/executors/torchex.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,24 @@
from __future__ import annotations
import operator
import importlib
from dataclasses import replace
from contextlib import ContextDecorator
from functools import wraps, partial
from inspect import signature
from itertools import groupby
from functools import partial, wraps
from numbers import Number
from typing import TYPE_CHECKING
from collections.abc import Callable
from collections.abc import Hashable, Sequence
from collections.abc import Sequence
from types import ModuleType
from enum import Enum, auto

import torch
import math
from looseversion import LooseVersion

from thunder.core.compile_data import get_compile_data
import thunder.core.dtypes as dtypes
from thunder.core.dtypes import to_torch_dtype, to_dtype
import thunder.core.devices as devices
from thunder.core.devices import to_torch_device, to_device
import thunder.core.prims as prims
from thunder.core.trace import TraceCtx, set_tracectx, reset_tracectx, from_trace
from thunder.core.proxies import NumberProxy, TensorProxy, FutureTensorProxy, variableify, pytype
from thunder.core.pytree import tree_flatten, tree_unflatten
from thunder.core.symbol import Symbol, BoundSymbol
from thunder.core.proxies import NumberProxy, TensorProxy, FutureTensorProxy, pytype
from thunder.core.symbol import Symbol
from thunder.distributed.prims import DistributedReduceOps
import thunder.distributed.prims as dist_prims
import thunder.core.utils as utils
Expand Down Expand Up @@ -2190,6 +2182,9 @@ def is_float_type(self, input):


def _copy__impl(copy_from: torch.Tensor, copy_to: torch.Tensor) -> torch.Tensor:
    """Copy ``copy_from`` into ``copy_to`` in place and return ``copy_to``.

    Before copying, the destination is checked so that an in-place write to a
    leaf tensor that requires grad is rejected with the same message PyTorch
    eager mode uses.

    Args:
        copy_from: Source whose values are written into ``copy_to``.
        copy_to: Destination; mutated in place via ``copy_``.

    Returns:
        ``copy_to`` itself (the same object), after the copy.

    Raises:
        RuntimeError: If compile data is available and reports grad as
            enabled, and ``copy_to`` is a leaf that requires grad.
    """
    cd = get_compile_data()
    # Without compile data there is no recorded grad mode to consult, so the
    # leaf check is skipped and the copy proceeds.
    # NOTE(review): ``cd.is_grad_enabled`` is presumably the grad mode captured
    # for the compiled function -- confirm against the compile-data definition.
    grad_enabled = cd is not None and cd.is_grad_enabled
    if grad_enabled and copy_to.is_leaf and copy_to.requires_grad:
        raise RuntimeError("a leaf Variable that requires grad is being used in an in-place operation.")
    copy_to.copy_(copy_from)
    return copy_to

Expand Down
38 changes: 18 additions & 20 deletions thunder/tests/test_inplace_functionalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,31 +476,27 @@ def f(xs, ys, z):
dtypes=NOTHING,
)
def test_inplace_to_tensors_with_grad(executor, device, _):
@torch.no_grad
beverlylytle marked this conversation as resolved.
Show resolved Hide resolved
def add_y(x, y):
x.add_(y, alpha=0.1)
# In-place operations on leaf tensors that require grad are illegal; the expression below is a trick to make z a non-leaf.
z = torch.abs(x) * torch.sgn(x)
z.add_(y, alpha=0.1)

@torch.no_grad
def add_grad(x, y):
x.add_(x.grad, alpha=0.1)
jitted_f = executor.make_callable(add_y)
x = make_tensor((2, 2), device=device, dtype=torch.float32, requires_grad=True)
x.grad = make_tensor((2, 2), device=device, dtype=torch.float32)
y = make_tensor((2, 2), device=device, dtype=torch.float32)

for f in (add_y, add_grad):
jitted_f = executor.make_callable(f)
x = make_tensor((2, 2), device=device, dtype=torch.float32, requires_grad=True)
x.grad = make_tensor((2, 2), device=device, dtype=torch.float32)
y = make_tensor((2, 2), device=device, dtype=torch.float32)
x_ref = x.clone().detach().requires_grad_(True)
x_ref.grad = x.grad.clone().detach()
y_ref = y.clone().detach()

x_ref = x.clone().detach().requires_grad_(True)
x_ref.grad = x.grad.clone().detach()
y_ref = y.clone().detach()
res = jitted_f(x, y)
res_ref = add_y(x_ref, y_ref)

res = jitted_f(x, y)
res_ref = f(x_ref, y_ref)

torch.testing.assert_close(x, x_ref)
torch.testing.assert_close(x.grad, x_ref.grad)
torch.testing.assert_close(y, y_ref)
torch.testing.assert_close(res, res_ref)
torch.testing.assert_close(x, x_ref)
torch.testing.assert_close(x.grad, x_ref.grad)
torch.testing.assert_close(y, y_ref)
torch.testing.assert_close(res, res_ref)


@instantiate(
Expand Down Expand Up @@ -551,6 +547,8 @@ def single_tensor_adam(

jitted = executor.make_callable(single_tensor_adam)
params, grads, exp_avgs, exp_avg_sqs = tensors
cd = thunder.compile_data(jitted)
cd.compile_options["torch_compile_fullgraph"] = False
kshitij12345 marked this conversation as resolved.
Show resolved Hide resolved

jitted(params, grads, exp_avgs, exp_avg_sqs, state_steps)
torch.testing.assert_close(actual=tensors + [state_steps], expected=ref_tensors + [ref_state_steps])
Expand Down
Loading