diff --git a/test/cpp/api/init.cpp b/test/cpp/api/init.cpp
index 1f4059de02eaf..79c74abeebc94 100644
--- a/test/cpp/api/init.cpp
+++ b/test/cpp/api/init.cpp
@@ -105,7 +105,7 @@ TEST(InitTest, CanInitializeTensorThatRequiresGrad) {
   ASSERT_THROWS_WITH(
       tensor.fill_(1),
       "a leaf Variable that requires grad "
-      "has been used in an in-place operation");
+      "is being used in an in-place operation");
   ASSERT_EQ(torch::nn::init::ones_(tensor).sum().item(), 12);
 }
 
@@ -168,4 +168,4 @@ TEST(InitTest, FanModeLegacyEnum) {
 
   FANMODE_ENUM_LEGACY_WARNING_CHECK(kaiming_uniform_, FanIn, "torch::kFanIn")
   FANMODE_ENUM_LEGACY_WARNING_CHECK(kaiming_uniform_, FanOut, "torch::kFanOut")
-}
\ No newline at end of file
+}
diff --git a/test/test_autograd.py b/test/test_autograd.py
index 41c1e5d0dba65..4dae782425855 100644
--- a/test/test_autograd.py
+++ b/test/test_autograd.py
@@ -3516,11 +3516,11 @@ def maybe_check_raise(fn, should_raise):
         # The 3 elements are for view_as, first output of unbind and second output of unbind
         run_test(grad_mode=True, requires_grad=False, is_view=True,
                  should_raise_tuple=(None, None, None))
-        # TODO: Second should_raise should not be None below, third one should not raise an internal assert
+        inp_change_err = "The {}th output of UnbindBackward is being modified inplace but this is not allowed"
         run_test(grad_mode=True, requires_grad=True, is_view=True,
-                 should_raise_tuple=(None, None, "diff_view_meta->output_nr_ == 0 INTERNAL ASSERT FAILED"))
+                 should_raise_tuple=(None, inp_change_err.format("0"), inp_change_err.format("1")))
         # TODO: views require gradients when created in no_grad mode but their grad_fn is not populated
-        leaf_grad_err = "a leaf Variable that requires grad has been used in an in-place operation."
+        leaf_grad_err = "a leaf Variable that requires grad is being used in an in-place operation."
        run_test(grad_mode=False, requires_grad=True, is_view=True,
                  should_raise_tuple=(leaf_grad_err, leaf_grad_err, leaf_grad_err))
         run_test(grad_mode=False, requires_grad=False, is_view=True,
diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml
index 09f2731a241d3..5e5b3cc431c7a 100644
--- a/tools/autograd/derivatives.yaml
+++ b/tools/autograd/derivatives.yaml
@@ -94,6 +94,17 @@
 # used by our frontend. You MUST check the frontend code; search for
 # OpName.apply to see if it's still using a legacy Python style API.
 #
+# Note: Returning views.
+# The following cases exist:
+# - If a function returns no view, it can have arbitrary outputs.
+# - If a function returns at least one Tensor that is a differentiable view
+#   of one of its inputs:
+#   - If there is only one differentiable output, this Tensor is marked as a
+#     differentiable view. (alias or transpose for example)
+#   - If there is more than one differentiable output, by default all the views are
+#     marked as differentiable views and created with allow_rebase_history=false,
+#     meaning that any in-place operation on them will raise an error. (unbind for example)
+#
 # NB: The parameter names here MUST be consistent with the parameter names
 # in Declarations.yaml
 - name: abs(Tensor self) -> Tensor
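For readers skimming the new derivatives.yaml note, the intended user-visible behaviour is roughly the following (illustrative libtorch snippet, not part of this patch; the error texts are the ones asserted in the tests above):

```cpp
#include <torch/torch.h>

int main() {
  auto base = torch::rand({2, 3}, torch::requires_grad()) * 2;  // non-leaf tensor

  // Single differentiable view: in-place is allowed, history is rebased.
  auto t = base.transpose(0, 1);
  t.mul_(2);

  // Multi-output view: every output is a differentiable view created with
  // allow_rebase_history=false, so an in-place op on one of them throws
  // "The 0th output of UnbindBackward is being modified inplace but this is not allowed ...".
  auto pieces = base.unbind(0);
  // pieces[0].mul_(2);  // would throw

  // Leaf that requires grad: in-place is rejected up front with
  // "a leaf Variable that requires grad is being used in an in-place operation."
  auto leaf = torch::rand({2, 3}, torch::requires_grad());
  // leaf.mul_(2);  // would throw

  return 0;
}
```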
diff --git a/tools/autograd/gen_autograd.py b/tools/autograd/gen_autograd.py
index ef0fe9bfe5bed..2c113dba5e300 100644
--- a/tools/autograd/gen_autograd.py
+++ b/tools/autograd/gen_autograd.py
@@ -30,9 +30,7 @@
 from .utils import YamlLoader, split_name_params
 
 # See NOTE [ Autograd View Variables ] in variable.h for details.
-# A map: function name => two options:
-#   1. name of the argument that all outputs are view of
-#   2. map: output idx => name of the argument that this result is view of
+# A map: function name => name of the argument that all outputs are view of
 VIEW_FUNCTIONS = {
     'numpy_T': 'self',
     'alias': 'self',
@@ -67,7 +65,6 @@
 # returns a view of its inputs
 RETURNS_VIEWS_OF_INPUT = set(VIEW_FUNCTIONS.keys()).union({'chunk', 'split'})
 
-
 def format_return_type(returns):
     if len(returns) == 0:
         return 'void'
diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py
index 989615ad14b2c..58b0c16579473 100644
--- a/tools/autograd/gen_variable_type.py
+++ b/tools/autograd/gen_variable_type.py
@@ -562,6 +562,9 @@ def emit_body(declaration):
 
     base_name = name[:-1] if inplace else name[:-4] if is_out_fn else name
     view_info = VIEW_FUNCTIONS.get(base_name, None)
+    # TODO: Add back when https://github.com/pytorch/pytorch/pull/32044 lands again
+    # if view_info is None and base_name in RETURNS_VIEWS_OF_INPUT:
+    #     view_info = "self"
 
     def is_differentiable(arg):
         if 'TensorOptions' in arg['type']:
@@ -759,59 +762,38 @@ def declare_returned_variables():
         return '\n'.join(names)
 
     def wrap_output(call):
-        # Returns a 2-tuple `(wrapped_call, extra_wrapping_stmts)`, where
-        # `wrapped_call` is to drop-in replace `call`, and
-        # `extra_wrapping_stmts` is a list of extra statements to run after
-        # `call`.
+        # Returns `wrapped_call`, which is a drop-in replacement for `call`.
         if 'Tensor' not in declaration['return_type']:
-            return call, []
+            return call
         elif view_info is not None:
             # See NOTE [ Autograd View Variables ] in variable.h for details.
             differentiable_output_vars = {r['name'] for r in differentiable_outputs}
-            tensor_output_vars = {r['name'] for r in returns if 'Tensor' in r['type']}
-            if not isinstance(view_info, dict):
-                if len(differentiable_output_vars) == len(tensor_output_vars):
-                    # all outputs are differentiable
-                    return 'as_view({}, {}, true)'.format(view_info, call), []
-                elif len(differentiable_output_vars) == 0:
-                    # no output is differentiable
-                    return 'as_view({}, {}, false)'.format(view_info, call), []
-                else:
-                    # some of the outputs are differentiable
-                    # need to expand to dict mode, i.e., one entry per output
-                    base_name = view_info
-                    view_info_dict = {}
-                    for i, return_info in enumerate(returns):
-                        if 'Tensor' in return_info['type']:
-                            view_info_dict[i] = base_name
+
+            if not isinstance(view_info, str):
+                raise TypeError("The view info should be a string for {}, but it is: {}".format(base_name, view_info))
+
+            if len(differentiable_output_vars) == 0:
+                # no output is differentiable (.indices() for SparseTensors for example)
+                return 'as_view({}, {}, /* is_differentiable */ false)'.format(view_info, call)
+            elif len(differentiable_output_vars) == 1:
+                # Single differentiable output (Tensor or Tensor[])
+                return_info = differentiable_outputs[0]
+                # We only support a plain Tensor or a TensorList for functions that return views
+                if return_info['dynamic_type'] not in ['Tensor', 'TensorList']:
+                    raise RuntimeError("{} that returns differentiable views can only return Tensor or Tensor[]".format(base_name))
+                # Only allow rebasing of the history if we return a single Tensor
+                allow_rebase_history = 'true'
+                if return_info['dynamic_type'] == 'TensorList':
+                    allow_rebase_history = 'false'
+                wrapped_call = ("as_view(/* base */ {}, /* output */ {}, /* is_differentiable */ true, "
+                                "/* allow_rebase_history */ {})").format(view_info, call, allow_rebase_history)
+                return wrapped_call
             else:
-                view_info_dict = view_info
-
-            def wrap_view_single(output_var, base_var):
-                fmt = '{output_var} = as_view({base_var}, {output_var}, {is_differentiable});'
-                if output_var in differentiable_output_vars:
-                    # If `GradMode::is_enabled()` is False, this is a
-                    # non-differentiable view. Gradients should not flow through.
-                    is_differentiable = 'true'
-                else:
-                    # This output is non-differentiable, so it is a
-                    # non-differentiable view. Gradients should not flow through.
-                    is_differentiable = 'false'
-                return fmt.format(output_var=output_var, base_var=base_var,
-                                  is_differentiable=is_differentiable)
-
-            extra_wrapping_stmts = []
-            for output_idx, return_info in enumerate(returns):
-                if 'Tensor' not in return_info['type']:
-                    assert output_idx not in view_info_dict, 'Can not wrap non-Tensor output as a view'
-                    continue
-                output_var = return_info['name']
-                if output_idx in view_info_dict:
-                    stmt = wrap_view_single(output_var, view_info_dict[output_idx])
-                    extra_wrapping_stmts.append(stmt)
-            return call, extra_wrapping_stmts
+                # This could be supported but we don't need it at the moment, so keeping things simple.
+                raise RuntimeError("Functions that return multiple differentiable outputs "
+                                   "where at least one of them is a view are not supported.")
         else:
-            return 'std::move({})'.format(call), []
+            return 'std::move({})'.format(call)
 
     def enforce_same_tensorimpl_and_storage(env, call):
         save_ptrs_stmts = []
@@ -838,7 +820,6 @@ def enforce_same_tensorimpl_and_storage(env, call):
 
     def emit_call(env):
         combined = nested_dict(env, declaration)
-        extra_wrapping_stmts = []
         if strategy == 'use_derived':
             # We only care about adding `at::AutoNonVariableTypeMode` guard for non-variable dispatch
             # (which corresponds to 'use_derived' strategy). The purpose of this guard is to make sure
@@ -852,7 +833,7 @@ def emit_call(env):
                 base_type_call = CALL_DISPATCH_VIA_METHOD.substitute(
                     combined, unpacked_method_args=unpacked_method_args)
             if not modifies_arguments and not returns_void:
-                rhs_value, extra_wrapping_stmts = wrap_output('tmp')
+                rhs_value = wrap_output('tmp')
                 call = DISPATCH_TO_NON_VAR_TYPE_WITH_RETURN_VALUES.substitute(
                     base_type_call=base_type_call,
                     return_values=tie_return_values(),
@@ -865,8 +846,6 @@ def emit_call(env):
             if not modifies_arguments and not returns_void:
                 call = '{} = {}'.format(tie_return_values(), call)
             call = call + ';'
-        for stmt in extra_wrapping_stmts:
-            call += '\n' + stmt
         call = enforce_same_tensorimpl_and_storage(env, call)
         return call
 
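With the per-output dict mode removed, every view function is now wrapped by a single as_view() call. As a rough sketch of the effect (hypothetical and trimmed; the real generated VariableType code also records profiling information and saves sizes for the backward), the wrapper emitted for unbind now looks approximately like this:

```cpp
// Approximate shape of the generated code for a TensorList view op (unbind); not verbatim.
std::vector<Tensor> unbind(const Tensor & self, int64_t dim) {
  auto& self_ = unpack(self, "self", 0);
  std::shared_ptr<UnbindBackward> grad_fn;
  if (compute_requires_grad(self)) {
    grad_fn = std::make_shared<UnbindBackward>();
    grad_fn->set_next_edges(collect_next_edges(self));
    grad_fn->dim = dim;
  }
  auto tmp = ([&]() {
    at::AutoNonVariableTypeMode non_var_type_mode(true);
    return at::unbind(self_, dim);
  })();
  // TensorList outputs: differentiable views that are never allowed to rebase history.
  auto result = as_view(/* base */ self, /* output */ tmp, /* is_differentiable */ true,
                        /* allow_rebase_history */ false);
  if (grad_fn) {
    set_history(flatten_tensor_args(result), grad_fn);
  }
  return result;
}
```

A view op returning a single Tensor (e.g. transpose) is wrapped the same way but with /* allow_rebase_history */ true, and outputs that are not differentiable (e.g. the indices of a sparse tensor) with /* is_differentiable */ false.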
diff --git a/torch/csrc/autograd/VariableTypeManual.cpp b/torch/csrc/autograd/VariableTypeManual.cpp
index b9aebfcc1f7fb..57375b5b037f9 100644
--- a/torch/csrc/autograd/VariableTypeManual.cpp
+++ b/torch/csrc/autograd/VariableTypeManual.cpp
@@ -255,7 +255,7 @@ Tensor detach(const Tensor & self) {
 
   }
   //
-  auto result = make_variable_view(self, self, /*is_differentiable=*/false, /*allow_tensor_metadata_change=*/false);
+  auto result = make_variable_non_differentiable_view(self, self, /*allow_tensor_metadata_change=*/false);
   namedinference::propagate_names(result, self);
   //
   if (jit::tracer::isTracing()) {
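The detach() change above is only a rename to the new dedicated helper; the semantics of a non-differentiable view are unchanged. A minimal illustration (libtorch user code, not part of this patch):

```cpp
#include <torch/torch.h>
#include <iostream>

int main() {
  auto x = torch::ones({2, 2}, torch::requires_grad());
  auto y = x.detach();  // non-differentiable view: shares storage and version counter with x
  y.add_(1);            // the change is visible through x, but no gradient ever flows from y to x
  std::cout << x.requires_grad() << " " << y.requires_grad() << std::endl;  // prints "1 0"
  return 0;
}
```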
diff --git a/torch/csrc/autograd/VariableTypeUtils.h b/torch/csrc/autograd/VariableTypeUtils.h
index 4918a5aebe9c8..1a3d843e860eb 100644
--- a/torch/csrc/autograd/VariableTypeUtils.h
+++ b/torch/csrc/autograd/VariableTypeUtils.h
@@ -41,9 +41,20 @@ namespace torch { namespace autograd {
 
 inline void check_inplace(const Tensor& tensor) {
   auto& var = static_cast<const Variable&>(tensor);
-  if (var.requires_grad() && var.is_leaf() && GradMode::is_enabled()) {
-    AT_ERROR(
-      "a leaf Variable that requires grad has been used in an in-place operation.");
+  if (var.requires_grad() && GradMode::is_enabled()) {
+    if (var.is_leaf()) {
+      AT_ERROR(
+        "a leaf Variable that requires grad is being used in an in-place operation.");
+    } else if (var.is_view()) {
+      // NB: is_view() ==> get_autograd_meta()
+      auto diff_view_meta = static_cast<DifferentiableViewMeta*>(impl::get_autograd_meta(var));
+      auto grad_fn = impl::grad_fn_unsafe(var);
+      // NB: !var.is_leaf() ==> grad_fn != nullptr
+      TORCH_INTERNAL_ASSERT(grad_fn);
+      TORCH_CHECK(diff_view_meta->allow_rebase_history,
+        "The ", diff_view_meta->output_nr_, "th output of ", grad_fn->name(),
+        " is being modified inplace but this is not allowed as it would prevent correct gradient computation.");
+    }
   }
 }
 
@@ -97,23 +108,33 @@ template <typename... Args> inline variable_list flatten_tensor_args(Args&&... ar
 }
 
 // See NOTE [ Autograd View Variables ] for details.
-inline Tensor as_view(const Tensor & base, Tensor tensor, bool is_differentiable = true) {
+inline Tensor as_view(const Tensor & base, Tensor tensor, bool is_differentiable, bool allow_rebase_history=true) {
   auto base_var = Variable(base);
   if (base_var.is_view()) {
     base_var = base_var.base();
   }
-  return make_variable_view(std::move(base_var), std::move(tensor), is_differentiable);
+  if (is_differentiable) {
+    return make_variable_differentiable_view(std::move(base_var), std::move(tensor), allow_rebase_history);
+  } else {
+    TORCH_CHECK(allow_rebase_history, "Non-differentiable views cannot set allow_rebase_history=false");
+    return make_variable_non_differentiable_view(std::move(base_var), std::move(tensor));
+  }
 }
 
 // See NOTE [ Autograd View Variables ] for details.
-inline std::vector<Tensor> as_view(const Tensor & base, std::vector<Tensor> tensors,
-                                   bool is_differentiable = true) {
+inline std::vector<Tensor> as_view(const Tensor & base, std::vector<Tensor> tensors, bool is_differentiable,
+                                   bool allow_rebase_history=true) {
   auto base_var = Variable(base);
   if (base_var.is_view()) {
     base_var = base_var.base();
   }
   for(Tensor &tensor : tensors) {
-    tensor = make_variable_view(base_var, std::move(tensor), is_differentiable);
+    if (is_differentiable) {
+      tensor = make_variable_differentiable_view(base_var, std::move(tensor), allow_rebase_history);
+    } else {
+      TORCH_CHECK(allow_rebase_history, "Non-differentiable views cannot set allow_rebase_history=false");
+      tensor = make_variable_non_differentiable_view(base_var, std::move(tensor));
+    }
   }
   return tensors;
 }
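A hypothetical C++ test (not included in this patch) in the style of test/cpp/api, showing the new check_inplace() branch firing on a view whose allow_rebase_history flag is false:

```cpp
#include <gtest/gtest.h>
#include <torch/torch.h>
#include <test/cpp/api/support.h>

TEST(AutogradViewTest, MultiOutputViewRejectsInplace) {
  auto base = torch::rand({3, 4}, torch::requires_grad()) * 2;  // non-leaf
  auto outs = base.unbind(0);  // differentiable views created with allow_rebase_history == false
  ASSERT_THROWS_WITH(
      outs[1].zero_(),
      "output of UnbindBackward is being modified inplace but this is not allowed");
}
```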
diff --git a/torch/csrc/autograd/variable.cpp b/torch/csrc/autograd/variable.cpp
index 007664c65ce8a..4cc29ad04d8d7 100644
--- a/torch/csrc/autograd/variable.cpp
+++ b/torch/csrc/autograd/variable.cpp
@@ -25,8 +25,8 @@
 namespace torch {
 namespace autograd {
 
-DifferentiableViewMeta::DifferentiableViewMeta(at::TensorImpl* self_impl, Variable base)
-  : AutogradMeta(self_impl, false) {
+DifferentiableViewMeta::DifferentiableViewMeta(at::TensorImpl* self_impl, Variable base, bool allow_rebase_history)
+  : AutogradMeta(self_impl), allow_rebase_history(allow_rebase_history) {
   base_ = std::move(base);
   TORCH_CHECK(base_.defined(), "base is undefined");
   if (base_.is_view()) {
@@ -72,12 +72,13 @@ namespace impl {
   }
 
   void rebase_history(const Variable& self, Edge gradient_edge) {
-    AT_ASSERT(gradient_edge.function != nullptr);
+    TORCH_INTERNAL_ASSERT(gradient_edge.function != nullptr);
     if (self.is_view()) {
       // NB: is_view() ==> get_autograd_meta()
       auto diff_view_meta = static_cast<DifferentiableViewMeta*>(get_autograd_meta(self));
-      AT_ASSERT(gradient_edge.input_nr == 0);
-      AT_ASSERT(gradient_edge.function);
+      TORCH_INTERNAL_ASSERT(diff_view_meta->allow_rebase_history);
+      TORCH_INTERNAL_ASSERT(gradient_edge.input_nr == 0);
+      TORCH_INTERNAL_ASSERT(gradient_edge.function);
       TORCH_CHECK(
           gradient_edge.function->num_inputs() == 1,
           "Functions which modify views in-place must return a single Variable");
@@ -323,7 +324,7 @@ const std::shared_ptr& VariableHooks::grad_fn(const Tenso
     }
     auto current_version = self._version();
     if (diff_view_meta->attr_version != current_version) {
-      AT_ASSERT(diff_view_meta->output_nr_ == 0);
+      TORCH_INTERNAL_ASSERT(diff_view_meta->output_nr_ == 0);
       auto fn = std::make_shared<generated::AsStridedBackward>();
       fn->self_geometry = at::TensorGeometry(diff_view_meta->base_);
       fn->size = self.sizes().vec();
diff --git a/torch/csrc/autograd/variable.h b/torch/csrc/autograd/variable.h
index a3fd96d57e6d3..1cd9b5c261634 100644
--- a/torch/csrc/autograd/variable.h
+++ b/torch/csrc/autograd/variable.h
@@ -295,6 +295,14 @@ struct TORCH_API AutogradMeta : public c10::AutogradMetaInterface {
 /// + if an in-place op is done on view, in rebase_history() of view, which is
 ///   called after every in-place op in VariableType.cpp, the grad_fn of base
 ///   is updated.
+/// + if a single autograd Node returns multiple differentiable views, and any of
+///   those outputs is modified in-place, the autograd engine will make an
+///   equivalent graph (corresponding to the view operations) without using the
+///   original Node: each output is treated as if it were produced by a distinct
+///   view operation. This discards the original (e.g., user provided) grad_fn.
+///   If the provided grad_fn does more than the backward of the view, then the
+///   DifferentiableViewMeta must be created with allow_rebase_history=false so
+///   that in-place operations on these outputs raise an error instead.
 ///
 ///
 /// Non-Differentiable Views
@@ -313,8 +321,9 @@ struct TORCH_API AutogradMeta : public c10::AutogradMetaInterface {
 ///   function are non-differentiable.
 /// These are called non-differentiable views as the gradients do not flow
 /// through the view relation.
-/// Relevant logic for non-differentiable views is implemented in
-/// make_variable_view below, and wrap_output of gen_variable_type.py.
+///
+/// Relevant logic for both differentiable and non-differentiable views is implemented in
+/// make_variable_(non_)differentiable_view below, and wrap_output of gen_variable_type.py.
 struct TORCH_API DifferentiableViewMeta : public AutogradMeta {
   /// The base `Variable` (never a view).
   Variable base_;
@@ -324,11 +333,15 @@ struct TORCH_API DifferentiableViewMeta : public AutogradMeta {
   ///   version_counter.current_version().
   uint32_t attr_version;
 
+  /// Boolean flag that signifies whether the history of this Tensor can be
+  /// rebased or whether that is forbidden.
+  bool allow_rebase_history;
+
   bool requires_grad() const override {
     return requires_grad_ || grad_fn_ || (is_view_ && base_.requires_grad());
   }
 
-  DifferentiableViewMeta(at::TensorImpl* self_impl, Variable base);
+  DifferentiableViewMeta(at::TensorImpl* self_impl, Variable base, bool allow_rebase_history=true);
   ~DifferentiableViewMeta();
 };
 
@@ -353,28 +366,34 @@ struct TORCH_API DifferentiableViewMeta : public AutogradMeta {
 ///   prevent those changes from happening and is undesirable.
 
 // See NOTE [ Autograd View Variables ] for details.
-inline Variable make_variable_view(
+// Differentiable view. Track history with DifferentiableViewMeta.
+inline Variable make_variable_differentiable_view(
     Variable base,
     at::Tensor data,
-    bool is_differentiable = true,
-    bool allow_tensor_metadata_change = true) {
+    bool allow_rebase_history) {
   if (data.defined()) {
-    if (is_differentiable) {
-      /// Differentiable view. Track history with DifferentiableViewMeta.
-      auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach(
-        /*version_counter=*/0,
-        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
-      data_impl_copy->set_autograd_meta(std::make_unique<DifferentiableViewMeta>(
-        data_impl_copy.get(), std::move(base)));
-      return Variable(data_impl_copy);
-    } else {
-      /// Non-differentiable view. Just share version counter.
-      auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach(
-        /*version_counter=*/impl::version_counter(base),
-        /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
-      data_impl_copy->set_autograd_meta(nullptr);
-      return Variable(data_impl_copy);
+    auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach(
+      /*version_counter=*/0,
+      /*allow_tensor_metadata_change=*/true);
+    data_impl_copy->set_autograd_meta(std::make_unique<DifferentiableViewMeta>(
+      data_impl_copy.get(), std::move(base), allow_rebase_history));
+    return Variable(data_impl_copy);
   }
+  return Variable();
+}
+
+// See NOTE [ Autograd View Variables ] for details.
+// Non-differentiable view. Just share version counter.
+inline Variable make_variable_non_differentiable_view(
+    Variable base,
+    at::Tensor data,
+    bool allow_tensor_metadata_change = true) {
+  if (data.defined()) {
+    auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach(
+      /*version_counter=*/impl::version_counter(base),
+      /*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
+    data_impl_copy->set_autograd_meta(nullptr);
+    return Variable(data_impl_copy);
   }
   return Variable();
 }
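To make the "equivalent graph" paragraph in the NOTE above concrete, here is a small illustrative snippet (not part of this patch; node names such as SliceBackward and AsStridedBackward may vary slightly between versions) showing a permitted rebase on a single differentiable view:

```cpp
#include <torch/torch.h>
#include <iostream>

int main() {
  auto base = torch::rand({4}, torch::requires_grad()) + 1;  // non-leaf
  auto view = base.slice(0, 0, 2);                           // single differentiable view
  std::cout << view.grad_fn()->name() << std::endl;          // e.g. "SliceBackward"
  view.mul_(3);                                              // allowed: history is rebased
  std::cout << base.grad_fn()->name() << std::endl;          // e.g. "CopySlices"
  std::cout << view.grad_fn()->name() << std::endl;          // e.g. "AsStridedBackward" (recomputed lazily)
  return 0;
}
```

For views created with allow_rebase_history=false (the multi-output case), the same mul_() call is rejected by check_inplace() instead, so the user-provided grad_fn is never silently replaced.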