Add allow_rebase_history flag and fix codegen functions for multiple views (pytorch#32790)

Summary:
Pull Request resolved: pytorch#32790

Same as pytorch#31990 but without the first commit in the stack that is problematic for a lot of people.

Test Plan: Imported from OSS

Differential Revision: D19814116

Pulled By: albanD

fbshipit-source-id: d104911a5b098a5807b4bc08b69803ebd4f69fa6
albanD authored and facebook-github-bot committed Feb 11, 2020
1 parent 330d051 commit 3655975
Showing 9 changed files with 124 additions and 96 deletions.
4 changes: 2 additions & 2 deletions test/cpp/api/init.cpp
@@ -105,7 +105,7 @@ TEST(InitTest, CanInitializeTensorThatRequiresGrad) {
ASSERT_THROWS_WITH(
tensor.fill_(1),
"a leaf Variable that requires grad "
"has been used in an in-place operation");
"is being used in an in-place operation");
ASSERT_EQ(torch::nn::init::ones_(tensor).sum().item<int32_t>(), 12);
}

@@ -168,4 +168,4 @@ TEST(InitTest, FanModeLegacyEnum) {

FANMODE_ENUM_LEGACY_WARNING_CHECK(kaiming_uniform_, FanIn, "torch::kFanIn")
FANMODE_ENUM_LEGACY_WARNING_CHECK(kaiming_uniform_, FanOut, "torch::kFanOut")
}
}
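The following is a minimal Python sketch of the behavior this C++ test exercises, assuming a PyTorch build that includes this change; the tensor shape is illustrative and only the error wording comes from the diff above.

import torch

# In-place on a leaf that requires grad is rejected; this commit only rewords
# the message from "has been used" to "is being used".
t = torch.empty(3, 4, requires_grad=True)
try:
    t.fill_(1)
except RuntimeError as err:
    print(err)  # "a leaf Variable that requires grad is being used in an in-place operation."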
6 changes: 3 additions & 3 deletions test/test_autograd.py
Expand Up @@ -3516,11 +3516,11 @@ def maybe_check_raise(fn, should_raise):
# The 3 elements are for view_as, first output of unbind and second output of unbind
run_test(grad_mode=True, requires_grad=False, is_view=True,
should_raise_tuple=(None, None, None))
# TODO: Second should_raise should not be None below, third one should not raise an internal assert
inp_change_err = "The {}th output of UnbindBackward is being modified inplace but this is not allowed"
run_test(grad_mode=True, requires_grad=True, is_view=True,
should_raise_tuple=(None, None, "diff_view_meta->output_nr_ == 0 INTERNAL ASSERT FAILED"))
should_raise_tuple=(None, inp_change_err.format("0"), inp_change_err.format("1")))
# TODO: views require gradients when created in no_grad mode but their grad_fn is not populated
leaf_grad_err = "a leaf Variable that requires grad has been used in an in-place operation."
leaf_grad_err = "a leaf Variable that requires grad is being used in an in-place operation."
run_test(grad_mode=False, requires_grad=True, is_view=True,
should_raise_tuple=(leaf_grad_err, leaf_grad_err, leaf_grad_err))
run_test(grad_mode=False, requires_grad=False, is_view=True,
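A hedged Python sketch of the scenario the updated test now covers: in-place modification of one output of a multi-output view op such as unbind raises a descriptive error (per the inp_change_err template above) instead of tripping an internal assert. Variable names are illustrative.

import torch

base = torch.randn(2, 3, requires_grad=True).clone()  # non-leaf base
first, second = base.unbind(0)
try:
    second.add_(1)
except RuntimeError as err:
    # e.g. "The 1th output of UnbindBackward is being modified inplace but this is not allowed ..."
    print(err)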
11 changes: 11 additions & 0 deletions tools/autograd/derivatives.yaml
@@ -94,6 +94,17 @@
# used by our frontend. You MUST check the frontend code; search for
# OpName.apply to see if it's still using a legacy Python style API.
#
# Note: Returning views.
# The following cases exist:
# - If a function returns no view, it can have arbitrary outputs.
# - If a function returns at least one Tensor that is a differentiable view
#   of one of its inputs:
#   - If there is only one differentiable output, this Tensor is marked as a
#     differentiable view. (alias or transpose for example)
#   - If there is more than one differentiable output, by default all the views are
#     marked as differentiable views and created with allow_rebase_history=false,
#     meaning that any inplace operation on them will raise an error. (unbind for example)
#
# NB: The parameter names here MUST be consistent with the parameter names
# in Declarations.yaml
- name: abs(Tensor self) -> Tensor
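For contrast with the multi-output case, here is a small Python sketch of the single-differentiable-view case described in the note above (transpose), where in-place modification stays allowed and the view's history is rebased; the printed gradient follows from standard autograd semantics.

import torch

leaf = torch.ones(2, 3, requires_grad=True)
base = leaf.clone()   # non-leaf base so the in-place op is permitted
view = base.t()       # single differentiable view -> rebasing its history is allowed
view.mul_(2)          # ok: autograd rewires the graph through the modified view
view.sum().backward()
print(leaf.grad)      # every entry is 2., i.e. d(sum(2 * leaf)) / d(leaf)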
5 changes: 1 addition & 4 deletions tools/autograd/gen_autograd.py
@@ -30,9 +30,7 @@
from .utils import YamlLoader, split_name_params

# See NOTE [ Autograd View Variables ] in variable.h for details.
# A map: function name => two options:
# 1. name of the argument that all outputs are view of
# 2. map: output idx => name of the argument that this result is view of
# A map: function name => name of the argument that all outputs are view of
VIEW_FUNCTIONS = {
'numpy_T': 'self',
'alias': 'self',
@@ -67,7 +65,6 @@
# returns a view of its inputs
RETURNS_VIEWS_OF_INPUT = set(VIEW_FUNCTIONS.keys()).union({'chunk', 'split'})


def format_return_type(returns):
if len(returns) == 0:
return 'void'
81 changes: 30 additions & 51 deletions tools/autograd/gen_variable_type.py
@@ -562,6 +562,9 @@ def emit_body(declaration):

base_name = name[:-1] if inplace else name[:-4] if is_out_fn else name
view_info = VIEW_FUNCTIONS.get(base_name, None)
# TODO: Add back when https://github.com/pytorch/pytorch/pull/32044 lands again
# if view_info is None and base_name in RETURNS_VIEWS_OF_INPUT:
# view_info = "self"

def is_differentiable(arg):
if 'TensorOptions' in arg['type']:
@@ -759,59 +762,38 @@ def declare_returned_variables():
return '\n'.join(names)

def wrap_output(call):
# Returns a 2-tuple `(wrapped_call, extra_wrapping_stmts)`, where
# `wrapped_call` is to drop-in replace `call`, and
# `extra_wrapping_stmts` is a list of extra statements to run after
# `call`.
# Returns `wrapped_call` which is a drop-in replacement for `call`
if 'Tensor' not in declaration['return_type']:
return call, []
return call
elif view_info is not None:
# See NOTE [ Autograd View Variables ] in variable.h for details.
differentiable_output_vars = {r['name'] for r in differentiable_outputs}
tensor_output_vars = {r['name'] for r in returns if 'Tensor' in r['type']}
if not isinstance(view_info, dict):
if len(differentiable_output_vars) == len(tensor_output_vars):
# all outputs are differentiable
return 'as_view({}, {}, true)'.format(view_info, call), []
elif len(differentiable_output_vars) == 0:
# no output is differentiable
return 'as_view({}, {}, false)'.format(view_info, call), []
else:
# some of the outputs are differentiable
# need to expand to dict mode, i.e., one entry per output
base_name = view_info
view_info_dict = {}
for i, return_info in enumerate(returns):
if 'Tensor' in return_info['type']:
view_info_dict[i] = base_name

if not isinstance(view_info, str):
raise TypeError("The view info should be a string for {}, but it is: {}".format(base_name, view_info))

if len(differentiable_output_vars) == 0:
# no output is differentiable (.indices() for SparseTensors for example)
return 'as_view({}, {}, /* is_differentiable */ false)'.format(view_info, call)
elif len(differentiable_output_vars) == 1:
# Single differentiable output (Tensor or Tensor[])
return_info = differentiable_outputs[0]
# We only support simple Tensor or a TensorList for functions that return views
if not return_info['dynamic_type'] in ['Tensor', 'TensorList']:
raise RuntimeError("{} that return differentiable views can only return Tensor or Tensor[]".format(base_name))
# Only allow rebasing of the history if we return a single Tensor
allow_rebase_history = 'true'
if return_info['dynamic_type'] == 'TensorList':
allow_rebase_history = 'false'
wrapped_call = ("as_view(/* base */{}, /* output */ {}, /* is_differentiable */ true, "
"/* allow_rebase_history */ {})").format(view_info, call, allow_rebase_history)
return wrapped_call
else:
view_info_dict = view_info

def wrap_view_single(output_var, base_var):
fmt = '{output_var} = as_view({base_var}, {output_var}, {is_differentiable});'
if output_var in differentiable_output_vars:
# If `GradMode::is_enabled()` is False, this is a
# non-differentiable view. Gradients should not flow through.
is_differentiable = 'true'
else:
# This output is non-differentiable, so it is a
# non-differentiable view. Gradients should not flow through.
is_differentiable = 'false'
return fmt.format(output_var=output_var, base_var=base_var,
is_differentiable=is_differentiable)

extra_wrapping_stmts = []
for output_idx, return_info in enumerate(returns):
if 'Tensor' not in return_info['type']:
assert output_idx not in view_info_dict, 'Can not wrap non-Tensor output as a view'
continue
output_var = return_info['name']
if output_idx in view_info_dict:
stmt = wrap_view_single(output_var, view_info_dict[output_idx])
extra_wrapping_stmts.append(stmt)
return call, extra_wrapping_stmts
# This could be supported but we don't need it at the moment, so keeping things simple.
raise RuntimeError("Functions that return multiple differentiable outputs "
"when at least one of them is a view are not supported.")
else:
return 'std::move({})'.format(call), []
return 'std::move({})'.format(call)

def enforce_same_tensorimpl_and_storage(env, call):
save_ptrs_stmts = []
@@ -838,7 +820,6 @@ def enforce_same_tensorimpl_and_storage(env, call):

def emit_call(env):
combined = nested_dict(env, declaration)
extra_wrapping_stmts = []
if strategy == 'use_derived':
# We only care about adding `at::AutoNonVariableTypeMode` guard for non-variable dispatch
# (which corresponds to 'use_derived' strategy). The purpose of this guard is to make sure
@@ -852,7 +833,7 @@
base_type_call = CALL_DISPATCH_VIA_METHOD.substitute(
combined, unpacked_method_args=unpacked_method_args)
if not modifies_arguments and not returns_void:
rhs_value, extra_wrapping_stmts = wrap_output('tmp')
rhs_value = wrap_output('tmp')
call = DISPATCH_TO_NON_VAR_TYPE_WITH_RETURN_VALUES.substitute(
base_type_call=base_type_call,
return_values=tie_return_values(),
@@ -865,8 +846,6 @@
if not modifies_arguments and not returns_void:
call = '{} = {}'.format(tie_return_values(), call)
call = call + ';'
for stmt in extra_wrapping_stmts:
call += '\n' + stmt
call = enforce_same_tensorimpl_and_storage(env, call)
return call

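To make the new single-view branch concrete, here is a standalone Python sketch (not the real codegen) of the call strings wrap_output now emits for a Tensor view versus a TensorList view; the helper name and example arguments are invented for illustration.

def sketch_wrap_output(view_info, call, return_dynamic_type):
    # Mirrors the Tensor/TensorList branch of wrap_output in the diff above.
    allow_rebase_history = 'false' if return_dynamic_type == 'TensorList' else 'true'
    return ("as_view(/* base */ {}, /* output */ {}, /* is_differentiable */ true, "
            "/* allow_rebase_history */ {})").format(view_info, call, allow_rebase_history)

# A single-Tensor view op (alias, transpose, ...): rebasing stays enabled.
print(sketch_wrap_output('self', 'tmp', 'Tensor'))
# A TensorList view op (unbind, ...): rebasing is disabled.
print(sketch_wrap_output('self', 'tmp', 'TensorList'))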
2 changes: 1 addition & 1 deletion torch/csrc/autograd/VariableTypeManual.cpp
@@ -255,7 +255,7 @@ Tensor detach(const Tensor & self) {

}
// <NON_GENERATED_CODE>
auto result = make_variable_view(self, self, /*is_differentiable=*/false, /*allow_tensor_metadata_change=*/false);
auto result = make_variable_non_differentiable_view(self, self, /*allow_tensor_metadata_change=*/false);
namedinference::propagate_names(result, self);
// </NON_GENERATED_CODE>
if (jit::tracer::isTracing()) {
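detach() is the canonical non-differentiable view now built through make_variable_non_differentiable_view; a quick Python illustration of its standard semantics, with illustrative names.

import torch

x = torch.ones(4, requires_grad=True)
d = x.detach()                        # non-differentiable view of x
print(d.requires_grad)                # False: gradients do not flow through d
print(d.data_ptr() == x.data_ptr())   # True: storage is shared with x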
37 changes: 29 additions & 8 deletions torch/csrc/autograd/VariableTypeUtils.h
@@ -41,9 +41,20 @@ namespace torch { namespace autograd {

inline void check_inplace(const Tensor& tensor) {
auto& var = static_cast<const Variable&>(tensor);
if (var.requires_grad() && var.is_leaf() && GradMode::is_enabled()) {
AT_ERROR(
"a leaf Variable that requires grad has been used in an in-place operation.");
if (var.requires_grad() && GradMode::is_enabled()) {
if (var.is_leaf()) {
AT_ERROR(
"a leaf Variable that requires grad is being used in an in-place operation.");
} else if (var.is_view()) {
// NB: is_view() ==> get_autograd_meta()
auto diff_view_meta = static_cast<DifferentiableViewMeta*>(impl::get_autograd_meta(var));
auto grad_fn = impl::grad_fn_unsafe(var);
// NB: !var.is_leaf() ==> grad_fn != nullptr
TORCH_INTERNAL_ASSERT(grad_fn);
TORCH_CHECK(diff_view_meta->allow_rebase_history,
"The ", diff_view_meta->output_nr_, "th output of ", grad_fn->name(),
" is being modified inplace but this is not allowed as it would prevent correct gradient computation.");
}
}
}

@@ -97,23 +108,33 @@ template<typename... Args> inline variable_list flatten_tensor_args(Args&&... ar
}

// See NOTE [ Autograd View Variables ] for details.
inline Tensor as_view(const Tensor & base, Tensor tensor, bool is_differentiable = true) {
inline Tensor as_view(const Tensor & base, Tensor tensor, bool is_differentiable, bool allow_rebase_history=true) {
auto base_var = Variable(base);
if (base_var.is_view()) {
base_var = base_var.base();
}
return make_variable_view(std::move(base_var), std::move(tensor), is_differentiable);
if (is_differentiable) {
return make_variable_differentiable_view(std::move(base_var), std::move(tensor), allow_rebase_history);
} else {
TORCH_CHECK(allow_rebase_history, "Non-differentiable views cannot set allow_rebase_history=false");
return make_variable_non_differentiable_view(std::move(base_var), std::move(tensor));
}
}

// See NOTE [ Autograd View Variables ] for details.
inline std::vector<Tensor> as_view(const Tensor & base, std::vector<Tensor> tensors,
bool is_differentiable = true) {
inline std::vector<Tensor> as_view(const Tensor & base, std::vector<Tensor> tensors, bool is_differentiable,
bool allow_rebase_history=true) {
auto base_var = Variable(base);
if (base_var.is_view()) {
base_var = base_var.base();
}
for(Tensor &tensor : tensors) {
tensor = make_variable_view(base_var, std::move(tensor), is_differentiable);
if (is_differentiable) {
tensor = make_variable_differentiable_view(base_var, std::move(tensor), allow_rebase_history);
} else {
TORCH_CHECK(allow_rebase_history, "Non-differentiable views cannot set allow_rebase_history=false");
tensor = make_variable_non_differentiable_view(base_var, std::move(tensor));
}
}
return tensors;
}
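Note that the whole check_inplace body above is guarded by GradMode::is_enabled(), so performing the in-place op with gradient recording disabled sidesteps both errors, at the cost of that op not being tracked. A minimal sketch, assuming the guard behaves as written in the diff:

import torch

base = torch.randn(2, 3, requires_grad=True).clone()
rows = base.unbind(0)
with torch.no_grad():
    rows[0].add_(1)   # no error: check_inplace returns early when grad mode is off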
13 changes: 7 additions & 6 deletions torch/csrc/autograd/variable.cpp
@@ -25,8 +25,8 @@ namespace torch {
namespace autograd {


DifferentiableViewMeta::DifferentiableViewMeta(at::TensorImpl* self_impl, Variable base)
: AutogradMeta(self_impl, false) {
DifferentiableViewMeta::DifferentiableViewMeta(at::TensorImpl* self_impl, Variable base, bool allow_rebase_history)
: AutogradMeta(self_impl), allow_rebase_history(allow_rebase_history) {
base_ = std::move(base);
TORCH_CHECK(base_.defined(), "base is undefined");
if (base_.is_view()) {
@@ -72,12 +72,13 @@ namespace impl {
}

void rebase_history(const Variable& self, Edge gradient_edge) {
AT_ASSERT(gradient_edge.function != nullptr);
TORCH_INTERNAL_ASSERT(gradient_edge.function != nullptr);
if (self.is_view()) {
// NB: is_view() ==> get_autograd_meta()
auto diff_view_meta = static_cast<DifferentiableViewMeta*>(get_autograd_meta(self));
AT_ASSERT(gradient_edge.input_nr == 0);
AT_ASSERT(gradient_edge.function);
TORCH_INTERNAL_ASSERT(diff_view_meta->allow_rebase_history);
TORCH_INTERNAL_ASSERT(gradient_edge.input_nr == 0);
TORCH_INTERNAL_ASSERT(gradient_edge.function);
TORCH_CHECK(
gradient_edge.function->num_inputs() == 1,
"Functions which modify views in-place must return a single Variable");
@@ -323,7 +324,7 @@ const std::shared_ptr<torch::autograd::Node>& VariableHooks::grad_fn(const Tenso
}
auto current_version = self._version();
if (diff_view_meta->attr_version != current_version) {
AT_ASSERT(diff_view_meta->output_nr_ == 0);
TORCH_INTERNAL_ASSERT(diff_view_meta->output_nr_ == 0);
auto fn = std::make_shared<torch::autograd::generated::AsStridedBackward>();
fn->self_geometry = at::TensorGeometry(diff_view_meta->base_);
fn->size = self.sizes().vec();
61 changes: 40 additions & 21 deletions torch/csrc/autograd/variable.h
@@ -295,6 +295,14 @@ struct TORCH_API AutogradMeta : public c10::AutogradMetaInterface {
/// + if an in-place op is done on view, in rebase_history() of view, which is
/// called after every in-place op in VariableType.cpp, the grad_fn of base
/// is updated.
/// + if a single autograd Node returns multiple differentiable views, and any of
/// these outputs is modified by an inplace operation, the autograd engine will
/// build an equivalent graph (corresponding to the view operations) in which
/// each output is treated as if it were produced by a distinct view operation.
/// This discards the original (e.g., user provided) grad_fn. If the provided
/// grad_fn does more than the backward of the view, then the DifferentiableViewMeta
/// must be created with allow_rebase_history=false to prevent the engine from
/// ignoring the provided grad_fn.
///
///
/// Non-Differentiable Views
Expand All @@ -313,8 +321,9 @@ struct TORCH_API AutogradMeta : public c10::AutogradMetaInterface {
/// function are non-differentiable.
/// These are called non-differentiable views as the gradients do not flow
/// through the view relation.
/// Relevant logic for non-differentiable views is implemented in
/// make_variable_view below, and wrap_output of gen_variable_type.py.
///
/// Relevant logic for both differentiable and non-differentiable views is implemented in
/// make_variable_(non_)differentiable_view below, and wrap_output of gen_variable_type.py.
struct TORCH_API DifferentiableViewMeta : public AutogradMeta {
/// The base `Variable` (never a view).
Variable base_;
Expand All @@ -324,11 +333,15 @@ struct TORCH_API DifferentiableViewMeta : public AutogradMeta {
/// version_counter.current_version().
uint32_t attr_version;

/// Boolean flag that signifies if the history of this Tensor can be rebased
/// or if it is forbidden.
bool allow_rebase_history;

bool requires_grad() const override {
return requires_grad_ || grad_fn_ || (is_view_ && base_.requires_grad());
}

DifferentiableViewMeta(at::TensorImpl* self_impl, Variable base);
DifferentiableViewMeta(at::TensorImpl* self_impl, Variable base, bool allow_rebase_history=true);
~DifferentiableViewMeta();
};

@@ -353,28 +366,34 @@ struct TORCH_API DifferentiableViewMeta : public AutogradMeta {
/// prevent those changes from happening and is undesirable.

// See NOTE [ Autograd View Variables ] for details.
inline Variable make_variable_view(
// Differentiable view. Track history with DifferentiableViewMeta.
inline Variable make_variable_differentiable_view(
Variable base,
at::Tensor data,
bool is_differentiable = true,
bool allow_tensor_metadata_change = true) {
bool allow_rebase_history) {
if (data.defined()) {
if (is_differentiable) {
/// Differentiable view. Track history with DifferentiableViewMeta.
auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach(
/*version_counter=*/0,
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
data_impl_copy->set_autograd_meta(std::make_unique<DifferentiableViewMeta>(
data_impl_copy.get(), std::move(base)));
return Variable(data_impl_copy);
} else {
/// Non-differentiable view. Just share version counter.
auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach(
/*version_counter=*/impl::version_counter(base),
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
data_impl_copy->set_autograd_meta(nullptr);
return Variable(data_impl_copy);
auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach(
/*version_counter=*/0,
/*allow_tensor_metadata_change=*/true);
data_impl_copy->set_autograd_meta(std::make_unique<DifferentiableViewMeta>(
data_impl_copy.get(), std::move(base), allow_rebase_history));
return Variable(data_impl_copy);
}
return Variable();
}

// See NOTE [ Autograd View Variables ] for details.
// Non-differentiable view. Just share version counter.
inline Variable make_variable_non_differentiable_view(
Variable base,
at::Tensor data,
bool allow_tensor_metadata_change = true) {
if (data.defined()) {
auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach(
/*version_counter=*/impl::version_counter(base),
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
data_impl_copy->set_autograd_meta(nullptr);
return Variable(data_impl_copy);
}
return Variable();
}
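The non-differentiable view above still shares the base's version counter (impl::version_counter(base)), which is what lets autograd detect stale saved tensors; a short Python illustration of why that sharing matters, with illustrative names.

import torch

x = torch.ones(3, requires_grad=True)
y = x ** 2        # pow saves x for its backward pass
d = x.detach()    # non-differentiable view sharing x's version counter
d.add_(1)         # modifies x's data and bumps the shared version counter
try:
    y.sum().backward()
except RuntimeError as err:
    print(err)    # version-mismatch error: a saved tensor was modified in place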
