
Commit

Merge pull request #321 from DeepLink-org/zgc/dipu_move_copy_op_to_autogen

Zgc/dipu move copy op to autogen
zhaoguochun1995 authored Nov 7, 2023
2 parents 012f5fe + 6a8dd56 commit 259b287
Showing 7 changed files with 309 additions and 230 deletions.
2 changes: 2 additions & 0 deletions dipu/SupportedDiopiFunctions.txt
@@ -44,6 +44,7 @@ diopiBitwiseOrInp
diopiBitwiseOrInpScalar
diopiBitwiseOrScalar
diopiBmm
diopiCastDtype
diopiCat
diopiCdist
diopiCdistBackward
@@ -64,6 +65,7 @@ diopiConvolution2d
diopiConvolution2dBackward
diopiConvTranspose2d
diopiConvTranspose2dBackward
diopiCopyInp
diopiCos
diopiCosInp
diopiCrossEntropyLoss
5 changes: 4 additions & 1 deletion dipu/scripts/autogen_diopi_wrapper/autogen_diopi_wrapper.py
@@ -94,6 +94,7 @@ def create_return_code_frome_schema(schema, allow_return_ref = True):
return_code = schema[schema.find('->'):].replace('->', '').strip()
return_code = re.sub('Tensor *\[ *\] *', 'std::vector<Tensor> ' ,return_code)
return_code = re.sub('\([a-zA-Z]!\)', '&' , return_code)
return_code = re.sub('\([a-zA-Z]\)', '' , return_code)
return_code = re.sub('Tensor', 'at::Tensor' , return_code)
return_code = re.sub('([\w_\d:&]+)[ ]+([\w\d_]+)?', R'\1', return_code)
return_code = re.sub('\(', 'std::tuple<', return_code)
@@ -163,13 +164,15 @@ def create_param_list_from_schema(schema):
'Scalar *\[ *\]' : 'at::ArrayRef<at::Scalar>',
'Tensor *\( *[a-z]\!\) *\[ *\]' : 'at::ArrayRef<at::Tensor>',
'[ ]*\([a-zA-Z]!\)' : '&',
'MemoryFormat\?' : 'const c10::optional<c10::MemoryFormat>',
'str\?' : 'c10::optional<c10::string_view>',
'([, \(]{1})str ' : R'\1c10::string_view ',
'ScalarType[ ]*\?' : 'c10::optional<at::ScalarType>',
'ScalarType[ ]+([\w\d_]+)' : R'at::ScalarType \1',
'Scalar[ ]*\? *([\w\d_]+)' : R'const c10::optional<at::Scalar>& \1',
'Generator ?\?' : 'c10::optional<at::Generator>',
'Device ?\?' : 'c10::optional<c10::Device>',
'Device ?\?' : 'c10::optional<Device>',
'Device' : 'c10::Device',
'Layout ?\?' : 'c10::optional<at::Layout>' ,
'Tensor ?\? *\[ *\]' : R'const c10::List<c10::optional<at::Tensor>>&' ,
'Tensor ?\?' : 'const c10::optional<at::Tensor>&' ,
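The new substitution rule `'\([a-zA-Z]\)' : ''` strips non-mutating alias annotations such as `Tensor(a)` from return types, which the `to.dtype` schema added below needs. A minimal sketch of the effect, using only the substitutions visible in this hunk (the full generator applies more rules):

```python
import re

# Minimal sketch, not the full generator: shows why the new alias-stripping
# rule matters for the `to.dtype` schema added in diopi_functions.yaml.
schema = ("to.dtype(Tensor(a) self, ScalarType dtype, bool non_blocking=False, "
          "bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a)")

return_code = schema[schema.find('->'):].replace('->', '').strip()  # 'Tensor(a)'
return_code = re.sub(r'\([a-zA-Z]!\)', '&', return_code)   # mutable alias -> reference (no match here)
return_code = re.sub(r'\([a-zA-Z]\)', '', return_code)     # new rule: drop the non-mutable '(a)' annotation
return_code = re.sub(r'Tensor', 'at::Tensor', return_code)

print(return_code)  # at::Tensor  (without the new rule: at::Tensor(a), invalid C++)
```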
72 changes: 71 additions & 1 deletion dipu/scripts/autogen_diopi_wrapper/diopi_functions.yaml
@@ -2148,7 +2148,77 @@
return out;
interface: diopiNorm(ctx, out, self, p, dimDiopiSize);

#- schema: _amp_foreach_non_finite_check_and_unscale_(Tensor(a!)[] self, Tensor(b!) found_inf, Tensor inv_scale) -> void
- schema: "to.dtype(Tensor(a) self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a)"
register_op: False
custom_code_at_the_beginning: |
auto out = at::empty_like(self, self.options().dtype(dtype));
interface: diopiCastDtype(ctx, out, self);
custom_code_before_return: |
if (memory_format.has_value()) {
auto out1 = at::empty_like(out, out.options(), memory_format.value());
at::copy(out1, out, non_blocking);
out = out1;
}
if (!non_blocking) {
dipu::getCurrentDIPUStream().synchronize();
}
- schema: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
no_device_check_args: [self, src]
device: [not_for_any_now] #todo
ins: [srcTemp]
custom_fallback: True
custom_code_at_the_beginning: |
dipu::DIPUGuard guard(self.is_cpu() ? src.device() : self.device());
auto stream = dipu::getCurrentDIPUStream();
auto srcTemp = self.dtype() == src.dtype() ? src : src.to(self.dtype());
srcTemp = (srcTemp.numel() == self.numel()) ? srcTemp : srcTemp.expand(self.sizes());
if (non_blocking) {
const bool is_default_stream = dipu::getDefaultDIPUStream() == stream;
if (self.is_cpu()) {
if (self.options().pinned_memory()) {
self.record_stream(stream);
}
} else if (!is_default_stream){
self.record_stream(stream);
}
if (srcTemp.is_cpu()) {
if (srcTemp.options().pinned_memory()) {
srcTemp.record_stream(stream);
}
} else if (!is_default_stream) {
srcTemp.record_stream(stream);
}
}
if (self.device().type() != srcTemp.device().type()) {
srcTemp = srcTemp.is_contiguous(self.suggest_memory_format()) ? srcTemp : srcTemp.contiguous(self.suggest_memory_format());
if (srcTemp.is_cpu() && (!self.is_cpu())) {
// c2d
dipu::devproxy::memCopyH2DAsync(stream.rawstream(), self.nbytes(), self.data_ptr(), srcTemp.data_ptr());
} else if ((!srcTemp.is_cpu()) && self.is_cpu()) {
// d2c
dipu::devproxy::memCopyD2HAsync(stream.rawstream(), self.nbytes(), self.data_ptr(), srcTemp.data_ptr());
}
if (!non_blocking) {
dipu::getCurrentDIPUStream().synchronize();
}
return self;
}
interface: diopiCopyInp(ctx, srcTemp, self)
custom_code_before_return: |
if (!non_blocking) {
dipu::getCurrentDIPUStream().synchronize();
}
- schema: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
no_device_check_args: [self, src]
custom_fallback: True
dummy_call_diopi: True
custom_code_at_the_beginning: |
return custom_fallback_dipu_copy_(self, src, non_blocking);
interface: diopiCopyInp(ctx, src, self)

- schema: _amp_foreach_non_finite_check_and_unscale_(at::TensorList self, Tensor(b!) found_inf, Tensor inv_scale) -> void
custom_fallback: True
custom_code_at_the_beginning: |
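For reference, the Python-level calls these two new YAML entries cover look roughly like the following. The `torch_dipu` import and the 'dipu' device string are placeholders for illustration and depend on the vendor backend configuration:

```python
import torch
import torch_dipu  # assumption: this import registers the DIPU backend

x = torch.randn(4, 4, device='dipu')   # 'dipu' device string is a placeholder

# Dispatched to the generated to.dtype wrapper, which calls diopiCastDtype
# and honors memory_format / non_blocking per the custom code above.
y = x.to(torch.float16)

# Dispatched to one of the two copy_ entries above: the diopiCopyInp path
# or custom_fallback_dipu_copy_, depending on which is enabled for the vendor.
cpu_dst = torch.empty(4, 4, dtype=torch.float16)
cpu_dst.copy_(y, non_blocking=False)   # blocking D2H copy synchronizes before returning
```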
197 changes: 2 additions & 195 deletions dipu/scripts/autogen_diopi_wrapper/diopi_wrapper_template.py
@@ -22,199 +22,6 @@
using dipu::diopi_helper::toDiopiGeneratorHandle;
inline bool checkDiopiReturnValue() {
static bool enable = std::getenv("DIPU_DISABLE_CHECK_DIOPI_RETURN_VALUE") == nullptr;
return enable;
}
inline bool checkTensorDevice() {
static bool enable = []() {
const char* env_ptr = std::getenv("DIPU_CHECK_TENSOR_DEVICE");
if (env_ptr == nullptr) {
return false;
}
return std::atoi(env_ptr) > 0 ? true : false;
}();
return enable;
}
inline void synchronizeIfEnable() {
static const char* mode = std::getenv("DIPU_SYNC_EXEC_MODE");
if (mode != nullptr) {
DIPU_LOG_ONCE << "The synchronous operation is performed after "
<<"the diopi function call because the DIPU_SYNC_EXEC_MODE environment variable is set" << std::endl;
dipu::getCurrentDIPUStream().synchronize();
}
return;
}
inline int dumpOpArgLevel() {
const char* env_ptr = std::getenv("DIPU_DUMP_OP_ARGS");
int level = env_ptr ? std::atoi(env_ptr) : 0;
return level;
}
template<typename T>
static std::string dumpArg(const T& t) {
std::stringstream stream;
stream << t;
return stream.str();
}
template<typename T1>
static std::string dumpArg(const c10::optional<T1> & opt_t) {
std::stringstream stream;
if (opt_t.has_value()) {
stream << dumpArg(opt_t.value());
}
return stream.str();
}
template<typename T>
static std::string dumpArg(const c10::OptionalArrayRef<T> & opt_t) {
std::stringstream stream;
if (opt_t.has_value()) {
stream << dumpArg(opt_t.value());
}
return stream.str();
}
template<typename T1, template<typename elem> class container>
static std::string dumpArg(const container<T1> & t) {
std::stringstream stream;
for (auto iter = t.begin(); iter != t.end(); ++iter) {
stream << dumpArg(*iter) << ", ";
}
return stream.str();
}
template<>
std::string dumpArg(const at::Tensor& tensor) {
std::stringstream stream;
if (tensor.defined()) {
stream << "numel: " << tensor.numel() << ",sizes: " << tensor.sizes() << ", stride: " << tensor.strides() << ", is_view: " << tensor.is_view() << ", dtype: " << tensor.dtype()
<< ", device:" << tensor.device() << ", layout:" << tensor.layout() << ", requires_grad: " << (tensor.requires_grad() ? "true" : "false") << ", pinned_memory: " << (tensor.is_pinned() ? "true" : "false")
<< ", memory_format: " << tensor.suggest_memory_format() << ", data_ptr: " << tensor.data_ptr();
if (dumpOpArgLevel() > 2) {
stream << std::endl << tensor;
}
} else {
stream << "undefined";
}
return stream.str();
}
template<>
std::string dumpArg(const at::Scalar& scalar) {
std::stringstream stream;
stream << scalar;
return stream.str();
}
template<>
std::string dumpArg(const c10::string_view& str) {
return dumpArg(std::string(str.data()));
}
template<>
std::string dumpArg(const at::Generator& generator) {
return "";
}
template<typename T, size_t N>
static std::string dumpArg(const std::array<T, N>& t) {
std::stringstream stream;
for (auto iter = t.begin(); iter != t.end(); ++iter) {
stream << dumpArg(*iter) << " ";
}
return stream.str();
}
template<>
std::string dumpArg(const c10::List<c10::optional<at::Tensor>>& t) {
std::stringstream stream;
stream << "size:" << t.size() << std::endl;
for (int i = 0; i < t.size(); ++i) {
bool has_value = t[i].has_value();
stream << "\t" << i << "th: has_value:" << has_value << " ";
if (has_value) {
stream << dumpArg(t[i].value());
}
stream << std::endl;
}
return stream.str();
}
template<typename T1, typename T2 , template<typename elem1> class container1, template<typename elem2> class container2>
static std::vector<int64_t> infer_reduce_op_shape(const container1<T1> & input_shape, const container2<T2> & dims, bool keepdim) {
if (dims.size() <= 0) {
return std::vector<int64_t>();
}
if (keepdim) {
std::vector<int64_t> output_shape(input_shape.begin(), input_shape.end());
for (auto iter = dims.begin(); iter != dims.end(); ++iter) {
auto dim = *iter;
dim += dim < 0 ? input_shape.size() : 0;
output_shape[dim] = 1;
}
return output_shape;
} else {
std::vector<int64_t> output_shape;
output_shape.reserve(input_shape.size() - dims.size());
for (int i = 0; i < input_shape.size(); ++i) {
bool reduce_dim = false;
for (auto iter = dims.begin(); iter != dims.end(); ++iter) {
auto dim = *iter;
dim += dim < 0 ? input_shape.size() : 0;
if (dim == i) {
reduce_dim = true;
break;
}
}
if (reduce_dim == false) {
output_shape.push_back(input_shape.at(i));
}
}
return output_shape;
}
}
static std::string _allclose(const at::Tensor& a, const at::Tensor& b) {
if(a.defined() && b.defined()) {
try {
if(at::allclose(a.cpu(), b.cpu(), 1e-4, 1e-5, true)) {
return "allclose";
} else {
auto diff = at::abs(a.cpu() - b.cpu());
auto mae = diff.mean().item<double>();
auto max_diff = diff.max().item<double>();
return "not_close, max diff: " + std::to_string(max_diff) + ", MAE: " + std::to_string(mae);
}
} catch (...) {
return "compare_error: not_close";
}
} else {
if(a.defined() != b.defined()) {
return "not_close, one of tensor inputs is empty";
} else {
return "allclose";
}
}
}
static std::string _allclose(const c10::ArrayRef<at::Tensor>& a, const c10::ArrayRef<at::Tensor>& b) {
if (a.size() != b.size()) {
return "not_allclose:";
}
std::string result;
for (size_t i = 0; i < a.size(); ++i) {
result += std::to_string(i) + "th " + _allclose(a[i], b[i]) + "; ";
}
return result;
}
using namespace dipu::diopi_helper;
@@ -242,11 +49,11 @@
// $comment
$cppsignautre {
dipu::profile::RecordBlockCreator _(__FUNCTION__);
$custom_code_at_the_beginning
::diopiContext context(dipu::getCurrentDIPUStream().rawstream());
auto ctx = &context;
$custom_code_at_the_beginning
$input_process_code
$output_process_code
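The remaining change in this hunk moves `$custom_code_at_the_beginning` above the `diopiContext` construction, so a custom prologue (for example, the early-return H2D/D2H branch in the new `copy_` entry) runs before any context is created. A rough sketch of the expansion order, using `string.Template` purely for illustration (the real generator's substitution mechanism is not shown in this diff):

```python
from string import Template

# Illustration only: visualizes the new placeholder order in the wrapper template.
wrapper = Template(
    "// $comment\n"
    "$cppsignautre {\n"
    "  dipu::profile::RecordBlockCreator _(__FUNCTION__);\n"
    "  $custom_code_at_the_beginning\n"   # now expanded before the context is built
    "  ::diopiContext context(dipu::getCurrentDIPUStream().rawstream());\n"
    "  auto ctx = &context;\n"
    "  // ... input/output processing and the diopi call follow ...\n"
    "}\n"
)

# The signature and function name below are hypothetical stand-ins.
print(wrapper.substitute(
    comment="copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)",
    cppsignautre="at::Tensor& dipu_copy_(at::Tensor& self, const at::Tensor& src, bool non_blocking)",
    custom_code_at_the_beginning="/* custom prologue; may return early for H2D/D2H copies */",
))
```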
29 changes: 0 additions & 29 deletions dipu/torch_dipu/csrc_dipu/aten/RegisterDIPU.cpp
@@ -179,34 +179,6 @@ namespace {
return dnative::empty_strided_cpu(size, stride, dtype_opt, layout_opt, device_opt, pin_memory_opt);
}

at::Tensor& wrapper_copy_(at::Tensor& self, const at::Tensor& src, bool non_blocking) {
dipu::profile::RecordBlockCreator dipu_recorder(__FUNCTION__);
static bool use_slow_copy = (std::getenv("DIPU_USE_SLOW_COPY") != nullptr);
dipu::DIPUGuard guard(self.is_cpu() ? src.device() : self.device());
if (non_blocking) {
auto stream = dipu::getCurrentDIPUStream();
const bool is_default_stream = dipu::getDefaultDIPUStream() == stream;
if (self.is_cpu()) {
if (self.options().pinned_memory()) {
self.record_stream(stream);
}
} else if (!is_default_stream){
self.record_stream(stream);
}
if (src.is_cpu()) {
if (src.options().pinned_memory()) {
src.record_stream(stream);
}
} else if (!is_default_stream) {
src.record_stream(stream);
}
}
if (use_slow_copy) {
return dnative::copy_(self, src, non_blocking);
} else {
return dipu::getDipuCopyInplace()->run(self, src, non_blocking);
}
}

at::Tensor wrapper_DIPU___reshape_alias(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) {
dipu::profile::RecordBlockCreator dipu_recorder(__FUNCTION__);
@@ -357,7 +329,6 @@ DIPU_LIBRARY_IMPL(aten, DIPU_DEVICE_TYPE_MACRO, m) {
// always registered
m.impl("empty.memory_format", TORCH_FN(wrapper_DIPU_empty_memory_format));
m.impl("empty_strided", TORCH_FN(wrapper_DIPU_empty_strided));
m.impl("copy_", TORCH_FN(wrapper_copy_));
m.impl("_reshape_alias", TORCH_FN(wrapper_DIPU___reshape_alias));
m.impl("_copy_from_and_resize", TORCH_FN(wrapper_DIPU___copy_from_and_resize));
m.impl("resize_", TORCH_FN(wrapper_resize_));
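With the hand-written `wrapper_copy_` and its `m.impl("copy_", ...)` registration removed, `copy_` on the DIPU dispatch key now comes from the autogenerated wrapper. One way to check which kernel the dispatcher resolves to, assuming a PyTorch build that exposes the private introspection helper:

```python
import torch

# Dumps every registered kernel for aten::copy_, including the DIPU backend's
# entry; the exact dispatch key name depends on DIPU_DEVICE_TYPE_MACRO.
print(torch._C._dispatch_dump("aten::copy_"))
```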