From b5366a5cdf5b344f13caa96c5f585534c1cedada Mon Sep 17 00:00:00 2001 From: momo609 <963372609@qq.com> Date: Wed, 27 Sep 2023 09:53:07 +0800 Subject: [PATCH] add roi_align_rotated npu adapter and improve roi_pool adapter. --- docs/zh_cn/understand_mmcv/ops.md | 4 +- mmcv/ops/csrc/common/pytorch_npu_helper.hpp | 13 +++- mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp | 16 ++++- .../pytorch/npu/roi_align_rotated_npu.cpp | 66 +++++++++++++++++++ mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp | 14 ++-- 5 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md index ba744daf11..3023eb647a 100644 --- a/docs/zh_cn/understand_mmcv/ops.md +++ b/docs/zh_cn/understand_mmcv/ops.md @@ -26,7 +26,7 @@ MMCV 提供了检测、分割等任务中常用的算子 | FurthestPointSampleWithDist | | √ | | | | | FusedBiasLeakyrelu | | √ | | | √ | | GatherPoints | | √ | | | √ | -| GroupPoints | | √ | | | | +| GroupPoints | | √ | | | √ | | Iou3d | | √ | √ | | | | KNN | | √ | | | | | MaskedConv | | √ | √ | | √ | @@ -44,7 +44,7 @@ MMCV 提供了检测、分割等任务中常用的算子 | RotatedFeatureAlign | √ | √ | √ | | | | RoIPointPool3d | | √ | √ | | | | RoIPool | | √ | √ | | √ | -| RoIAlignRotated | √ | √ | √ | | | +| RoIAlignRotated | √ | √ | √ | | √ | | RiRoIAlignRotated | | √ | | | | | RoIAlign | √ | √ | √ | | √ | | RoIAwarePool3d | | √ | √ | | | diff --git a/mmcv/ops/csrc/common/pytorch_npu_helper.hpp b/mmcv/ops/csrc/common/pytorch_npu_helper.hpp index 073d6b38c3..01cfe80548 100644 --- a/mmcv/ops/csrc/common/pytorch_npu_helper.hpp +++ b/mmcv/ops/csrc/common/pytorch_npu_helper.hpp @@ -18,7 +18,6 @@ #ifndef PYTORCH_NPU_HELPER_HPP_ #define PYTORCH_NPU_HELPER_HPP_ -#include #include #include @@ -27,6 +26,18 @@ #define NPU_NAME_SPACE at_npu::native +const int SIZE = 8; +c10::SmallVector<int64_t, SIZE> array_to_small_vector(c10::IntArrayRef shape) +{ + c10::SmallVector<int64_t, SIZE> shape_small_vec; + for (int i = 0; i < shape.size(); i++) + { + 
shape_small_vec.emplace_back(shape[i]); + } + + return shape_small_vec; +} + #ifdef MMCV_WITH_XLA #define REGISTER_NPU_IMPL(key, value) REGISTER_DEVICE_IMPL(key, XLA, value) #else diff --git a/mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp b/mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp index b7c995a223..5d79a11f34 100644 --- a/mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp +++ b/mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp @@ -99,8 +99,20 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight, c10::SmallVector offsets = {0, 0}; c10::SmallVector sizes = {n_batch, 1}; at::IntArrayRef offset = at::IntArrayRef(offsets); - at::IntArrayRef size = at::IntArrayRef(sizes); - at_npu::native::custom_ops::npu_slice_out(op_output, offset, size, output); + at::IntArrayRef size_array = at::IntArrayRef(sizes); + c10::SmallVector<int64_t, SIZE> output_size; + for (uint64_t i = 0; i < size_array.size(); i++) { + output_size.emplace_back(size_array[i]); + } + at::Tensor result = at::empty(output_size, op_output.options()); + c10::SmallVector<int64_t, SIZE> offsetVec = array_to_small_vector(offset); + c10::SmallVector<int64_t, SIZE> sizeVec = array_to_small_vector(size_array); + cmd.Name("Slice") + .Input(op_output) + .Input(offsetVec) + .Input(sizeVec) + .Output(output) + .Run(); } void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight, diff --git a/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp new file mode 100644 index 0000000000..49ba9361b7 --- /dev/null +++ b/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp @@ -0,0 +1,66 @@ +#include "pytorch_npu_helper.hpp" + +using namespace NPU_NAME_SPACE; +using namespace std; + +void roi_align_rotated_forward_npu(Tensor input, Tensor rois, Tensor output, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + bool aligned, bool clockwise) { + int64_t aligned_height_64 = aligned_height; + int64_t aligned_width_64 = aligned_width; + int64_t 
sampling_ratio_64 = sampling_ratio; + OpCommand cmd; + cmd.Name("RoiAlignRotated") + .Input(input) + .Input(rois) + .Output(output) + .Attr("pooled_h", aligned_height_64) + .Attr("pooled_w", aligned_width_64) + .Attr("spatial_scale", spatial_scale) + .Attr("sampling_ratio", sampling_ratio_64) + .Attr("aligned", aligned) + .Attr("clockwise", clockwise) + .Run(); +} + +void roi_align_rotated_backward_npu(Tensor top_grad, Tensor rois, + Tensor bottom_grad, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, bool aligned, + bool clockwise) { + int64_t aligned_height_64 = aligned_height; + int64_t aligned_width_64 = aligned_width; + int64_t sampling_ratio_64 = sampling_ratio; + c10::SmallVector<int64_t, SIZE> y_grad_shape = + array_to_small_vector(bottom_grad.sizes()); + OpCommand cmd; + cmd.Name("RoiAlignRotatedGrad") + .Input(top_grad) + .Input(rois) + .Output(bottom_grad) + .Attr("y_grad_shape", y_grad_shape) + .Attr("pooled_h", aligned_width_64) + .Attr("pooled_w", aligned_height_64) + .Attr("spatial_scale", spatial_scale) + .Attr("sampling_ratio", sampling_ratio_64) + .Attr("aligned", aligned) + .Attr("clockwise", clockwise) + .Run(); +} + +void roi_align_rotated_forward_impl(Tensor input, Tensor rois, Tensor output, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + bool aligned, bool clockwise); + +void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois, + Tensor bottom_grad, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, bool aligned, + bool clockwise); + +REGISTER_NPU_IMPL(roi_align_rotated_forward_impl, + roi_align_rotated_forward_npu); +REGISTER_NPU_IMPL(roi_align_rotated_backward_impl, + roi_align_rotated_backward_npu); diff --git a/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp index c7a11e8c6d..26eb542672 100644 --- a/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp +++ b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp @@ -50,23 
+50,29 @@ void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax, int64_t pooled_height_64 = pooled_height; int64_t pooled_width_64 = pooled_width; int64_t pooled_channel = 1; + at::Tensor argmax_trans = argmax.transpose(1, 2).transpose(2, 3); + at::Tensor grad_output_trans = grad_output.transpose(1, 2).transpose(2, 3); at::Tensor roi_actual_num = at::empty_like(rois, rois.options().dtype(at::kInt)); - at::Tensor x = at::ones_like(grad_input); + at::Tensor x = at::ones_like(grad_input).transpose(1, 2).transpose(2, 3); + at::Tensor y = at::zeros_like(x); OpCommand cmd; cmd.Name("RoiPoolingGradWithArgMax") - .Input(grad_output) + .Input(grad_output_trans) .Input(x) .Input(rois) .Input(roi_actual_num) - .Input(argmax) - .Output(grad_input) + .Input(argmax_trans) + .Output(y) .Attr("pooled_h", pooled_height_64) .Attr("pooled_w", pooled_width_64) .Attr("spatial_scale_h", spatial_scale) .Attr("spatial_scale_w", spatial_scale) .Attr("pool_channel", pooled_channel) .Run(); + at::Tensor result = y.transpose(2, 3).transpose(1, 2); + at::Tensor res = NpuUtils::format_contiguous(result); + grad_input.copy_(res); } void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,