From b5366a5cdf5b344f13caa96c5f585534c1cedada Mon Sep 17 00:00:00 2001 From: momo609 <963372609@qq.com> Date: Wed, 27 Sep 2023 09:53:07 +0800 Subject: [PATCH] add roi_align_rotated npu adapter and improve roi_pool adapter. --- docs/zh_cn/understand_mmcv/ops.md | 4 +- mmcv/ops/csrc/common/pytorch_npu_helper.hpp | 13 +++- mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp | 16 ++++- .../pytorch/npu/roi_align_rotated_npu.cpp | 66 +++++++++++++++++++ mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp | 14 ++-- 5 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md index ba744daf11..3023eb647a 100644 --- a/docs/zh_cn/understand_mmcv/ops.md +++ b/docs/zh_cn/understand_mmcv/ops.md @@ -26,7 +26,7 @@ MMCV 提供了检测、分割等任务中常用的算子 | FurthestPointSampleWithDist | | √ | | | | | FusedBiasLeakyrelu | | √ | | | √ | | GatherPoints | | √ | | | √ | -| GroupPoints | | √ | | | | +| GroupPoints | | √ | | | √ | | Iou3d | | √ | √ | | | | KNN | | √ | | | | | MaskedConv | | √ | √ | | √ | @@ -44,7 +44,7 @@ MMCV 提供了检测、分割等任务中常用的算子 | RotatedFeatureAlign | √ | √ | √ | | | | RoIPointPool3d | | √ | √ | | | | RoIPool | | √ | √ | | √ | -| RoIAlignRotated | √ | √ | √ | | | +| RoIAlignRotated | √ | √ | √ | | √ | | RiRoIAlignRotated | | √ | | | | | RoIAlign | √ | √ | √ | | √ | | RoIAwarePool3d | | √ | √ | | | diff --git a/mmcv/ops/csrc/common/pytorch_npu_helper.hpp b/mmcv/ops/csrc/common/pytorch_npu_helper.hpp index 073d6b38c3..01cfe80548 100644 --- a/mmcv/ops/csrc/common/pytorch_npu_helper.hpp +++ b/mmcv/ops/csrc/common/pytorch_npu_helper.hpp @@ -18,7 +18,6 @@ #ifndef PYTORCH_NPU_HELPER_HPP_ #define PYTORCH_NPU_HELPER_HPP_ -#include #include #include @@ -27,6 +26,18 @@ #define NPU_NAME_SPACE at_npu::native +const int SIZE = 8; +c10::SmallVector<int64_t, SIZE> array_to_small_vector(c10::IntArrayRef shape) +{ + c10::SmallVector<int64_t, SIZE> shape_small_vec; + for (int i = 0; i < shape.size(); i++) + { + 
shape_small_vec.emplace_back(shape[i]); + } + + return shape_small_vec; +} + #ifdef MMCV_WITH_XLA #define REGISTER_NPU_IMPL(key, value) REGISTER_DEVICE_IMPL(key, XLA, value) #else diff --git a/mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp b/mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp index b7c995a223..5d79a11f34 100644 --- a/mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp +++ b/mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp @@ -99,8 +99,20 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight, c10::SmallVector offsets = {0, 0}; c10::SmallVector sizes = {n_batch, 1}; at::IntArrayRef offset = at::IntArrayRef(offsets); - at::IntArrayRef size = at::IntArrayRef(sizes); - at_npu::native::custom_ops::npu_slice_out(op_output, offset, size, output); + at::IntArrayRef size_array = at::IntArrayRef(sizes); + c10::SmallVector<int64_t, SIZE> output_size; + for (uint64_t i = 0; i < size_array.size(); i++) { + output_size.emplace_back(size_array[i]); + } + at::Tensor result = at::empty(output_size, op_output.options()); + c10::SmallVector<int64_t, SIZE> offsetVec = array_to_small_vector(offset); + c10::SmallVector<int64_t, SIZE> sizeVec = array_to_small_vector(size_array); + cmd.Name("Slice") + .Input(op_output) + .Input(offsetVec) + .Input(sizeVec) + .Output(output) + .Run(); } void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight, diff --git a/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp new file mode 100644 index 0000000000..49ba9361b7 --- /dev/null +++ b/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_npu.cpp @@ -0,0 +1,66 @@ +#include "pytorch_npu_helper.hpp" + +using namespace NPU_NAME_SPACE; +using namespace std; + +void roi_align_rotated_forward_npu(Tensor input, Tensor rois, Tensor output, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + bool aligned, bool clockwise) { + int64_t aligned_height_64 = aligned_height; + int64_t aligned_width_64 = aligned_width; + int64_t 
sampling_ratio_64 = sampling_ratio; + OpCommand cmd; + cmd.Name("RoiAlignRotated") + .Input(input) + .Input(rois) + .Output(output) + .Attr("pooled_h", aligned_height_64) + .Attr("pooled_w", aligned_width_64) + .Attr("spatial_scale", spatial_scale) + .Attr("sampling_ratio", sampling_ratio_64) + .Attr("aligned", aligned) + .Attr("clockwise", clockwise) + .Run(); +} + +void roi_align_rotated_backward_npu(Tensor top_grad, Tensor rois, + Tensor bottom_grad, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, bool aligned, + bool clockwise) { + int64_t aligned_height_64 = aligned_height; + int64_t aligned_width_64 = aligned_width; + int64_t sampling_ratio_64 = sampling_ratio; + c10::SmallVector<int64_t, SIZE> y_grad_shape = + array_to_small_vector(bottom_grad.sizes()); + OpCommand cmd; + cmd.Name("RoiAlignRotatedGrad") + .Input(top_grad) + .Input(rois) + .Output(bottom_grad) + .Attr("y_grad_shape", y_grad_shape) + .Attr("pooled_h", aligned_width_64) + .Attr("pooled_w", aligned_height_64) + .Attr("spatial_scale", spatial_scale) + .Attr("sampling_ratio", sampling_ratio_64) + .Attr("aligned", aligned) + .Attr("clockwise", clockwise) + .Run(); +} + +void roi_align_rotated_forward_impl(Tensor input, Tensor rois, Tensor output, + int aligned_height, int aligned_width, + float spatial_scale, int sampling_ratio, + bool aligned, bool clockwise); + +void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois, + Tensor bottom_grad, int aligned_height, + int aligned_width, float spatial_scale, + int sampling_ratio, bool aligned, + bool clockwise); + +REGISTER_NPU_IMPL(roi_align_rotated_forward_impl, + roi_align_rotated_forward_npu); +REGISTER_NPU_IMPL(roi_align_rotated_backward_impl, + roi_align_rotated_backward_npu); diff --git a/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp index c7a11e8c6d..26eb542672 100644 --- a/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp +++ b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp @@ -50,23 
+50,29 @@ void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax, int64_t pooled_height_64 = pooled_height; int64_t pooled_width_64 = pooled_width; int64_t pooled_channel = 1; + at::Tensor argmax_trans = argmax.transpose(1, 2).transpose(2, 3); + at::Tensor grad_output_trans = grad_output.transpose(1, 2).transpose(2, 3); at::Tensor roi_actual_num = at::empty_like(rois, rois.options().dtype(at::kInt)); - at::Tensor x = at::ones_like(grad_input); + at::Tensor x = at::ones_like(grad_input).transpose(1, 2).transpose(2, 3); + at::Tensor y = at::zeros_like(x); OpCommand cmd; cmd.Name("RoiPoolingGradWithArgMax") - .Input(grad_output) + .Input(grad_output_trans) .Input(x) .Input(rois) .Input(roi_actual_num) - .Input(argmax) - .Output(grad_input) + .Input(argmax_trans) + .Output(y) .Attr("pooled_h", pooled_height_64) .Attr("pooled_w", pooled_width_64) .Attr("spatial_scale_h", spatial_scale) .Attr("spatial_scale_w", spatial_scale) .Attr("pool_channel", pooled_channel) .Run(); + at::Tensor result = y.transpose(2, 3).transpose(1, 2); + at::Tensor res = NpuUtils::format_contiguous(result); + grad_input.copy_(res); } void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,