From ede012339424a6af0352fbf7f5c4f056613d66fa Mon Sep 17 00:00:00 2001
From: Daisuke Nishimatsu <42202095+wep21@users.noreply.github.com>
Date: Mon, 11 Dec 2023 10:16:28 +0900
Subject: [PATCH] fix(lidar_centerpoint,image_projection_based_fusion): add
 guard to avoid exceeding max voxel size (#5824)

---
 .../pointpainting_fusion/preprocess_kernel.hpp      |  5 +++--
 .../src/pointpainting_fusion/pointpainting_trt.cpp  |  4 ++--
 .../src/pointpainting_fusion/preprocess_kernel.cu   | 13 ++++++++-----
 .../preprocess/preprocess_kernel.hpp                |  5 +++--
 .../lidar_centerpoint/lib/centerpoint_trt.cpp       |  4 ++--
 .../lib/preprocess/preprocess_kernel.cu             | 13 ++++++++-----
 6 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/preprocess_kernel.hpp b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/preprocess_kernel.hpp
index c913ac33d5e84..897609fa3d86d 100644
--- a/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/preprocess_kernel.hpp
+++ b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/preprocess_kernel.hpp
@@ -27,8 +27,9 @@ cudaError_t generateVoxels_random_launch(
   cudaStream_t stream);
 
 cudaError_t generateBaseFeatures_launch(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs, cudaStream_t stream);
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream);
 
 cudaError_t generateFeatures_launch(
   const float * voxel_features, const float * voxel_num_points, const int * coords,
diff --git a/perception/image_projection_based_fusion/src/pointpainting_fusion/pointpainting_trt.cpp b/perception/image_projection_based_fusion/src/pointpainting_fusion/pointpainting_trt.cpp
index 8911442f4c75d..d44620995c61b 100644
--- a/perception/image_projection_based_fusion/src/pointpainting_fusion/pointpainting_trt.cpp
+++ b/perception/image_projection_based_fusion/src/pointpainting_fusion/pointpainting_trt.cpp
@@ -69,8 +69,8 @@ bool PointPaintingTRT::preprocess(
 
   CHECK_CUDA_ERROR(image_projection_based_fusion::generateBaseFeatures_launch(
     mask_d_.get(), voxels_buffer_d_.get(), config_.grid_size_y_, config_.grid_size_x_,
-    num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(),
-    stream_));
+    config_.max_voxel_size_, num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(),
+    coordinates_d_.get(), stream_));
 
   CHECK_CUDA_ERROR(image_projection_based_fusion::generateFeatures_launch(
     voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_d_.get(),
diff --git a/perception/image_projection_based_fusion/src/pointpainting_fusion/preprocess_kernel.cu b/perception/image_projection_based_fusion/src/pointpainting_fusion/preprocess_kernel.cu
index d06b60633acf8..68e08ac61a569 100644
--- a/perception/image_projection_based_fusion/src/pointpainting_fusion/preprocess_kernel.cu
+++ b/perception/image_projection_based_fusion/src/pointpainting_fusion/preprocess_kernel.cu
@@ -105,8 +105,8 @@ cudaError_t generateVoxels_random_launch(
 }
 
 __global__ void generateBaseFeatures_kernel(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs)
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs)
 {
   unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
   unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
@@ -120,6 +120,7 @@ __global__ void generateBaseFeatures_kernel(
 
   unsigned int current_pillarId = 0;
   current_pillarId = atomicAdd(pillar_num, 1);
+  if (current_pillarId > max_voxel_size - 1) return;
 
   voxel_num[current_pillarId] = count;
 
@@ -140,15 +141,17 @@ __global__ void generateBaseFeatures_kernel(
 
 // create 4 channels
 cudaError_t generateBaseFeatures_launch(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs, cudaStream_t stream)
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream)
 {
   dim3 threads = {32, 32};
   dim3 blocks = {
     (grid_x_size + threads.x - 1) / threads.x, (grid_y_size + threads.y - 1) / threads.y};
 
   generateBaseFeatures_kernel<<<blocks, threads, 0, stream>>>(
-    mask, voxels, grid_y_size, grid_x_size, pillar_num, voxel_features, voxel_num, voxel_idxs);
+    mask, voxels, grid_y_size, grid_x_size, max_voxel_size, pillar_num, voxel_features, voxel_num,
+    voxel_idxs);
   cudaError_t err = cudaGetLastError();
   return err;
 }
diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
index 824144fe3b22b..9488b67475509 100644
--- a/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
+++ b/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
@@ -27,8 +27,9 @@ cudaError_t generateVoxels_random_launch(
   cudaStream_t stream);
 
 cudaError_t generateBaseFeatures_launch(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs, cudaStream_t stream);
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream);
 
 cudaError_t generateFeatures_launch(
   const float * voxel_features, const float * voxel_num_points, const int * coords,
diff --git a/perception/lidar_centerpoint/lib/centerpoint_trt.cpp b/perception/lidar_centerpoint/lib/centerpoint_trt.cpp
index 67271985d3b5e..2804e172b73fa 100644
--- a/perception/lidar_centerpoint/lib/centerpoint_trt.cpp
+++ b/perception/lidar_centerpoint/lib/centerpoint_trt.cpp
@@ -156,8 +156,8 @@ bool CenterPointTRT::preprocess(
 
   CHECK_CUDA_ERROR(generateBaseFeatures_launch(
     mask_d_.get(), voxels_buffer_d_.get(), config_.grid_size_y_, config_.grid_size_x_,
-    num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(),
-    stream_));
+    config_.max_voxel_size_, num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(),
+    coordinates_d_.get(), stream_));
 
   CHECK_CUDA_ERROR(generateFeatures_launch(
     voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_d_.get(),
diff --git a/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu b/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
index 6f77ff84c2cea..118e31f892d72 100644
--- a/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
+++ b/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
@@ -87,8 +87,8 @@ cudaError_t generateVoxels_random_launch(
 }
 
 __global__ void generateBaseFeatures_kernel(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs)
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs)
 {
   unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
   unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
@@ -102,6 +102,7 @@ __global__ void generateBaseFeatures_kernel(
 
   unsigned int current_pillarId = 0;
   current_pillarId = atomicAdd(pillar_num, 1);
+  if (current_pillarId > max_voxel_size - 1) return;
 
   voxel_num[current_pillarId] = count;
 
@@ -120,15 +121,17 @@ __global__ void generateBaseFeatures_kernel(
 
 // create 4 channels
 cudaError_t generateBaseFeatures_launch(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs, cudaStream_t stream)
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream)
 {
   dim3 threads = {32, 32};
   dim3 blocks = {
     (grid_x_size + threads.x - 1) / threads.x, (grid_y_size + threads.y - 1) / threads.y};
 
   generateBaseFeatures_kernel<<<blocks, threads, 0, stream>>>(
-    mask, voxels, grid_y_size, grid_x_size, pillar_num, voxel_features, voxel_num, voxel_idxs);
+    mask, voxels, grid_y_size, grid_x_size, max_voxel_size, pillar_num, voxel_features, voxel_num,
+    voxel_idxs);
   cudaError_t err = cudaGetLastError();
   return err;
 }