Skip to content

Commit

Permalink
feat(lidar_transfusion): intensity as uint8 and tests (autowarefounda…
Browse files Browse the repository at this point in the history
…tion#7745)

* feat(lidar_transfusion): intensity as uint8 and tests

Signed-off-by: Amadeusz Szymko <[email protected]>

* style(pre-commit): autofix

* fix(lidar_transfusion): headers include

Signed-off-by: Amadeusz Szymko <[email protected]>

* feat(lidar_transfusion): use autoware_point_types for validation & refactor

Signed-off-by: Amadeusz Szymko <[email protected]>

* style(lidar_transfusion): remove deprecated function

Signed-off-by: Amadeusz Szymko <[email protected]>

* test(lidar_transfusion): point_step validation is not required

Signed-off-by: Amadeusz Szymko <[email protected]>

* docs(lidar_transfusion): update assumptions / known limits

Signed-off-by: Amadeusz Szymko <[email protected]>

* refactor(lidar_transfusion): redundant points structure

Signed-off-by: Amadeusz Szymko <[email protected]>

* docs(lidar_transfusion): update assumptions / known limits

Signed-off-by: Amadeusz Szymko <[email protected]>

* test(lidar_transfusion): temporary disable CUDA tests

Signed-off-by: Amadeusz Szymko <[email protected]>

---------

Signed-off-by: Amadeusz Szymko <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Kenzo Lobos Tsunekawa <[email protected]>
  • Loading branch information
3 people authored and scepter914 committed Aug 20, 2024
1 parent dc446be commit 7da383d
Show file tree
Hide file tree
Showing 20 changed files with 1,517 additions and 105 deletions.
52 changes: 52 additions & 0 deletions perception/lidar_transfusion/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,58 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
ament_lint_auto_find_test_dependencies()

find_package(ament_cmake_gtest REQUIRED)
ament_auto_add_gtest(test_detection_class_remapper
test/test_detection_class_remapper.cpp
)
ament_auto_add_gtest(test_ros_utils
test/test_ros_utils.cpp
)
ament_auto_add_gtest(test_nms
test/test_nms.cpp
)

# Temporary disabled, tracked by:
# https://github.com/autowarefoundation/autoware.universe/issues/7724
# ament_auto_add_gtest(test_voxel_generator
# test/test_voxel_generator.cpp
# )

# # preprocess kernel test
# add_executable(test_preprocess_kernel
# test/test_preprocess_kernel.cpp
# )
# target_include_directories(test_preprocess_kernel PUBLIC
# ${test_preprocess_kernel_SOURCE_DIR}
# )
# target_link_libraries(test_preprocess_kernel
# transfusion_cuda_lib
# gtest
# gtest_main
# )
# ament_add_test(test_preprocess_kernel
# GENERATE_RESULT_FOR_RETURN_CODE_ZERO
# COMMAND "$<TARGET_FILE:test_preprocess_kernel>"
# )

# # postprocess kernel test
# add_executable(test_postprocess_kernel
# test/test_postprocess_kernel.cpp
# )
# target_include_directories(test_postprocess_kernel PUBLIC
# ${test_postprocess_kernel_SOURCE_DIR}
# )
# target_link_libraries(test_postprocess_kernel
# transfusion_cuda_lib
# gtest
# gtest_main
# )
# ament_add_test(test_postprocess_kernel
# GENERATE_RESULT_FOR_RETURN_CODE_ZERO
# COMMAND "$<TARGET_FILE:test_postprocess_kernel>"
# )

endif()

ament_auto_package(
Expand Down
18 changes: 18 additions & 0 deletions perception/lidar_transfusion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,24 @@ ros2 launch lidar_transfusion lidar_transfusion.launch.xml log_level:=debug

## Assumptions / Known limits

This library operates on raw cloud data (bytes). It is assumed that the input pointcloud message has following format:

```python
[
sensor_msgs.msg.PointField(name='x', offset=0, datatype=7, count=1),
sensor_msgs.msg.PointField(name='y', offset=4, datatype=7, count=1),
sensor_msgs.msg.PointField(name='z', offset=8, datatype=7, count=1),
sensor_msgs.msg.PointField(name='intensity', offset=12, datatype=2, count=1)
]
```

This input may consist of other fields as well - shown format is required minimum.
For debug purposes, you can validate your pointcloud topic using simple command:

```bash
ros2 topic echo <input_topic> --field fields
```

## Trained Models

You can download the onnx format of trained models by clicking on the links below.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class DensificationParam

struct PointCloudWithTransform
{
cuda::unique_ptr<float[]> points_d{nullptr};
cuda::unique_ptr<uint8_t[]> data_d{nullptr};
std_msgs::msg::Header header;
size_t num_points{0};
Eigen::Affine3f affine_past2world;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,8 @@ class PreprocessCuda
unsigned int * mask, float * voxels, unsigned int * pillar_num, float * voxel_features,
unsigned int * voxel_num, unsigned int * voxel_idxs);

// cudaError_t generateVoxelsInput_launch(
// uint8_t * cloud_data, CloudInfo & cloud_info, unsigned int points_agg, unsigned int
// points_size, float time_lag, float * affine_transform, float * points);

cudaError_t generateSweepPoints_launch(
const float * input_points, size_t points_size, int input_point_step, float time_lag,
const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
const float * transform, float * output_points);

private:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@
#include "lidar_transfusion/cuda_utils.hpp"
#include "lidar_transfusion/preprocess/pointcloud_densification.hpp"
#include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
#include "lidar_transfusion/ros_utils.hpp"
#include "lidar_transfusion/transfusion_config.hpp"
#include "lidar_transfusion/utils.hpp"

#ifdef ROS_DISTRO_GALACTIC
#include <tf2_eigen/tf2_eigen.h>
#else
#include <tf2_eigen/tf2_eigen.hpp>
#endif

#include <autoware_point_types/types.hpp>

#include <sensor_msgs/msg/point_cloud2.hpp>

#include <memory>
Expand All @@ -36,8 +38,8 @@

namespace lidar_transfusion
{
constexpr size_t AFF_MAT_SIZE = 16; // 4x4 matrix
constexpr size_t MAX_CLOUD_STEP_SIZE = 35; // PointXYZIRCADT
constexpr size_t AFF_MAT_SIZE = 16; // 4x4 matrix
constexpr size_t MAX_CLOUD_STEP_SIZE = sizeof(autoware_point_types::PointXYZIRCAEDT);

class VoxelGenerator
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,84 @@

#include "lidar_transfusion/utils.hpp"

#include <autoware_point_types/types.hpp>

#include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
#include <autoware_perception_msgs/msg/detected_objects.hpp>
#include <autoware_perception_msgs/msg/object_classification.hpp>
#include <autoware_perception_msgs/msg/shape.hpp>
#include <sensor_msgs/msg/point_field.hpp>

#include <cstdint>
#include <string>
#include <vector>

#define CHECK_OFFSET(offset, structure, field) \
static_assert( \
offsetof(structure, field) == offset, \
"Offset of " #field " in " #structure " does not match expected offset.")
#define CHECK_TYPE(type, structure, field) \
static_assert( \
std::is_same<decltype(structure::field), type>::value, \
"Type of " #field " in " #structure " does not match expected type.")
#define CHECK_FIELD(offset, type, structure, field) \
CHECK_OFFSET(offset, structure, field); \
CHECK_TYPE(type, structure, field)

namespace lidar_transfusion
{
using sensor_msgs::msg::PointField;

CHECK_FIELD(0, float, autoware_point_types::PointXYZIRCAEDT, x);
CHECK_FIELD(4, float, autoware_point_types::PointXYZIRCAEDT, y);
CHECK_FIELD(8, float, autoware_point_types::PointXYZIRCAEDT, z);
CHECK_FIELD(12, uint8_t, autoware_point_types::PointXYZIRCAEDT, intensity);

struct CloudInfo
{
uint32_t x_offset{0};
uint32_t y_offset{sizeof(float)};
uint32_t z_offset{sizeof(float) * 2};
uint32_t intensity_offset{sizeof(float) * 3};
uint8_t x_datatype{PointField::FLOAT32};
uint8_t y_datatype{PointField::FLOAT32};
uint8_t z_datatype{PointField::FLOAT32};
uint8_t intensity_datatype{PointField::UINT8};
uint8_t x_count{1};
uint8_t y_count{1};
uint8_t z_count{1};
uint8_t intensity_count{1};
uint32_t point_step{sizeof(autoware_point_types::PointXYZIRCAEDT)};
bool is_bigendian{false};

bool operator!=(const CloudInfo & rhs) const
{
return x_offset != rhs.x_offset || y_offset != rhs.y_offset || z_offset != rhs.z_offset ||
intensity_offset != rhs.intensity_offset || x_datatype != rhs.x_datatype ||
y_datatype != rhs.y_datatype || z_datatype != rhs.z_datatype ||
intensity_datatype != rhs.intensity_datatype || x_count != rhs.x_count ||
y_count != rhs.y_count || z_count != rhs.z_count ||
intensity_count != rhs.intensity_count || is_bigendian != rhs.is_bigendian;
}

friend std::ostream & operator<<(std::ostream & os, const CloudInfo & info)
{
os << "x_offset: " << static_cast<int>(info.x_offset) << std::endl;
os << "y_offset: " << static_cast<int>(info.y_offset) << std::endl;
os << "z_offset: " << static_cast<int>(info.z_offset) << std::endl;
os << "intensity_offset: " << static_cast<int>(info.intensity_offset) << std::endl;
os << "x_datatype: " << static_cast<int>(info.x_datatype) << std::endl;
os << "y_datatype: " << static_cast<int>(info.y_datatype) << std::endl;
os << "z_datatype: " << static_cast<int>(info.z_datatype) << std::endl;
os << "intensity_datatype: " << static_cast<int>(info.intensity_datatype) << std::endl;
os << "x_count: " << static_cast<int>(info.x_count) << std::endl;
os << "y_count: " << static_cast<int>(info.y_count) << std::endl;
os << "z_count: " << static_cast<int>(info.z_count) << std::endl;
os << "intensity_count: " << static_cast<int>(info.intensity_count) << std::endl;
os << "is_bigendian: " << static_cast<int>(info.is_bigendian) << std::endl;
return os;
}
};

void box3DToDetectedObject(
const Box3D & box3d, const std::vector<std::string> & class_names,
Expand Down
46 changes: 0 additions & 46 deletions perception/lidar_transfusion/include/lidar_transfusion/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,52 +36,6 @@ struct Box3D
float yaw;
};

struct CloudInfo
{
uint32_t x_offset;
uint32_t y_offset;
uint32_t z_offset;
uint32_t intensity_offset;
uint8_t x_datatype;
uint8_t y_datatype;
uint8_t z_datatype;
uint8_t intensity_datatype;
uint8_t x_count;
uint8_t y_count;
uint8_t z_count;
uint8_t intensity_count;
uint32_t point_step;
bool is_bigendian;

CloudInfo()
: x_offset(0),
y_offset(4),
z_offset(8),
intensity_offset(12),
x_datatype(7),
y_datatype(7),
z_datatype(7),
intensity_datatype(7),
x_count(1),
y_count(1),
z_count(1),
intensity_count(1),
point_step(16),
is_bigendian(false)
{
}

bool operator!=(const CloudInfo & rhs) const
{
return x_offset != rhs.x_offset || y_offset != rhs.y_offset || z_offset != rhs.z_offset ||
intensity_offset != rhs.intensity_offset || x_datatype != rhs.x_datatype ||
y_datatype != rhs.y_datatype || z_datatype != rhs.z_datatype ||
intensity_datatype != rhs.intensity_datatype || x_count != rhs.x_count ||
y_count != rhs.y_count || z_count != rhs.z_count ||
intensity_count != rhs.intensity_count || is_bigendian != rhs.is_bigendian;
}
};

enum NetworkIO { voxels = 0, num_points, coors, cls_score, dir_pred, bbox_pred, ENUM_SIZE };

// cspell: ignore divup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,15 @@ void PointCloudDensification::enqueue(
affine_world2current_ = affine_world2current;
current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();

assert(sizeof(uint8_t) * msg.width * msg.height * msg.point_step % sizeof(float) == 1);
auto points_d = cuda::make_unique<float[]>(
sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(float));
auto data_d = cuda::make_unique<uint8_t[]>(
sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(uint8_t));

CHECK_CUDA_ERROR(cudaMemcpyAsync(
points_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
data_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
cudaMemcpyHostToDevice, stream_));

PointCloudWithTransform pointcloud = {
std::move(points_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
std::move(data_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};

pointcloud_cache_.push_front(std::move(pointcloud));
}
Expand Down
61 changes: 37 additions & 24 deletions perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include "lidar_transfusion/cuda_utils.hpp"
#include "lidar_transfusion/preprocess/preprocess_kernel.hpp"

#include <cstdint>

namespace lidar_transfusion
{

Expand Down Expand Up @@ -99,9 +101,12 @@ __global__ void generateVoxels_random_kernel(
cudaError_t PreprocessCuda::generateVoxels_random_launch(
float * points, unsigned int points_size, unsigned int * mask, float * voxels)
{
int threadNum = config_.threads_for_voxel_;
dim3 blocks((points_size + threadNum - 1) / threadNum);
dim3 threads(threadNum);
if (points_size == 0) {
return cudaGetLastError();
}
dim3 blocks(divup(points_size, config_.threads_for_voxel_));
dim3 threads(config_.threads_for_voxel_);

generateVoxels_random_kernel<<<blocks, threads, 0, stream_>>>(
points, points_size, config_.min_x_range_, config_.max_x_range_, config_.min_y_range_,
config_.max_y_range_, config_.min_z_range_, config_.max_z_range_, config_.voxel_x_size_,
Expand Down Expand Up @@ -165,40 +170,48 @@ cudaError_t PreprocessCuda::generateBaseFeatures_launch(
}

__global__ void generateSweepPoints_kernel(
const float * input_points, size_t points_size, int input_point_step, float time_lag,
const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
const float * transform_array, int num_features, float * output_points)
{
int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (point_idx >= points_size) return;

const float input_x = input_points[point_idx * input_point_step + 0];
const float input_y = input_points[point_idx * input_point_step + 1];
const float input_z = input_points[point_idx * input_point_step + 2];
const float intensity = input_points[point_idx * input_point_step + 3];

output_points[point_idx * num_features] = transform_array[0] * input_x +
transform_array[4] * input_y +
transform_array[8] * input_z + transform_array[12];
output_points[point_idx * num_features + 1] = transform_array[1] * input_x +
transform_array[5] * input_y +
transform_array[9] * input_z + transform_array[13];
output_points[point_idx * num_features + 2] = transform_array[2] * input_x +
transform_array[6] * input_y +
transform_array[10] * input_z + transform_array[14];
output_points[point_idx * num_features + 3] = intensity;
union {
uint32_t raw{0};
float value;
} input_x, input_y, input_z;

#pragma unroll
for (int i = 0; i < 4; i++) { // 4 bytes for float32
input_x.raw |= input_data[point_idx * input_point_step + i] << i * 8;
input_y.raw |= input_data[point_idx * input_point_step + i + 4] << i * 8;
input_z.raw |= input_data[point_idx * input_point_step + i + 8] << i * 8;
}

float input_intensity = static_cast<float>(input_data[point_idx * input_point_step + 12]);

output_points[point_idx * num_features] =
transform_array[0] * input_x.value + transform_array[4] * input_y.value +
transform_array[8] * input_z.value + transform_array[12];
output_points[point_idx * num_features + 1] =
transform_array[1] * input_x.value + transform_array[5] * input_y.value +
transform_array[9] * input_z.value + transform_array[13];
output_points[point_idx * num_features + 2] =
transform_array[2] * input_x.value + transform_array[6] * input_y.value +
transform_array[10] * input_z.value + transform_array[14];
output_points[point_idx * num_features + 3] = input_intensity;
output_points[point_idx * num_features + 4] = time_lag;
}

cudaError_t PreprocessCuda::generateSweepPoints_launch(
const float * input_points, size_t points_size, int input_point_step, float time_lag,
const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
const float * transform_array, float * output_points)
{
int threadNum = config_.threads_for_voxel_;
dim3 blocks((points_size + threadNum - 1) / threadNum);
dim3 threads(threadNum);
dim3 blocks(divup(points_size, config_.threads_for_voxel_));
dim3 threads(config_.threads_for_voxel_);

generateSweepPoints_kernel<<<blocks, threads, 0, stream_>>>(
input_points, points_size, input_point_step, time_lag, transform_array,
input_data, points_size, input_point_step, time_lag, transform_array,
config_.num_point_feature_size_, output_points);

cudaError_t err = cudaGetLastError();
Expand Down
Loading

0 comments on commit 7da383d

Please sign in to comment.