Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: temporary pr, adding yolox type adaptation #1637

Draft
wants to merge 7 commits into
base: beta/v0.29.0
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions perception/tensorrt_yolox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
find_package(autoware_type_adapters)
else()
message(WARNING "CUDA is not found. preprocess acceleration using CUDA will not be available.")
endif()
Expand All @@ -32,6 +33,7 @@ ament_auto_add_library(${PROJECT_NAME} SHARED

ament_target_dependencies(${PROJECT_NAME}
OpenCV
autoware_type_adapters
)

if(CMAKE_CUDA_COMPILER)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,6 @@
clip_value: 6.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
is_using_image_transport: true # If true, publish images via image_transport; if false, publish only raw sensor_msgs/Image topics.
type_adaptation_activated: true # If true, use type adaptation to pass images between nodes in GPU memory.
is_publish_debug_rois_image_: true # If true, draw detected ROIs onto the debug image. NOTE(review): trailing underscore in this parameter name looks like a leaked member-variable name — confirm it matches the name declared in the node code. Defaults here (true) also disagree with the schema defaults (false) for the two parameters above — confirm which is intended.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_
#define TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_

#include "type_adapters/image_container.hpp"

#include <cuda_utils/cuda_unique_ptr.hpp>
#include <cuda_utils/stream_unique_ptr.hpp>
#include <opencv2/opencv.hpp>
Expand All @@ -31,6 +33,7 @@ using cuda_utils::CudaUniquePtr;
using cuda_utils::CudaUniquePtrHost;
using cuda_utils::makeCudaStream;
using cuda_utils::StreamUniquePtr;
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;

struct Object
{
Expand Down Expand Up @@ -107,6 +110,10 @@ class TrtYoloX
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

bool doInference(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
std::vector<cv::Mat> & masks, std::vector<cv::Mat> & color_masks);

/**
* @brief run inference including pre-process and post-process
* @param[out] objects results for object detection
Expand All @@ -116,6 +123,10 @@ class TrtYoloX
bool doInferenceWithRoi(
const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doInferenceWithRoi(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
const std::vector<cv::Rect> & roi);

/**
* @brief run multi-scale inference including pre-process and post-process
* @param[out] objects results for object detection
Expand All @@ -125,6 +136,10 @@ class TrtYoloX
bool doMultiScaleInference(
const cv::Mat & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doMultiScaleInference(
const std::shared_ptr<ImageContainer> & image, ObjectArrays & objects,
const std::vector<cv::Rect> & roi);

/**
* @brief allocate buffer for preprocess on GPU
* @param[in] width original image width
Expand Down Expand Up @@ -167,6 +182,7 @@ class TrtYoloX
* @param[in] images batching images
*/
void preprocessGpu(const std::vector<cv::Mat> & images);
void preprocessGpu(const std::vector<std::shared_ptr<ImageContainer>> & images);

/**
* @brief run preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
Expand All @@ -183,6 +199,9 @@ class TrtYoloX
void preprocessWithRoiGpu(
const std::vector<cv::Mat> & images, const std::vector<cv::Rect> & rois);

void preprocessWithRoiGpu(
const std::vector<std::shared_ptr<ImageContainer>> & images,
const std::vector<cv::Rect> & rois);
/**
* @brief run multi-scale preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
* @param[in] images batching images
Expand All @@ -196,15 +215,29 @@ class TrtYoloX
* @param[in] rois region of interest
*/
void multiScalePreprocessGpu(const cv::Mat & image, const std::vector<cv::Rect> & rois);
void multiScalePreprocessGpu(
const std::shared_ptr<ImageContainer> & image, const std::vector<cv::Rect> & rois);

bool multiScaleFeedforward(const cv::Mat & image, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforward(
const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects);

bool multiScaleFeedforwardAndDecode(
const cv::Mat & images, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforwardAndDecode(
const std::shared_ptr<ImageContainer> & images, int batch_size, ObjectArrays & objects);

bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
bool feedforward(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects);

bool feedforwardAndDecode(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);
bool feedforwardAndDecode(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
std::vector<cv::Mat> & masks, std::vector<cv::Mat> & color_masks);

void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
void generateGridsAndStride(
const int target_w, const int target_h, const std::vector<int> & strides,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_

#include "object_recognition_utils/object_recognition_utils.hpp"
#include "type_adapters/image_container.hpp"

#include <autoware/universe_utils/ros/debug_publisher.hpp>
#include <autoware/universe_utils/system/stop_watch.hpp>
Expand Down Expand Up @@ -48,6 +49,7 @@ namespace tensorrt_yolox
// cspell: ignore Semseg
using LabelMap = std::map<int, std::string>;
using Label = tier4_perception_msgs::msg::Semantic;
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;
class TrtYoloXNode : public rclcpp::Node
{
struct RoiOverlaySemsegLabel
Expand All @@ -74,12 +76,22 @@ class TrtYoloXNode : public rclcpp::Node

private:
void onConnect();
void setUpImageSubscriber();
bool checkInputBlocked();
uint32_t getNumOutputConnections();
void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
void onGpuImage(const std::shared_ptr<ImageContainer> msg);
bool readLabelFile(const std::string & label_path);
void replaceLabelMap();
void overlapSegmentByRoi(
const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
int mapRoiLabel2SegLabel(const int32_t roi_label_index);
void postInference(
const std_msgs::msg::Header header, const tensorrt_yolox::ObjectArrays & objects,
const int width, const int height, std::vector<cv::Mat> & masks);
void drawImageDetection(
cv_bridge::CvImagePtr image, const tensorrt_yolox::ObjectArrays & objects, const int width,
const int height);
image_transport::Publisher image_pub_;
image_transport::Publisher mask_pub_;

Expand All @@ -88,13 +100,20 @@ class TrtYoloXNode : public rclcpp::Node
rclcpp::Publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr objects_pub_;

image_transport::Subscriber image_sub_;
rclcpp::Subscription<ImageContainer>::SharedPtr gpu_image_sub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_image_pub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_mask_pub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_color_mask_pub_;

rclcpp::TimerBase::SharedPtr timer_;

LabelMap label_map_;
std::unique_ptr<tensorrt_yolox::TrtYoloX> trt_yolox_;
bool is_roi_overlap_segment_;
bool is_publish_color_mask_;
bool is_using_image_transport_;
bool type_adaptation_activated_;
bool is_publish_debug_rois_image_;
float overlap_roi_score_threshold_;
// TODO(badai-nguyen): change to function
std::map<std::string, int> remap_roi_to_semantic_ = {
Expand Down
1 change: 1 addition & 0 deletions perception/tensorrt_yolox/package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<buildtool_export_depend>tensorrt_cmake_module</buildtool_export_depend>

<depend>autoware_perception_msgs</depend>
<depend>autoware_type_adapters</depend>
<depend>cuda_utils</depend>
<depend>cv_bridge</depend>
<depend>image_transport</depend>
Expand Down
15 changes: 15 additions & 0 deletions perception/tensorrt_yolox/schema/yolox_s_plus_opt.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@
"type": "string",
"default": "",
"description": "Path to a file which contains path to images. Those images will be used for int8 quantization."
},
"is_using_image_transport": {
"type": "boolean",
"default": false,
        "description": "If true, publish images via image_transport; if false, publish only raw images."
},
"type_adaptation_activated": {
"type": "boolean",
"default": false,
        "description": "If true, use type adaptation to transfer images between nodes in GPU memory."
},
"is_publish_debug_rois_image_": {
"type": "boolean",
"default": true,
        "description": "If true, draw detected ROIs onto the debug image."
}
},
"required": [
Expand Down
Loading
Loading