Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: temporary pr, adding yolox type adaptation #1637

Draft
wants to merge 7 commits into
base: beta/v0.29.0
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions perception/tensorrt_yolox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
find_package(autoware_type_adapters)
else()
message(WARNING "CUDA is not found. preprocess acceleration using CUDA will not be available.")
endif()
Expand All @@ -32,6 +33,7 @@ ament_auto_add_library(${PROJECT_NAME} SHARED

ament_target_dependencies(${PROJECT_NAME}
OpenCV
autoware_type_adapters
)

if(CMAKE_CUDA_COMPILER)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,6 @@
clip_value: 6.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
is_using_image_transport: true # If true, publish images via image_transport; if false, publish only raw sensor_msgs/Image topics.
type_adaptation_activated: true # If true, use type adaptation to pass images between nodes in GPU memory.
is_publish_debug_rois_image_: true # If true, draw detected ROIs onto the debug image. NOTE(review): trailing underscore in this parameter name looks like a leaked member-variable name — confirm it matches the name declared in the node code. Defaults here (true) also disagree with the schema defaults (false) for the two parameters above — confirm which is intended.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_
#define TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_

#include "type_adapters/image_container.hpp"

#include <cuda_utils/cuda_unique_ptr.hpp>
#include <cuda_utils/stream_unique_ptr.hpp>
#include <opencv2/opencv.hpp>
Expand All @@ -31,6 +33,7 @@ using cuda_utils::CudaUniquePtr;
using cuda_utils::CudaUniquePtrHost;
using cuda_utils::makeCudaStream;
using cuda_utils::StreamUniquePtr;
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;

struct Object
{
Expand Down Expand Up @@ -107,6 +110,10 @@ class TrtYoloX
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

bool doInference(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
std::vector<cv::Mat> & masks, std::vector<cv::Mat> & color_masks);

/**
* @brief run inference including pre-process and post-process
* @param[out] objects results for object detection
Expand All @@ -116,6 +123,10 @@ class TrtYoloX
bool doInferenceWithRoi(
const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doInferenceWithRoi(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
const std::vector<cv::Rect> & roi);

/**
* @brief run multi-scale inference including pre-process and post-process
* @param[out] objects results for object detection
Expand All @@ -125,6 +136,10 @@ class TrtYoloX
bool doMultiScaleInference(
const cv::Mat & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doMultiScaleInference(
const std::shared_ptr<ImageContainer> & image, ObjectArrays & objects,
const std::vector<cv::Rect> & roi);

/**
* @brief allocate buffer for preprocess on GPU
* @param[in] width original image width
Expand Down Expand Up @@ -167,6 +182,7 @@ class TrtYoloX
* @param[in] images batching images
*/
void preprocessGpu(const std::vector<cv::Mat> & images);
void preprocessGpu(const std::vector<std::shared_ptr<ImageContainer>> & images);

/**
* @brief run preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
Expand All @@ -183,6 +199,9 @@ class TrtYoloX
void preprocessWithRoiGpu(
const std::vector<cv::Mat> & images, const std::vector<cv::Rect> & rois);

void preprocessWithRoiGpu(
const std::vector<std::shared_ptr<ImageContainer>> & images,
const std::vector<cv::Rect> & rois);
/**
* @brief run multi-scale preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
* @param[in] images batching images
Expand All @@ -196,15 +215,29 @@ class TrtYoloX
* @param[in] rois region of interest
*/
void multiScalePreprocessGpu(const cv::Mat & image, const std::vector<cv::Rect> & rois);
void multiScalePreprocessGpu(
const std::shared_ptr<ImageContainer> & image, const std::vector<cv::Rect> & rois);

bool multiScaleFeedforward(const cv::Mat & image, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforward(
const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects);

bool multiScaleFeedforwardAndDecode(
const cv::Mat & images, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforwardAndDecode(
const std::shared_ptr<ImageContainer> & images, int batch_size, ObjectArrays & objects);

bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
bool feedforward(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects);

bool feedforwardAndDecode(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);
bool feedforwardAndDecode(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
std::vector<cv::Mat> & masks, std::vector<cv::Mat> & color_masks);

void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
void generateGridsAndStride(
const int target_w, const int target_h, const std::vector<int> & strides,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_

#include "object_recognition_utils/object_recognition_utils.hpp"
#include "type_adapters/image_container.hpp"

#include <autoware/universe_utils/ros/debug_publisher.hpp>
#include <autoware/universe_utils/system/stop_watch.hpp>
Expand Down Expand Up @@ -48,6 +49,7 @@ namespace tensorrt_yolox
// cspell: ignore Semseg
using LabelMap = std::map<int, std::string>;
using Label = tier4_perception_msgs::msg::Semantic;
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;
class TrtYoloXNode : public rclcpp::Node
{
struct RoiOverlaySemsegLabel
Expand All @@ -74,12 +76,22 @@ class TrtYoloXNode : public rclcpp::Node

private:
void onConnect();
void setUpImageSubscriber();
bool checkInputBlocked();
uint32_t getNumOutputConnections();
void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
void onGpuImage(const std::shared_ptr<ImageContainer> msg);
bool readLabelFile(const std::string & label_path);
void replaceLabelMap();
void overlapSegmentByRoi(
const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
int mapRoiLabel2SegLabel(const int32_t roi_label_index);
void postInference(
const std_msgs::msg::Header header, const tensorrt_yolox::ObjectArrays & objects,
const int width, const int height, std::vector<cv::Mat> & masks);
void drawImageDetection(
cv_bridge::CvImagePtr image, const tensorrt_yolox::ObjectArrays & objects, const int width,
const int height);
image_transport::Publisher image_pub_;
image_transport::Publisher mask_pub_;

Expand All @@ -88,13 +100,20 @@ class TrtYoloXNode : public rclcpp::Node
rclcpp::Publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr objects_pub_;

image_transport::Subscriber image_sub_;
rclcpp::Subscription<ImageContainer>::SharedPtr gpu_image_sub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_image_pub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_mask_pub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_color_mask_pub_;

rclcpp::TimerBase::SharedPtr timer_;

LabelMap label_map_;
std::unique_ptr<tensorrt_yolox::TrtYoloX> trt_yolox_;
bool is_roi_overlap_segment_;
bool is_publish_color_mask_;
bool is_using_image_transport_;
bool type_adaptation_activated_;
bool is_publish_debug_rois_image_;
float overlap_roi_score_threshold_;
// TODO(badai-nguyen): change to function
std::map<std::string, int> remap_roi_to_semantic_ = {
Expand Down
1 change: 1 addition & 0 deletions perception/tensorrt_yolox/package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<buildtool_export_depend>tensorrt_cmake_module</buildtool_export_depend>

<depend>autoware_perception_msgs</depend>
<depend>autoware_type_adapters</depend>
<depend>cuda_utils</depend>
<depend>cv_bridge</depend>
<depend>image_transport</depend>
Expand Down
15 changes: 15 additions & 0 deletions perception/tensorrt_yolox/schema/yolox_s_plus_opt.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@
"type": "string",
"default": "",
"description": "Path to a file which contains path to images. Those images will be used for int8 quantization."
},
"is_using_image_transport": {
"type": "boolean",
"default": false,
        "description": "If true, publish images via image_transport; if false, publish only raw images."
},
"type_adaptation_activated": {
"type": "boolean",
"default": false,
        "description": "If true, use type adaptation to transfer images between nodes in GPU memory."
},
"is_publish_debug_rois_image_": {
"type": "boolean",
"default": true,
        "description": "If true, draw detected ROIs onto the debug image."
}
},
"required": [
Expand Down
Loading
Loading