From fd8043a395cada8b5dc225f690ba9f8f471f1e76 Mon Sep 17 00:00:00 2001 From: lizhensheng Date: Mon, 17 Oct 2022 16:19:40 +0800 Subject: [PATCH] add gpu intra-process-typeadaption demo. --- CMakeLists.txt | 23 +-- cmake/CudaComputeTargetFlags.cmake | 43 ++++++ cuda/CMakeLists.txt | 105 ++++++++++++++ cuda/cuda_hello_world.cu | 21 +++ cuda/cuda_image_container.cpp | 226 +++++++++++++++++++++++++++++ cuda/cuda_image_container.hpp | 161 ++++++++++++++++++++ cuda/cuda_image_listener.hpp | 84 +++++++++++ cuda/cuda_image_node.cpp | 22 +++ cuda/cuda_image_talker.hpp | 98 +++++++++++++ cuda/gmat_image_container.cpp | 171 ++++++++++++++++++++++ cuda/gmat_image_container.hpp | 164 +++++++++++++++++++++ cuda/gmat_image_listener.hpp | 87 +++++++++++ cuda/gmat_image_node.cpp | 22 +++ cuda/gmat_image_talker.hpp | 98 +++++++++++++ cuda/intra_gmat_listener.hpp | 80 ++++++++++ cuda/intra_gmat_node.cpp | 22 +++ cuda/intra_gmat_talker.hpp | 93 ++++++++++++ 17 files changed, 1505 insertions(+), 15 deletions(-) create mode 100644 cmake/CudaComputeTargetFlags.cmake create mode 100644 cuda/CMakeLists.txt create mode 100644 cuda/cuda_hello_world.cu create mode 100644 cuda/cuda_image_container.cpp create mode 100644 cuda/cuda_image_container.hpp create mode 100644 cuda/cuda_image_listener.hpp create mode 100644 cuda/cuda_image_node.cpp create mode 100644 cuda/cuda_image_talker.hpp create mode 100644 cuda/gmat_image_container.cpp create mode 100644 cuda/gmat_image_container.hpp create mode 100644 cuda/gmat_image_listener.hpp create mode 100644 cuda/gmat_image_node.cpp create mode 100644 cuda/gmat_image_talker.hpp create mode 100644 cuda/intra_gmat_listener.hpp create mode 100644 cuda/intra_gmat_node.cpp create mode 100644 cuda/intra_gmat_talker.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index e21f30f..0fb7f64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,11 @@ cmake_minimum_required(VERSION 3.5) project(shm_msgs) -# add open3d installation directory here +list(INSERT CMAKE_MODULE_PATH 0 ${CMAKE_CURRENT_LIST_DIR}/cmake) + +# add installation directory here # list(INSERT CMAKE_PREFIX_PATH 0 /opt/open3d/open3d) +list(INSERT CMAKE_PREFIX_PATH 0 /opt/opencv/opencv-4.6.0/) if(NOT CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 17) @@ -81,21 +84,9 @@ list(REMOVE_ITEM PCL_LIBRARIES vtkRenderingOpenGL2 ) -find_package(OpenCV 4 QUIET - COMPONENTS - opencv_core - opencv_imgproc - opencv_imgcodecs - CONFIG -) +find_package(OpenCV 4 QUIET) if(NOT OpenCV_FOUND) - find_package(OpenCV 3 REQUIRED - COMPONENTS - opencv_core - opencv_imgproc - opencv_imgcodecs - CONFIG - ) + find_package(OpenCV 3 REQUIRED) endif() find_package(iceoryx_posh CONFIG REQUIRED) @@ -157,6 +148,8 @@ add_subdirectory(lib) add_subdirectory(src) # extra image intra-process demos add_subdirectory(intra) +# extra type-adaption cuda demos +add_subdirectory(cuda) if(BUILD_TESTING) add_subdirectory(test) diff --git a/cmake/CudaComputeTargetFlags.cmake b/cmake/CudaComputeTargetFlags.cmake new file mode 100644 index 0000000..f24731d --- /dev/null +++ b/cmake/CudaComputeTargetFlags.cmake @@ -0,0 +1,43 @@ +# +# Compute target flags macros by Anatoly Baksheev +# +# Usage in CmakeLists.txt: +# include(CudaComputeTargetFlags.cmake) +# APPEND_TARGET_ARCH_FLAGS() + +#compute flags macros +MACRO(CUDA_COMPUTE_TARGET_FLAGS arch_bin arch_ptx cuda_nvcc_target_flags) + string(REGEX REPLACE "\\." "" ARCH_BIN_WITHOUT_DOTS "${${arch_bin}}") + string(REGEX REPLACE "\\." "" ARCH_PTX_WITHOUT_DOTS "${${arch_ptx}}") + + set(cuda_computer_target_flags_temp "") + + # Tell NVCC to add binaries for the specified GPUs + string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_WITHOUT_DOTS}") + foreach(ARCH IN LISTS ARCH_LIST) + if (ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)") + # User explicitly specified PTX for the concrete BIN + set(cuda_computer_target_flags_temp ${cuda_computer_target_flags_temp} -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) + else() + # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN + set(cuda_computer_target_flags_temp ${cuda_computer_target_flags_temp} -gencode arch=compute_${ARCH},code=sm_${ARCH}) + endif() + endforeach() + + # Tell NVCC to add PTX intermediate code for the specified architectures + string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_WITHOUT_DOTS}") + foreach(ARCH IN LISTS ARCH_LIST) + set(cuda_computer_target_flags_temp ${cuda_computer_target_flags_temp} -gencode arch=compute_${ARCH},code=compute_${ARCH}) + endforeach() + + set(${cuda_nvcc_target_flags} ${cuda_computer_target_flags_temp}) +ENDMACRO() + +MACRO(APPEND_TARGET_ARCH_FLAGS) + set(cuda_nvcc_target_flags "") + CUDA_COMPUTE_TARGET_FLAGS(CUDA_ARCH_BIN CUDA_ARCH_PTX cuda_nvcc_target_flags) + if (cuda_nvcc_target_flags) + message(STATUS "CUDA NVCC target flags: ${cuda_nvcc_target_flags}") + list(APPEND CUDA_NVCC_FLAGS ${cuda_nvcc_target_flags}) + endif() +ENDMACRO() \ No newline at end of file diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt new file mode 100644 index 0000000..1fc5dd7 --- /dev/null +++ b/cuda/CMakeLists.txt @@ -0,0 +1,105 @@ +macro(GA_CHECK_CUDA) + if ($ENV{GPUAC_COMPILE_WITH_CUDA}) + find_package(CUDA REQUIRED QUIET) + find_package(Eigen3 REQUIRED QUIET) + + # if(NOT ${CUDA_VERSION} VERSION_LESS "10.0" + # AND NOT ${CUDA_VERSION} VERSION_EQUAL "10.0" ) + # message(FATAL_ERROR "GPU support on Melodic requires CUDA<=10.0") + # endif() + if(${CUDA_VERSION} VERSION_GREATER "9.1" + AND ${CMAKE_VERSION} VERSION_LESS "3.12.3") + unset(CUDA_cublas_device_LIBRARY CACHE) + set(CUDA_cublas_device_LIBRARY ${CUDA_cublas_LIBRARY}) + set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY}) + endif() + # if ("$ENV{ROS_DISTRO}" STREQUAL "melodic" AND ${EIGEN3_VERSION_STRING} VERSION_LESS "3.3.7") + # message(FATAL_ERROR "GPU support on Melodic requires Eigen version>= 3.3.7") + # endif() + if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 14) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) + endif() + set(USE_CUDA ON) + else() + message(WARNING "CUDA support is disabled. Set the GPUAC_COMPILE_WITH_CUDA environment variable and recompile to enable it") + set(USE_CUDA OFF) + endif() +endmacro() + +include(CudaComputeTargetFlags) +GA_CHECK_CUDA() +set_directory_properties(PROPERTIES COMPILE_DEFINITIONS "") +APPEND_TARGET_ARCH_FLAGS() + +if(NOT USE_CUDA) + return() +endif() + +# Enable NVTX markers for improved profiling +add_definitions(-DUSE_NVTX) +link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") +link_libraries("nvToolsExt") + +# nvcc -arch sm_50 ./cuda_hello_world.cu -o docker_hello +cuda_add_executable(cuda_hello_world cuda_hello_world.cu) +target_include_directories(cuda_hello_world PUBLIC + "$" + "$" + ${CUDA_INCLUDE_DIRS} +) +target_link_libraries(cuda_hello_world ${PROJECT_NAME}_image + ${CUDA_nvToolsExt_LIBRARY} + ${CUDA_LIBRARIES} +) +install(TARGETS cuda_hello_world DESTINATION lib/${PROJECT_NAME}) + +cuda_add_executable(cuda_image_node cuda_image_node.cpp cuda_image_container.cpp) +ament_target_dependencies(cuda_image_node "rclcpp" "cv_bridge" "sensor_msgs") +rosidl_get_typesupport_target(cpp_typesupport_target "${PROJECT_NAME}" "rosidl_typesupport_cpp") +target_include_directories(cuda_image_node PUBLIC + "$" + "$" + ${CUDA_INCLUDE_DIRS} +) +target_link_libraries(cuda_image_node ${PROJECT_NAME}_image + opencv_highgui + ${cpp_typesupport_target} + ${CUDA_nvToolsExt_LIBRARY} + ${CUDA_LIBRARIES} +) +install(TARGETS cuda_image_node DESTINATION lib/${PROJECT_NAME}) + +add_executable(intra_gmat_node intra_gmat_node.cpp) +ament_target_dependencies(intra_gmat_node "rclcpp" "cv_bridge" "sensor_msgs") +rosidl_get_typesupport_target(cpp_typesupport_target "${PROJECT_NAME}" "rosidl_typesupport_cpp") +target_include_directories(intra_gmat_node PUBLIC + "$" + "$" + ${CUDA_INCLUDE_DIRS} + ${OpenCV_INCLUDE_DIRS} +) +target_link_libraries(intra_gmat_node ${PROJECT_NAME}_image + ${cpp_typesupport_target} + ${CUDA_nvToolsExt_LIBRARY} + ${CUDA_LIBRARIES} + ${OpenCV_LIBS} +) +install(TARGETS intra_gmat_node DESTINATION lib/${PROJECT_NAME}) + +add_executable(gmat_image_node gmat_image_node.cpp gmat_image_container.cpp) +ament_target_dependencies(gmat_image_node "rclcpp" "cv_bridge" "sensor_msgs") +rosidl_get_typesupport_target(cpp_typesupport_target "${PROJECT_NAME}" "rosidl_typesupport_cpp") +target_include_directories(gmat_image_node PUBLIC + "$" + "$" + ${CUDA_INCLUDE_DIRS} + ${OpenCV_INCLUDE_DIRS} +) +target_link_libraries(gmat_image_node ${PROJECT_NAME}_image + ${cpp_typesupport_target} + ${CUDA_nvToolsExt_LIBRARY} + ${CUDA_LIBRARIES} + ${OpenCV_LIBS} +) +install(TARGETS gmat_image_node DESTINATION lib/${PROJECT_NAME}) diff --git a/cuda/cuda_hello_world.cu b/cuda/cuda_hello_world.cu new file mode 100644 index 0000000..6b7d1bd --- /dev/null +++ b/cuda/cuda_hello_world.cu @@ -0,0 +1,21 @@ +#include +#include +#include + +__global__ void cuda_hello(){ + printf("Hello World from GPU! %d\n", threadIdx.x*gridDim.x); +} + +int main() { + printf("Hello World from CPU!\n"); + cudaSetDevice(0); + cuda_hello<<<1,10>>>(); + uint8_t *cuda_mem_; + if (cudaMalloc(&cuda_mem_, 1024) != cudaSuccess) + { + throw std::runtime_error("Failed to allocate device memory"); + } + + cudaDeviceSynchronize(); + return 0; +} \ No newline at end of file diff --git a/cuda/cuda_image_container.cpp b/cuda/cuda_image_container.cpp new file mode 100644 index 0000000..f5f4220 --- /dev/null +++ b/cuda/cuda_image_container.cpp @@ -0,0 +1,226 @@ +#include +#include +#include + +#include "sensor_msgs/msg/image.hpp" +#include "std_msgs/msg/header.hpp" + +#include "cuda_image_container.hpp" + +#include "cuda.h" // NOLINT +#include "cuda_runtime.h" // NOLINT +#include // NOLINT + +namespace shm_msgs +{ +namespace +{ +template struct NotNull +{ + NotNull(const T *pointer_in, const char *msg) : pointer(pointer_in) + { + if (pointer == nullptr) + { + throw std::invalid_argument(msg); + } + } + + const T *pointer; +}; + +} // namespace + +CUDAStreamWrapper::CUDAStreamWrapper() +{ + cudaStreamCreate(&main_stream_); +} + +CUDAStreamWrapper::~CUDAStreamWrapper() +{ + cudaStreamDestroy(main_stream_); +} + +CUDAMemoryWrapper::CUDAMemoryWrapper(size_t bytes_to_allocate) : bytes_allocated_(bytes_to_allocate) +{ + // non-jetson using cudaMalloc rather than cudaMallocManaged +#ifndef __aarch64__ + if (cudaMalloc(&cuda_mem_, bytes_to_allocate) != cudaSuccess) +#else + if (cudaMallocManaged(&cuda_mem_, bytes_to_allocate) != cudaSuccess) +#endif + { + throw std::runtime_error("Failed to allocate device memory"); + } +} + +void CUDAMemoryWrapper::copy_to_device(uint8_t *host_mem, size_t bytes_to_copy, const cudaStream_t &stream) +{ + nvtxRangePushA("CudaImageContainer:CopyToDevice"); + if (bytes_to_copy > bytes_allocated_) + { + throw std::invalid_argument("Tried to copy too many bytes to device"); + } + if (cudaMemcpyAsync(cuda_mem_, host_mem, bytes_to_copy, cudaMemcpyHostToDevice, stream) != cudaSuccess) + { + throw std::runtime_error("Failed to copy memory to the GPU"); + } + cudaStreamSynchronize(stream); + nvtxRangePop(); +} + +void CUDAMemoryWrapper::copy_from_device(uint8_t *host_mem, size_t bytes_to_copy, const cudaStream_t &stream) +{ + nvtxRangePushA("CudaImageContainer:CopyFromDevice"); + if (bytes_to_copy > bytes_allocated_) + { + throw std::invalid_argument("Tried to copy too many bytes from device"); + } + if (cudaMemcpyAsync(host_mem, cuda_mem_, bytes_to_copy, cudaMemcpyDeviceToHost, stream) != cudaSuccess) + { + throw std::runtime_error("Failed to copy memory from the GPU"); + } + cudaStreamSynchronize(stream); + nvtxRangePop(); +} + +uint8_t *CUDAMemoryWrapper::device_memory() +{ + return cuda_mem_; +} + +CUDAMemoryWrapper::~CUDAMemoryWrapper() +{ + cudaFree(cuda_mem_); +} + +CUDAEventWrapper::CUDAEventWrapper() +{ + cudaEventCreate(&event_); +} + +void CUDAEventWrapper::record(std::shared_ptr cuda_stream) +{ + cudaEventRecord(event_, cuda_stream->stream()); +} + +CUDAEventWrapper::~CUDAEventWrapper() +{ + cudaEventDestroy(event_); +} + +CudaImageContainer::CudaImageContainer() +{ + cuda_stream_ = std::make_shared(); + cuda_event_ = std::make_shared(); +} + +CudaImageContainer::CudaImageContainer(std_msgs::msg::Header header, uint32_t height, uint32_t width, + std::string encoding, uint32_t step, + std::shared_ptr cuda_stream) + : header_(header), cuda_stream_(cuda_stream), height_(height), width_(width), encoding_(encoding), step_(step) +{ + nvtxRangePushA("CudaImageContainer:Create"); + cuda_mem_ = std::make_shared(size_in_bytes()); + cuda_event_ = std::make_shared(); + nvtxRangePop(); +} + +CudaImageContainer::CudaImageContainer(std::unique_ptr unique_sensor_msgs_image) + : CudaImageContainer( + NotNull(unique_sensor_msgs_image.get(), "unique_sensor_msgs_image cannot be nullptr").pointer->header, + unique_sensor_msgs_image->height, unique_sensor_msgs_image->width, unique_sensor_msgs_image->encoding, + unique_sensor_msgs_image->step) +{ + nvtxRangePushA("CudaImageContainer:CreateFromMessage"); + cuda_mem_->copy_to_device(&unique_sensor_msgs_image->data[0], size_in_bytes(), cuda_stream_->stream()); + nvtxRangePop(); +} + +CudaImageContainer::CudaImageContainer(const sensor_msgs::msg::Image &sensor_msgs_image) + : CudaImageContainer(std::make_unique(sensor_msgs_image)) +{ +} + +CudaImageContainer::CudaImageContainer(const CudaImageContainer &other) +{ + nvtxRangePushA("CudaImageContainer:Copy"); + header_ = other.header_; + height_ = other.height_; + width_ = other.width_; + encoding_ = other.encoding_; + step_ = other.step_; + + // Make a new stream and have it wait on the previous one. + cuda_stream_ = std::make_shared(); + cuda_event_ = std::make_shared(); + cuda_event_->record(other.cuda_stream_); + cudaStreamWaitEvent(cuda_stream_->stream(), cuda_event_->event()); + + // Copy the image data over so that this is an independent copy (deep copy). + cuda_mem_ = std::make_shared(size_in_bytes()); + if (cudaMemcpyAsync(other.cuda_mem_->device_memory(), cuda_mem_->device_memory(), size_in_bytes(), + cudaMemcpyDeviceToDevice, cuda_stream_->stream()) != cudaSuccess) + { + throw std::runtime_error("Failed to copy memory from the GPU"); + } + nvtxRangePop(); +} + +// Equals operator makes this container become another, shallow copy only. +CudaImageContainer &CudaImageContainer::operator=(const CudaImageContainer &other) +{ + if (this == &other) + { + return *this; + } + + header_ = other.header_; + height_ = other.height_; + width_ = other.width_; + encoding_ = other.encoding_; + step_ = other.step_; + cuda_stream_ = other.cuda_stream_; + cuda_event_ = other.cuda_event_; + cuda_mem_ = other.cuda_mem_; + + return *this; +} + +CudaImageContainer::~CudaImageContainer() +{ +} + +const std_msgs::msg::Header &CudaImageContainer::header() const +{ + return header_; +} + +std_msgs::msg::Header &CudaImageContainer::header() +{ + return header_; +} + +uint8_t *CudaImageContainer::cuda_mem() +{ + return cuda_mem_->device_memory(); +} + +void CudaImageContainer::get_sensor_msgs_image(sensor_msgs::msg::Image &destination) const +{ + nvtxRangePushA("CudaImageContainer:GetMsg"); + destination.header = header_; + destination.height = height_; + destination.width = width_; + destination.encoding = encoding_; + destination.step = step_; + destination.data.resize(size_in_bytes()); + cuda_mem_->copy_from_device(&destination.data[0], size_in_bytes(), cuda_stream_->stream()); + nvtxRangePop(); +} + +size_t CudaImageContainer::size_in_bytes() const +{ + return height_ * step_; +} + +} // namespace shm_msgs \ No newline at end of file diff --git a/cuda/cuda_image_container.hpp b/cuda/cuda_image_container.hpp new file mode 100644 index 0000000..3114bc6 --- /dev/null +++ b/cuda/cuda_image_container.hpp @@ -0,0 +1,161 @@ +#ifndef SHM_MSGS__CUDA_IMAGE_CONTAINER_HPP_ +#define SHM_MSGS__CUDA_IMAGE_CONTAINER_HPP_ + +#include +#include + +#include "rclcpp/type_adapter.hpp" +#include "sensor_msgs/msg/image.hpp" +#include "std_msgs/msg/header.hpp" + +#include "cuda.h" // NOLINT +#include "cuda_runtime.h" // NOLINT + +namespace shm_msgs +{ +class CUDAStreamWrapper final +{ + public: + CUDAStreamWrapper(); + + ~CUDAStreamWrapper(); + + cudaStream_t &stream() + { + return main_stream_; + } + + private: + cudaStream_t main_stream_{}; +}; + +class CUDAMemoryWrapper final +{ + public: + explicit CUDAMemoryWrapper(size_t size_in_bytes); + + void copy_to_device(uint8_t *host_mem, size_t bytes_to_copy, const cudaStream_t &stream); + + void copy_from_device(uint8_t *host_mem, size_t bytes_to_copy, const cudaStream_t &stream); + + uint8_t *device_memory(); + + ~CUDAMemoryWrapper(); + + private: + size_t bytes_allocated_{0}; + + uint8_t *cuda_mem_{nullptr}; +}; + +class CUDAEventWrapper final +{ + public: + CUDAEventWrapper(); + + void record(std::shared_ptr cuda_stream); + + cudaEvent_t &event() + { + return event_; + } + + ~CUDAEventWrapper(); + + private: + cudaEvent_t event_; +}; + +class CudaImageContainer final +{ + public: + CudaImageContainer(); + + /// Store an owning pointer to a sensor_msg::msg::Image, and create CUDA memory that references it + explicit CudaImageContainer(std::unique_ptr unique_sensor_msgs_image); + + /// Copy the sensor_msgs::msg::Image into this contain and create CUDA memory that references it. + explicit CudaImageContainer(const sensor_msgs::msg::Image &sensor_msgs_image); + + explicit CudaImageContainer(const CudaImageContainer &other); + + CudaImageContainer(std_msgs::msg::Header header, uint32_t height, uint32_t width, std::string encoding, + uint32_t step, + std::shared_ptr cuda_stream = std::make_shared()); + + CudaImageContainer &operator=(const CudaImageContainer &other); + + ~CudaImageContainer(); + + /// Const access the ROS Header. + const std_msgs::msg::Header &header() const; + + /// Access the ROS Header. + std_msgs::msg::Header &header(); + + void get_sensor_msgs_image(sensor_msgs::msg::Image &destination) const; + + uint8_t *cuda_mem(); + + size_t size_in_bytes() const; + + std::shared_ptr cuda_stream() const + { + return cuda_stream_; + } + + uint32_t height() const + { + return height_; + } + + uint32_t width() const + { + return width_; + } + + const std::string &encoding() const + { + return encoding_; + } + + uint32_t step() const + { + return step_; + } + + private: + std_msgs::msg::Header header_; + + std::shared_ptr cuda_stream_; + + std::shared_ptr cuda_mem_; + + std::shared_ptr cuda_event_; + + uint32_t height_{0}; + uint32_t width_{0}; + std::string encoding_; + uint32_t step_{0}; +}; + +} // namespace shm_msgs + +template <> struct rclcpp::TypeAdapter +{ + using is_specialized = std::true_type; + using custom_type = shm_msgs::CudaImageContainer; + using ros_message_type = sensor_msgs::msg::Image; + + static void convert_to_ros_message(const custom_type &source, ros_message_type &destination) + { + source.get_sensor_msgs_image(destination); + } + + static void convert_to_custom(const ros_message_type &source, custom_type &destination) + { + destination = shm_msgs::CudaImageContainer(source); + } +}; + +#endif // SHM_MSGS__CUDA_IMAGE_CONTAINER_HPP_ diff --git a/cuda/cuda_image_listener.hpp b/cuda/cuda_image_listener.hpp new file mode 100644 index 0000000..4bc23aa --- /dev/null +++ b/cuda/cuda_image_listener.hpp @@ -0,0 +1,84 @@ +// Copyright 2021 Apex.AI, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "rclcpp/rclcpp.hpp" +#include "rclcpp/type_adapter.hpp" + +#include "sensor_msgs/msg/image.hpp" +#include + +#include "cuda_image_container.hpp" + +#include + +using namespace std::chrono_literals; +using namespace cv_bridge; + +class Listener : public rclcpp::Node { +private: + using Topic = rclcpp::TypeAdapter; + using DataType = shm_msgs::CudaImageContainer; + +public: + explicit Listener() + : Node("cuda_image_listener", rclcpp::NodeOptions().use_intra_process_comms(true)) { + + // subscription callback to process arriving data + auto callback = [this](std::unique_ptr msg) -> void { + + RCLCPP_INFO(this->get_logger(), "received with address: 0x%" PRIXPTR "", reinterpret_cast(msg.get())); + + // no conversion + // last_cvimage = cv_bridge::toCvShare(msg); + // last_cvimage = cv_bridge::toCvCopy(msg); + + // move to the private ptr + m_last_container = std::move(msg); + RCLCPP_INFO(this->get_logger(), "container address: 0x%" PRIXPTR "", reinterpret_cast(m_last_container.get())); + + auto time_offset_ns = (now() - m_last_container->header().stamp).nanoseconds(); + auto timestamp_offset_ns = (rclcpp::Time(m_last_container->header().stamp) - m_last_image_ts).nanoseconds(); + auto time_offset_ms = time_offset_ns / 1000000.0F; + auto timestamp_offset_ms = timestamp_offset_ns / 1000000.0F; + RCLCPP_INFO(get_logger(), "get-image-transport-time: %.3f", time_offset_ms); + if(m_last_image_ts.nanoseconds() > 0.0) + { + RCLCPP_INFO(get_logger(), "get-image-timestamp_offset-time: %.3f", timestamp_offset_ms); + } + m_last_image_ts = m_last_container->header().stamp; + // cv::imshow("im show", last_cvimage->image); + // cv::waitKey(0); + }; + + // rclcpp::QoS qos(rclcpp::KeepLast(10)); + rclcpp::QoS custom_qos_profile = rclcpp::QoS(rclcpp::QoSInitialization::from_rmw(rmw_qos_profile_default)) + .history(rmw_qos_history_policy_t::RMW_QOS_POLICY_HISTORY_KEEP_LAST) + .keep_last(5) + .reliability(rmw_qos_reliability_policy_t::RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT) + .durability(rmw_qos_durability_policy_t::RMW_QOS_POLICY_DURABILITY_VOLATILE) + .avoid_ros_namespace_conventions(false); + + m_subscription = create_subscription("cuda_image", custom_qos_profile, callback); + } + +private: + rclcpp::Subscription::SharedPtr m_subscription; + + std::unique_ptr m_last_container; + rclcpp::Time m_last_image_ts{0, 0, RCL_ROS_TIME}; +}; diff --git a/cuda/cuda_image_node.cpp b/cuda/cuda_image_node.cpp new file mode 100644 index 0000000..c313b6d --- /dev/null +++ b/cuda/cuda_image_node.cpp @@ -0,0 +1,22 @@ +#include + +#include + +#include "cuda_image_listener.hpp" +#include "cuda_image_talker.hpp" + +int main(int argc, char *argv[]) +{ + setvbuf(stdout, NULL, _IONBF, BUFSIZ); + rclcpp::init(argc, argv); + rclcpp::executors::SingleThreadedExecutor executor; + + auto talker = std::make_shared(); + auto listener = std::make_shared(); + executor.add_node(talker); + executor.add_node(listener); + + executor.spin(); + rclcpp::shutdown(); + return 0; +} \ No newline at end of file diff --git a/cuda/cuda_image_talker.hpp b/cuda/cuda_image_talker.hpp new file mode 100644 index 0000000..f6bff44 --- /dev/null +++ b/cuda/cuda_image_talker.hpp @@ -0,0 +1,98 @@ +// Copyright 2021 Apex.AI, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include "rclcpp/rclcpp.hpp" +#include "rclcpp/type_adapter.hpp" + +#include "sensor_msgs/msg/image.hpp" +#include +#include + +#include + +#include "cuda_image_container.hpp" + +using namespace std::chrono_literals; +using namespace cv_bridge; + +class Talker : public rclcpp::Node +{ + private: + using Topic = rclcpp::TypeAdapter; + using DataType = shm_msgs::CudaImageContainer; + + public: + explicit Talker() : Node("cuda_image_talker", rclcpp::NodeOptions().use_intra_process_comms(true)) + { + + // m_input_cvimage->image = cv::imread("./res/img/205_182.png"); + // m_input_cvimage->image = cv::imread("./res/img/1024_768.jpeg"); + m_input_cvimage->image = cv::imread("./res/img/1920_1080.jpg"); + m_input_cvimage->header.frame_id = "camera_link"; + m_input_cvimage->encoding = "bgr8"; + // cv::imshow("input image", m_input_cvimage->image); + // cv::waitKey(0); + + auto publishMessage = [this]() -> void { + // conversion from mat to sensor_msg + m_input_rosimage = std::make_unique(); + // todo: from cvmat to container + m_input_cvimage->toImageMsg(*m_input_rosimage); + // start here + m_input_rosimage->header.stamp = now(); + + // no deep copy from capture to published + auto msg = std::make_unique(std::move(m_input_rosimage)); + + // RCLCPP_INFO(this->get_logger(), "Publishing with ts: %u.%u", msg->header().stamp.sec, + // msg->header().stamp.nanosec); + RCLCPP_INFO(this->get_logger(), "Publishing with address: 0x%" PRIXPTR "", + reinterpret_cast(msg.get())); + + m_publisher->publish(std::move(msg)); + // We gave up ownership + + m_count++; + }; + + // rclcpp::QoS qos(rclcpp::KeepLast(10)); + // rclcpp::QoS qos(rclcpp::SensorDataQoS()); + rclcpp::QoS custom_qos_profile = + rclcpp::QoS(rclcpp::QoSInitialization::from_rmw(rmw_qos_profile_default)) + .history(rmw_qos_history_policy_t::RMW_QOS_POLICY_HISTORY_KEEP_LAST) + .keep_last(5) + .reliability(rmw_qos_reliability_policy_t::RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT) + .durability(rmw_qos_durability_policy_t::RMW_QOS_POLICY_DURABILITY_VOLATILE) + .avoid_ros_namespace_conventions(false); + + m_publisher = this->create_publisher("cuda_image", custom_qos_profile); + + // Use a timer to schedule periodic message publishing. + m_timer = this->create_wall_timer(1s, publishMessage); + } + + private: + uint64_t m_count = 1; + rclcpp::Publisher::SharedPtr m_publisher; + rclcpp::TimerBase::SharedPtr m_timer; + std::shared_ptr m_input_cvimage{std::make_shared()}; + std::unique_ptr m_input_rosimage{std::make_unique()}; +}; diff --git a/cuda/gmat_image_container.cpp b/cuda/gmat_image_container.cpp new file mode 100644 index 0000000..e46ca5a --- /dev/null +++ b/cuda/gmat_image_container.cpp @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include + +#include "opencv2/core/mat.hpp" + +#include "sensor_msgs/msg/image.hpp" +#include "std_msgs/msg/header.hpp" + +#include "gmat_image_container.hpp" + +namespace shm_msgs +{ + +namespace +{ +int +encoding2mat_type(const std::string & encoding) +{ + if (encoding == "mono8") { + return CV_8UC1; + } else if (encoding == "bgr8") { + return CV_8UC3; + } else if (encoding == "mono16") { + return CV_16SC1; + } else if (encoding == "rgba8") { + return CV_8UC4; + } else if (encoding == "bgra8") { + return CV_8UC4; + } else if (encoding == "32FC1") { + return CV_32FC1; + } else if (encoding == "rgb8") { + return CV_8UC3; + } else if (encoding == "yuv422") { + return CV_8UC2; + } else { + throw std::runtime_error("Unsupported encoding type"); + } +} + +template +struct NotNull +{ + NotNull(const T * pointer_in, const char * msg) + : pointer(pointer_in) + { + if (pointer == nullptr) { + throw std::invalid_argument(msg); + } + } + + const T * pointer; +}; + +} // namespace + +ROSGpuMatContainer::ROSGpuMatContainer( + std::unique_ptr unique_sensor_msgs_image) +: header_(NotNull( + unique_sensor_msgs_image.get(), + "unique_sensor_msgs_image cannot be nullptr" +).pointer->header), + frame_( + unique_sensor_msgs_image->height, + unique_sensor_msgs_image->width, + encoding2mat_type(unique_sensor_msgs_image->encoding), + unique_sensor_msgs_image->data.data(), + unique_sensor_msgs_image->step), + storage_(std::move(unique_sensor_msgs_image)) +{} + +ROSGpuMatContainer::ROSGpuMatContainer( + const cv::Mat & mat_frame, + const std_msgs::msg::Header & header, + std::shared_ptr cuda_stream) +: header_(header), + cv_cuda_stream_(cuda_stream), + frame_(mat_frame), + storage_(nullptr) +{ + cv_cuda_event_ = std::make_shared(); +} + +ROSGpuMatContainer::ROSGpuMatContainer( + const sensor_msgs::msg::Image & sensor_msgs_image) +: ROSGpuMatContainer(std::make_unique(sensor_msgs_image)) +{} + +bool +ROSGpuMatContainer::is_owning() const +{ + return std::holds_alternative(storage_); +} + +const cv::cuda::GpuMat & +ROSGpuMatContainer::cv_gpu_mat() const +{ + return frame_; +} + +cv::cuda::GpuMat +ROSGpuMatContainer::cv_gpu_mat() +{ + return frame_; +} + +const std_msgs::msg::Header & +ROSGpuMatContainer::header() const +{ + return header_; +} + +std_msgs::msg::Header & +ROSGpuMatContainer::header() +{ + return header_; +} + +// std::shared_ptr +// ROSGpuMatContainer::get_sensor_msgs_msg_image_pointer() const +// { +// if (!std::holds_alternative>(storage_)) { +// return nullptr; +// } +// return std::get>(storage_); +// } + +std::unique_ptr +ROSGpuMatContainer::get_sensor_msgs_msg_image_pointer_copy() const +{ + auto unique_image = std::make_unique(); + this->get_sensor_msgs_msg_image_copy(*unique_image); + return unique_image; +} + +void +ROSGpuMatContainer::get_sensor_msgs_msg_image_copy( + sensor_msgs::msg::Image & sensor_msgs_image) const +{ + sensor_msgs_image.height = frame_.rows; + sensor_msgs_image.width = frame_.cols; + switch (frame_.type()) { + case CV_8UC1: + sensor_msgs_image.encoding = "mono8"; + break; + case CV_8UC3: + sensor_msgs_image.encoding = "bgr8"; + break; + case CV_16SC1: + sensor_msgs_image.encoding = "mono16"; + break; + case CV_8UC4: + sensor_msgs_image.encoding = "rgba8"; + break; + default: + throw std::runtime_error("unsupported encoding type"); + } + sensor_msgs_image.step = static_cast(frame_.step); + size_t size = frame_.step * frame_.rows; + sensor_msgs_image.data.resize(size); + // memcpy(&sensor_msgs_image.data[0], frame_.data, size); + cv::Mat cpu_mat; + frame_.download(cpu_mat, *cv_cuda_stream_); + sensor_msgs_image.data.assign(cpu_mat.datastart, cpu_mat.dataend); + + sensor_msgs_image.header = header_; +} + +} // namespace shm_msgs \ No newline at end of file diff --git a/cuda/gmat_image_container.hpp b/cuda/gmat_image_container.hpp new file mode 100644 index 0000000..9e192c4 --- /dev/null +++ b/cuda/gmat_image_container.hpp @@ -0,0 +1,164 @@ +#ifndef SHM_MSGS__GPU_MAT_SENSOR_MSGS_IMAGE_TYPE_ADAPTER_HPP_ +#define SHM_MSGS__GPU_MAT_SENSOR_MSGS_IMAGE_TYPE_ADAPTER_HPP_ + +#include +#include +#include + +#include "opencv2/core/mat.hpp" +#include + +#include "rclcpp/type_adapter.hpp" +#include "sensor_msgs/msg/image.hpp" + +namespace shm_msgs +{ + +class ROSGpuMatContainer +{ + +public: + using SensorMsgsImageStorageType = std::variant< + std::nullptr_t, + std::unique_ptr, + std::shared_ptr + >; + + ROSGpuMatContainer() + { + cv_cuda_stream_ = std::make_shared(); + cv_cuda_event_ = std::make_shared(); + } + + // deep copy from other + explicit ROSGpuMatContainer(const ROSGpuMatContainer & other) + : header_(other.header_) + { + // Make a new stream and have it wait on the previous one. + cv_cuda_stream_ = std::make_shared(); + cv_cuda_event_ = std::make_shared(); + cv_cuda_event_->record(*other.cv_cuda_stream_); + cv_cuda_stream_->waitEvent(*cv_cuda_event_); + + other.frame_.copyTo(frame_, *cv_cuda_stream_); + if (std::holds_alternative>(other.storage_)) { + storage_ = std::get>(other.storage_); + } else if (std::holds_alternative>(other.storage_)) { + storage_ = std::make_unique( + *std::get>(other.storage_)); + } + } + + // shallow copy only + ROSGpuMatContainer & operator=(const ROSGpuMatContainer & other) + { + if (this != &other) { + header_ = other.header_; + frame_ = other.frame_; + cv_cuda_stream_ = other.cv_cuda_stream_; + cv_cuda_event_ = other.cv_cuda_event_; + if (std::holds_alternative>(other.storage_)) { + storage_ = std::get>(other.storage_); + } else if (std::holds_alternative>(other.storage_)) { + storage_ = std::make_unique( + *std::get>(other.storage_)); + } else if (std::holds_alternative(other.storage_)) { + storage_ = nullptr; + } + } + return *this; + } + + /// Store an owning pointer to a sensor_msg::msg::Image, and create a cv::cuda::GpuMat that references it. + explicit ROSGpuMatContainer(std::unique_ptr unique_sensor_msgs_image); + + /// copy the given cv::Mat into this class. + ROSGpuMatContainer( + const cv::Mat & mat_frame, + const std_msgs::msg::Header & header, + std::shared_ptr cv_cuda_stream); + + /// Copy the sensor_msgs::msg::Image into this contain and create a cv::cuda::GpuMat that references it. + explicit ROSGpuMatContainer(const sensor_msgs::msg::Image & sensor_msgs_image); + + /// Return true if this class owns the data the cv_gpu_mat references. + /** + * Note that this does not check if the cv::cuda::GpuMat owns its own data, only if + * this class owns a sensor_msgs::msg::Image that the cv::cuda::GpuMat references. + */ + bool + is_owning() const; + + /// Const access the cv::cuda::GpuMat in this class. + const cv::cuda::GpuMat & + cv_gpu_mat() const; + + /// Get a shallow copy of the cv::cuda::GpuMat that is in this class. + /** + * Note that if you want to let this container go out of scope you should + * make a deep copy with cv::cuda::GpuMat::clone() beforehand. + */ + cv::cuda::GpuMat + cv_gpu_mat(); + + /// Const access the ROS Header. + const std_msgs::msg::Header & + header() const; + + /// Access the ROS Header. + std_msgs::msg::Header & + header(); + + /// Get shared const pointer to the sensor_msgs::msg::Image if available, otherwise nullptr. + // std::shared_ptr + // get_sensor_msgs_msg_image_pointer() const; + + /// Get copy as a unique pointer to the sensor_msgs::msg::Image. + std::unique_ptr + get_sensor_msgs_msg_image_pointer_copy() const; + + /// Get a copy of the image as a sensor_msgs::msg::Image. + sensor_msgs::msg::Image + get_sensor_msgs_msg_image_copy() const; + + /// Get a copy of the image as a sensor_msgs::msg::Image. + void + get_sensor_msgs_msg_image_copy(sensor_msgs::msg::Image & sensor_msgs_image) const; + +private: + std_msgs::msg::Header header_; + cv::cuda::GpuMat frame_; + std::shared_ptr cv_cuda_stream_; + std::shared_ptr cv_cuda_event_; + SensorMsgsImageStorageType storage_; +}; + +} // namespace shm_msgs + +template<> +struct rclcpp::TypeAdapter +{ + using is_specialized = std::true_type; + using custom_type = shm_msgs::ROSGpuMatContainer; + using ros_message_type = sensor_msgs::msg::Image; + + static + void + convert_to_ros_message( + const custom_type & source, + ros_message_type & destination) + { + source.get_sensor_msgs_msg_image_copy(destination); + } + + static + void + convert_to_custom( + const ros_message_type & source, + custom_type & destination) + { + destination = shm_msgs::ROSGpuMatContainer(source); + } +}; + +#endif // SHM_MSGS__GPU_MAT_SENSOR_MSGS_IMAGE_TYPE_ADAPTER_HPP_ \ No newline at end of file diff --git a/cuda/gmat_image_listener.hpp b/cuda/gmat_image_listener.hpp new file mode 100644 index 0000000..23747e5 --- /dev/null +++ b/cuda/gmat_image_listener.hpp @@ -0,0 +1,87 @@ +// Copyright 2021 Apex.AI, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "rclcpp/rclcpp.hpp" +#include "rclcpp/type_adapter.hpp" + +#include "sensor_msgs/msg/image.hpp" +#include + +#include "gmat_image_container.hpp" + +#include + +using namespace std::chrono_literals; +using namespace cv_bridge; + +class Listener : public rclcpp::Node { +private: + using Topic = rclcpp::TypeAdapter; + using DataType = shm_msgs::ROSGpuMatContainer; + +public: + explicit Listener() + : Node("gmat_image_listener", rclcpp::NodeOptions().use_intra_process_comms(true)) { + + // subscription callback to process arriving data + auto callback = [this](std::unique_ptr msg) -> void { + + RCLCPP_INFO(this->get_logger(), "received with address: 0x%" PRIXPTR "", reinterpret_cast(msg.get())); + + // no conversion + // last_cvimage = cv_bridge::toCvShare(msg); + // last_cvimage = cv_bridge::toCvCopy(msg); + + // move to the private ptr + m_last_container = std::move(msg); + RCLCPP_INFO(this->get_logger(), "container address: 0x%" PRIXPTR "", reinterpret_cast(m_last_container.get())); + + // directly process gpumat, no copy from gpu to cpu + m_last_container->cv_gpu_mat(); + + auto time_offset_ns = (now() - m_last_container->header().stamp).nanoseconds(); + auto timestamp_offset_ns = (rclcpp::Time(m_last_container->header().stamp) - m_last_image_ts).nanoseconds(); + auto time_offset_ms = time_offset_ns / 1000000.0F; + auto timestamp_offset_ms = timestamp_offset_ns / 1000000.0F; + RCLCPP_INFO(get_logger(), "get-image-transport-time: %.3f", time_offset_ms); + if(m_last_image_ts.nanoseconds() > 0.0) + { + RCLCPP_INFO(get_logger(), "get-image-timestamp_offset-time: %.3f", timestamp_offset_ms); + } + m_last_image_ts = m_last_container->header().stamp; + // cv::imshow("im show", last_cvimage->image); + // cv::waitKey(0); + }; + + // rclcpp::QoS qos(rclcpp::KeepLast(10)); + rclcpp::QoS custom_qos_profile = rclcpp::QoS(rclcpp::QoSInitialization::from_rmw(rmw_qos_profile_default)) + .history(rmw_qos_history_policy_t::RMW_QOS_POLICY_HISTORY_KEEP_LAST) + .keep_last(5) + .reliability(rmw_qos_reliability_policy_t::RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT) + .durability(rmw_qos_durability_policy_t::RMW_QOS_POLICY_DURABILITY_VOLATILE) + .avoid_ros_namespace_conventions(false); + + m_subscription = create_subscription("gmat_image", custom_qos_profile, callback); + } + +private: + rclcpp::Subscription::SharedPtr m_subscription; + + std::unique_ptr m_last_container; + rclcpp::Time m_last_image_ts{0, 0, RCL_ROS_TIME}; +}; diff --git a/cuda/gmat_image_node.cpp b/cuda/gmat_image_node.cpp new file mode 100644 index 0000000..a814e85 --- /dev/null +++ b/cuda/gmat_image_node.cpp @@ -0,0 +1,22 @@ +#include + +#include + +#include "gmat_image_talker.hpp" +#include "gmat_image_listener.hpp" + +int main(int argc, char * argv[]) +{ + setvbuf(stdout, NULL, _IONBF, BUFSIZ); + rclcpp::init(argc, argv); + rclcpp::executors::SingleThreadedExecutor executor; + + auto talker = std::make_shared(); + auto listener = std::make_shared(); + executor.add_node(talker); + executor.add_node(listener); + + executor.spin(); + rclcpp::shutdown(); + return 0; +} \ No newline at end of file diff --git a/cuda/gmat_image_talker.hpp b/cuda/gmat_image_talker.hpp new file mode 100644 index 0000000..4c963b3 --- /dev/null +++ b/cuda/gmat_image_talker.hpp @@ -0,0 +1,98 @@ +// Copyright 2021 Apex.AI, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include "rclcpp/rclcpp.hpp" +#include "rclcpp/type_adapter.hpp" + +#include "sensor_msgs/msg/image.hpp" +#include +#include + +#include + +#include "gmat_image_container.hpp" + +using namespace std::chrono_literals; +using namespace cv_bridge; + +// RCLCPP_USING_CUSTOM_TYPE_AS_ROS_MESSAGE_TYPE( +// shm_msgs::ROSGpuMatContainer, +// sensor_msgs::msg::Image); + +class Talker : public rclcpp::Node { +private: + using Topic = rclcpp::TypeAdapter; + using DataType = shm_msgs::ROSGpuMatContainer; + +public: + explicit Talker() + : Node("gmat_image_talker", rclcpp::NodeOptions().use_intra_process_comms(true)) { + + // m_input_cvimage->image = cv::imread("./res/img/205_182.png"); + // m_input_cvimage->image = cv::imread("./res/img/1024_768.jpeg"); + m_input_cvimage->image = cv::imread("./res/img/1920_1080.jpg"); + m_input_cvimage->header.frame_id = "camera_link"; + m_input_cvimage->encoding = "bgr8"; + // cv::imshow("input image", m_input_cvimage->image); + // cv::waitKey(0); + cv_cuda_stream_ = std::make_shared(); + + auto publishMessage = [this]() -> void { + m_input_cvimage->header.stamp = now(); + + // no deep copy from capture to published + // copy from cpu to gpu + auto msg = std::make_unique(m_input_cvimage->image, m_input_cvimage->header, cv_cuda_stream_); + + // no conversion + // m_input_cvimage->toImageMsg(*msg); + + // RCLCPP_INFO(this->get_logger(), "Publishing with ts: %u.%u", msg->header().stamp.sec, msg->header().stamp.nanosec); + RCLCPP_INFO(this->get_logger(), "Publishing with address: 0x%" PRIXPTR "", reinterpret_cast(msg.get())); + + m_publisher->publish(std::move(msg)); + // We gave up ownership + + m_count++; + }; + + // rclcpp::QoS qos(rclcpp::KeepLast(10)); + // rclcpp::QoS qos(rclcpp::SensorDataQoS()); + rclcpp::QoS custom_qos_profile = rclcpp::QoS(rclcpp::QoSInitialization::from_rmw(rmw_qos_profile_default)) + .history(rmw_qos_history_policy_t::RMW_QOS_POLICY_HISTORY_KEEP_LAST) + .keep_last(5) + .reliability(rmw_qos_reliability_policy_t::RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT) + .durability(rmw_qos_durability_policy_t::RMW_QOS_POLICY_DURABILITY_VOLATILE) + .avoid_ros_namespace_conventions(false); + + m_publisher = this->create_publisher("gmat_image", custom_qos_profile); + + // Use a timer to schedule periodic message publishing. + m_timer = this->create_wall_timer(1s, publishMessage); + } + +private: + uint64_t m_count = 1; + rclcpp::Publisher::SharedPtr m_publisher; + rclcpp::TimerBase::SharedPtr m_timer; + std::shared_ptr m_input_cvimage{std::make_shared()}; + std::shared_ptr cv_cuda_stream_; +}; diff --git a/cuda/intra_gmat_listener.hpp b/cuda/intra_gmat_listener.hpp new file mode 100644 index 0000000..2d859d7 --- /dev/null +++ b/cuda/intra_gmat_listener.hpp @@ -0,0 +1,80 @@ +// Copyright 2021 Apex.AI, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "rclcpp/rclcpp.hpp" + +#include "sensor_msgs/msg/image.hpp" +#include + +#include +#include + +using namespace std::chrono_literals; +using namespace cv_bridge; +class Listener : public rclcpp::Node { +private: + using Topic = sensor_msgs::msg::Image; + +public: + explicit Listener() + : Node("intra_gmat_listener", rclcpp::NodeOptions().use_intra_process_comms(true)) { + + // subscription callback to process arriving data + auto callback = [this](const Topic::SharedPtr msg) -> void { + + RCLCPP_INFO(this->get_logger(), "received with address: 0x%" PRIXPTR "", reinterpret_cast(msg.get())); + + // from receive to process + last_cvimage = cv_bridge::toCvShare(msg); + // last_cvimage = cv_bridge::toCvCopy(msg); + + // copy from cpu to gpu + m_last_gmatimage->upload(last_cvimage->image); + + auto time_offset_ns = (now() - last_cvimage->header.stamp).nanoseconds(); + auto timestamp_offset_ns = (rclcpp::Time(msg->header.stamp) - m_last_image_ts).nanoseconds(); + auto time_offset_ms = time_offset_ns / 1000000.0F; + auto timestamp_offset_ms = timestamp_offset_ns / 1000000.0F; + RCLCPP_INFO(get_logger(), "get-image-transport-time: %.3f", time_offset_ms); + if(m_last_image_ts.nanoseconds() > 0.0) + { + RCLCPP_INFO(get_logger(), "get-image-timestamp_offset-time: %.3f", timestamp_offset_ms); + } + m_last_image_ts = msg->header.stamp; + // cv::imshow("im show", last_cvimage->image); + // cv::waitKey(0); + }; + + // rclcpp::QoS qos(rclcpp::KeepLast(10)); + rclcpp::QoS custom_qos_profile = rclcpp::QoS(rclcpp::QoSInitialization::from_rmw(rmw_qos_profile_default)) + .history(rmw_qos_history_policy_t::RMW_QOS_POLICY_HISTORY_KEEP_LAST) + .keep_last(5) + .reliability(rmw_qos_reliability_policy_t::RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT) + .durability(rmw_qos_durability_policy_t::RMW_QOS_POLICY_DURABILITY_VOLATILE) + .avoid_ros_namespace_conventions(false); + + m_subscription = create_subscription("intra_gmat", custom_qos_profile, callback); + } + +private: + rclcpp::Subscription::SharedPtr m_subscription; + + cv_bridge::CvImageConstPtr last_cvimage; + std::shared_ptr m_last_gmatimage{std::make_shared()}; + rclcpp::Time m_last_image_ts{0, 0, RCL_ROS_TIME}; +}; diff --git a/cuda/intra_gmat_node.cpp b/cuda/intra_gmat_node.cpp new file mode 100644 index 0000000..de1c806 --- /dev/null +++ b/cuda/intra_gmat_node.cpp @@ -0,0 +1,22 @@ +#include + +#include + +#include "intra_gmat_talker.hpp" +#include "intra_gmat_listener.hpp" + +int main(int argc, char * argv[]) +{ + setvbuf(stdout, NULL, _IONBF, BUFSIZ); + rclcpp::init(argc, argv); + rclcpp::executors::SingleThreadedExecutor executor; + + auto talker = std::make_shared(); + auto listener = std::make_shared(); + executor.add_node(talker); + executor.add_node(listener); + + executor.spin(); + rclcpp::shutdown(); + return 0; +} \ No newline at end of file diff --git a/cuda/intra_gmat_talker.hpp b/cuda/intra_gmat_talker.hpp new file mode 100644 index 0000000..cdf0b12 --- /dev/null +++ b/cuda/intra_gmat_talker.hpp @@ -0,0 +1,93 @@ +// Copyright 2021 Apex.AI, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +#include "rclcpp/rclcpp.hpp" + +#include "sensor_msgs/msg/image.hpp" +#include +#include + +#include +#include + +using namespace std::chrono_literals; +using namespace cv_bridge; + +class Talker : public rclcpp::Node { +private: + using Topic = sensor_msgs::msg::Image; + +public: + explicit Talker() + : Node("intra_gmat_talker", rclcpp::NodeOptions().use_intra_process_comms(true)) { + + // m_input_cvimage->image = cv::imread("./res/img/205_182.png"); + // m_input_cvimage->image = cv::imread("./res/img/1024_768.jpeg"); + m_input_cvimage->image = cv::imread("./res/img/1920_1080.jpg"); + m_input_cvimage->header.frame_id = "camera_link"; + m_input_cvimage->encoding = "bgr8"; + // cv::imshow("input image", m_input_cvimage->image); + // cv::waitKey(0); + // copy from cpu to gpu and processing + m_input_gmatimage->upload(m_input_cvimage->image); + + auto publishMessage = [this]() -> void { + m_input_cvimage->header.stamp = now(); + + // copy from gpu to cpu + m_input_gmatimage->download(m_input_cvimage->image); + + auto msg = std::make_unique(); + // a copy from capture to published + m_input_cvimage->toImageMsg(*msg); + + // RCLCPP_INFO(this->get_logger(), "Publishing with ts: %u.%u", m_input_cvimage->header.stamp.sec, m_input_cvimage->header.stamp.nanosec); + RCLCPP_INFO(this->get_logger(), "Publishing with address: 0x%" PRIXPTR "", reinterpret_cast(msg.get())); + + m_publisher->publish(std::move(msg)); + // We gave up ownership + + m_count++; + }; + + // rclcpp::QoS qos(rclcpp::KeepLast(10)); + // rclcpp::QoS qos(rclcpp::SensorDataQoS()); + rclcpp::QoS custom_qos_profile = rclcpp::QoS(rclcpp::QoSInitialization::from_rmw(rmw_qos_profile_default)) + .history(rmw_qos_history_policy_t::RMW_QOS_POLICY_HISTORY_KEEP_LAST) + .keep_last(5) + .reliability(rmw_qos_reliability_policy_t::RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT) + .durability(rmw_qos_durability_policy_t::RMW_QOS_POLICY_DURABILITY_VOLATILE) + .avoid_ros_namespace_conventions(false); + + m_publisher = this->create_publisher("intra_gmat", custom_qos_profile); + + // Use a timer to schedule periodic message publishing. + m_timer = this->create_wall_timer(1s, publishMessage); + } + +private: + uint64_t m_count = 1; + rclcpp::Publisher::SharedPtr m_publisher; + rclcpp::TimerBase::SharedPtr m_timer; + std::shared_ptr m_input_cvimage{std::make_shared()}; + std::shared_ptr m_input_gmatimage{std::make_shared()}; + std::unique_ptr m_input_rosimage{std::make_unique()}; +};