diff --git a/CMakeLists.txt b/CMakeLists.txt index 438c19c..391bad1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,8 @@ project(robot_body_filter) set (CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(DetectOptional) +add_compile_options(-fopenmp) + if(${ROBOT_BODY_FILTER_HAVE_CXX_OPTIONAL}) # add_compile_definitions would be nicer, but isn't available in Stretch add_compile_options(-DROBOT_BODY_FILTER_USE_CXX_OPTIONAL=1) @@ -11,7 +13,7 @@ else() add_compile_options(-DROBOT_BODY_FILTER_USE_CXX_OPTIONAL=0) endif() -set(THIS_PACKAGE_DEPS backward_ros urdf urdf_parser_plugin filters geometric_shapes laser_geometry moveit_core moveit_ros_perception rclcpp sensor_msgs std_srvs pcl_conversions tf2 tf2_ros tf2_eigen tf2_sensor_msgs urdf visualization_msgs GLUT OpenCV) +set(THIS_PACKAGE_DEPS backward_ros urdf urdf_parser_plugin filters geometric_shapes laser_geometry moveit_core moveit_ros_perception rclcpp sensor_msgs std_srvs pcl_conversions tf2 tf2_ros tf2_eigen tf2_sensor_msgs urdf visualization_msgs GLUT OpenMP OpenCV) set(MESSAGE_DEPS geometry_msgs std_msgs) find_package(ament_cmake REQUIRED) diff --git a/src/RayCastingShapeMask.cpp b/src/RayCastingShapeMask.cpp index ecb9d80..231b09d 100644 --- a/src/RayCastingShapeMask.cpp +++ b/src/RayCastingShapeMask.cpp @@ -1,5 +1,6 @@ /* HACK HACK HACK */ /* We want to subclass ShapeMask and use its private members. */ +#include #include #include #include // has to be there, otherwise we encounter build problems @@ -14,6 +15,8 @@ #include +#include + // #include namespace robot_body_filter @@ -240,7 +243,7 @@ void RayCastingShapeMask::maskContainmentAndShadows( // Cloud iterators are not incremented in the for loop, because of the pragma // Comment out below parallelization as it can result in very high CPU consumption - //#pragma omp parallel for schedule(dynamic) + #pragma #pragma omp parallel for schedule(dynamic) num_threads(8) for (size_t i = 0; i < np; ++i) { const Eigen::Vector3d pt(static_cast(*(iter_x + i)), diff --git a/src/utils/tf2_sensor_msgs.cpp b/src/utils/tf2_sensor_msgs.cpp index c3acc80..1d521b1 100644 --- a/src/utils/tf2_sensor_msgs.cpp +++ b/src/utils/tf2_sensor_msgs.cpp @@ -9,6 +9,8 @@ #include +#include + namespace robot_body_filter { const static std::unordered_map XYZ_CHANNELS({ @@ -48,18 +50,22 @@ void transformChannel(const sensor_msgs::msg::PointCloud2& cloudIn, sensor_msgs: CloudIter z_out(cloudOut, channelPrefix + "z"); Eigen::Vector3f point; + size_t np = num_points(cloudIn); + // the switch has to be outside the for loop for performance reasons switch (type) { - case CloudChannelType::POINT: - for (; x_in != x_in.end(); ++x_in, ++y_in, ++z_in, ++x_out, ++y_out, ++z_out) - { - point = t * Eigen::Vector3f(*x_in, *y_in, *z_in); // apply the whole transform - *x_out = point.x(); - *y_out = point.y(); - *z_out = point.z(); + case CloudChannelType::POINT: { + // High CPU usage comment out below line for single threaded operation + #pragma #pragma omp parallel for schedule(dynamic) num_threads(8) + for (size_t i = 0; i < np; ++i) { + point = t * Eigen::Vector3f(*(x_in + i), *(y_in + i), *(z_in + i)); // apply the whole transform + *(x_out + i) = point.x(); + *(y_out + i) = point.y(); + *(z_out + i) = point.z(); } break; + } case CloudChannelType::DIRECTION: for (; x_out != x_out.end(); ++x_in, ++y_in, ++z_in, ++x_out, ++y_out, ++z_out) { @@ -68,7 +74,6 @@ void transformChannel(const sensor_msgs::msg::PointCloud2& cloudIn, sensor_msgs: *y_out = point.y(); *z_out = point.z(); } - RCLCPP_ERROR(rclcpp::get_logger("robot_body_filter"), "break"); break; case CloudChannelType::SCALAR: //TODO: ADD WARNING FOR NOT SUPPORTED