From 5e08b6834a269cda9b8a62556bfe9c87de23dc0f Mon Sep 17 00:00:00 2001 From: zzodo Date: Thu, 2 Nov 2023 15:32:07 +0900 Subject: [PATCH] Add support for limiting num_threads in tbb task --- cpp/kiss_icp/core/Registration.cpp | 52 ++++++++++++---------- cpp/kiss_icp/core/VoxelHashMap.cpp | 71 +++++++++++++++++------------- 2 files changed, 70 insertions(+), 53 deletions(-) diff --git a/cpp/kiss_icp/core/Registration.cpp b/cpp/kiss_icp/core/Registration.cpp index c3e0d49c..a34094c1 100644 --- a/cpp/kiss_icp/core/Registration.cpp +++ b/cpp/kiss_icp/core/Registration.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -62,6 +63,7 @@ void TransformPoints(const Sophus::SE3d &T, std::vector &points constexpr int MAX_NUM_ITERATIONS_ = 500; constexpr double ESTIMATION_THRESHOLD_ = 0.0001; +constexpr int NUM_THREADS_ = 16; } // namespace @@ -79,29 +81,33 @@ std::tuple BuildLinearSystem( return std::make_tuple(J_r, residual); }; - const auto &[JTJ, JTr] = tbb::parallel_reduce( - // Range - tbb::blocked_range{0, source.size()}, - // Identity - ResultTuple(), - // 1st Lambda: Parallel computation - [&](const tbb::blocked_range &r, ResultTuple J) -> ResultTuple { - auto Weight = [&](double residual2) { - return square(kernel) / square(kernel + residual2); - }; - auto &[JTJ_private, JTr_private] = J; - for (auto i = r.begin(); i < r.end(); ++i) { - const auto &[J_r, residual] = compute_jacobian_and_residual(i); - const double w = Weight(residual.squaredNorm()); - JTJ_private.noalias() += J_r.transpose() * w * J_r; - JTr_private.noalias() += J_r.transpose() * w * residual; - } - return J; - }, - // 2nd Lambda: Parallel reduction of the private Jacboians - [&](ResultTuple a, const ResultTuple &b) -> ResultTuple { return a + b; }); - - return std::make_tuple(JTJ, JTr); + ResultTuple jacobian; + tbb::task_arena limited_arena(NUM_THREADS_); + limited_arena.execute([&]() -> void { + jacobian = tbb::parallel_reduce( + // Range + tbb::blocked_range{0, source.size()}, + // Identity + ResultTuple(), + // 1st Lambda: Parallel computation + [&](const tbb::blocked_range &r, ResultTuple J) -> ResultTuple { + auto Weight = [&](double residual2) { + return square(kernel) / square(kernel + residual2); + }; + auto &[JTJ_private, JTr_private] = J; + for (auto i = r.begin(); i < r.end(); ++i) { + const auto &[J_r, residual] = compute_jacobian_and_residual(i); + const double w = Weight(residual.squaredNorm()); + JTJ_private.noalias() += J_r.transpose() * w * J_r; + JTr_private.noalias() += J_r.transpose() * w * residual; + } + return J; + }, + // 2nd Lambda: Parallel reduction of the private Jacboians + [&](ResultTuple a, const ResultTuple &b) -> ResultTuple { return a + b; }); + }); + + return std::make_tuple(jacobian.JTJ, jacobian.JTr); } Sophus::SE3d RegisterFrame(const std::vector &frame, diff --git a/cpp/kiss_icp/core/VoxelHashMap.cpp b/cpp/kiss_icp/core/VoxelHashMap.cpp index 2212dd67..bbd15756 100644 --- a/cpp/kiss_icp/core/VoxelHashMap.cpp +++ b/cpp/kiss_icp/core/VoxelHashMap.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -42,6 +43,9 @@ struct ResultTuple { std::vector source; std::vector target; }; + +constexpr int NUM_THREADS_ = 16; + } // namespace namespace kiss_icp { @@ -90,39 +94,46 @@ VoxelHashMap::Vector3dVectorTuple VoxelHashMap::GetCorrespondences( return closest_neighbor; }; + using points_iterator = std::vector::const_iterator; - const auto [source, target] = tbb::parallel_reduce( - // Range - tbb::blocked_range{points.cbegin(), points.cend()}, - // Identity - ResultTuple(points.size()), - // 1st lambda: Parallel computation - [max_correspondance_distance, &GetClosestNeighboor]( - const tbb::blocked_range &r, ResultTuple res) -> ResultTuple { - auto &[src, tgt] = res; - src.reserve(r.size()); - tgt.reserve(r.size()); - for (const auto &point : r) { - Eigen::Vector3d closest_neighboors = GetClosestNeighboor(point); - if ((closest_neighboors - point).norm() < max_correspondance_distance) { - src.emplace_back(point); - tgt.emplace_back(closest_neighboors); + ResultTuple correspondences(points.size()); + tbb::task_arena limited_arena(NUM_THREADS_); + limited_arena.execute([&]() -> void { + correspondences = tbb::parallel_reduce( + // Range + tbb::blocked_range{points.cbegin(), points.cend()}, + // Identity + ResultTuple(points.size()), + // 1st lambda: Parallel computation + [max_correspondance_distance, &GetClosestNeighboor]( + const tbb::blocked_range &r, ResultTuple res) -> ResultTuple { + auto &[src, tgt] = res; + src.reserve(r.size()); + tgt.reserve(r.size()); + for (const auto &point : r) { + Eigen::Vector3d closest_neighboors = GetClosestNeighboor(point); + if ((closest_neighboors - point).norm() < max_correspondance_distance) { + src.emplace_back(point); + tgt.emplace_back(closest_neighboors); + } } - } - return res; - }, - // 2nd lambda: Parallel reduction - [](ResultTuple a, const ResultTuple &b) -> ResultTuple { - auto &[src, tgt] = a; - const auto &[srcp, tgtp] = b; - src.insert(src.end(), // - std::make_move_iterator(srcp.begin()), std::make_move_iterator(srcp.end())); - tgt.insert(tgt.end(), // - std::make_move_iterator(tgtp.begin()), std::make_move_iterator(tgtp.end())); - return a; - }); + return res; + }, + // 2nd lambda: Parallel reduction + [](ResultTuple a, const ResultTuple &b) -> ResultTuple { + auto &[src, tgt] = a; + const auto &[srcp, tgtp] = b; + src.insert(src.end(), // + std::make_move_iterator(srcp.begin()), + std::make_move_iterator(srcp.end())); + tgt.insert(tgt.end(), // + std::make_move_iterator(tgtp.begin()), + std::make_move_iterator(tgtp.end())); + return a; + }); + }); - return std::make_tuple(source, target); + return std::make_tuple(correspondences.source, correspondences.target); } std::vector VoxelHashMap::Pointcloud() const {