Skip to content

Commit

Permalink
Add support for limiting num_threads in tbb task
Browse files Browse the repository at this point in the history
  • Loading branch information
zzodo committed Nov 2, 2023
1 parent fcbd09d commit 5e08b68
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 53 deletions.
52 changes: 29 additions & 23 deletions cpp/kiss_icp/core/Registration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include <tbb/blocked_range.h>
#include <tbb/parallel_reduce.h>
#include <tbb/task_arena.h>

#include <algorithm>
#include <cmath>
Expand Down Expand Up @@ -62,6 +63,7 @@ void TransformPoints(const Sophus::SE3d &T, std::vector<Eigen::Vector3d> &points

constexpr int MAX_NUM_ITERATIONS_ = 500;
constexpr double ESTIMATION_THRESHOLD_ = 0.0001;
constexpr int NUM_THREADS_ = 16;

} // namespace

Expand All @@ -79,29 +81,33 @@ std::tuple<Eigen::Matrix6d, Eigen::Vector6d> BuildLinearSystem(
return std::make_tuple(J_r, residual);
};

const auto &[JTJ, JTr] = tbb::parallel_reduce(
// Range
tbb::blocked_range<size_t>{0, source.size()},
// Identity
ResultTuple(),
// 1st Lambda: Parallel computation
[&](const tbb::blocked_range<size_t> &r, ResultTuple J) -> ResultTuple {
auto Weight = [&](double residual2) {
return square(kernel) / square(kernel + residual2);
};
auto &[JTJ_private, JTr_private] = J;
for (auto i = r.begin(); i < r.end(); ++i) {
const auto &[J_r, residual] = compute_jacobian_and_residual(i);
const double w = Weight(residual.squaredNorm());
JTJ_private.noalias() += J_r.transpose() * w * J_r;
JTr_private.noalias() += J_r.transpose() * w * residual;
}
return J;
},
// 2nd Lambda: Parallel reduction of the private Jacboians
[&](ResultTuple a, const ResultTuple &b) -> ResultTuple { return a + b; });

return std::make_tuple(JTJ, JTr);
ResultTuple jacobian;
tbb::task_arena limited_arena(NUM_THREADS_);
limited_arena.execute([&]() -> void {
jacobian = tbb::parallel_reduce(
// Range
tbb::blocked_range<size_t>{0, source.size()},
// Identity
ResultTuple(),
// 1st Lambda: Parallel computation
[&](const tbb::blocked_range<size_t> &r, ResultTuple J) -> ResultTuple {
auto Weight = [&](double residual2) {
return square(kernel) / square(kernel + residual2);
};
auto &[JTJ_private, JTr_private] = J;
for (auto i = r.begin(); i < r.end(); ++i) {
const auto &[J_r, residual] = compute_jacobian_and_residual(i);
const double w = Weight(residual.squaredNorm());
JTJ_private.noalias() += J_r.transpose() * w * J_r;
JTr_private.noalias() += J_r.transpose() * w * residual;
}
return J;
},
// 2nd Lambda: Parallel reduction of the private Jacboians
[&](ResultTuple a, const ResultTuple &b) -> ResultTuple { return a + b; });
});

return std::make_tuple(jacobian.JTJ, jacobian.JTr);
}

Sophus::SE3d RegisterFrame(const std::vector<Eigen::Vector3d> &frame,
Expand Down
71 changes: 41 additions & 30 deletions cpp/kiss_icp/core/VoxelHashMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include <tbb/blocked_range.h>
#include <tbb/parallel_reduce.h>
#include <tbb/task_arena.h>

#include <Eigen/Core>
#include <algorithm>
Expand All @@ -42,6 +43,9 @@ struct ResultTuple {
std::vector<Eigen::Vector3d> source;
std::vector<Eigen::Vector3d> target;
};

constexpr int NUM_THREADS_ = 16;

} // namespace

namespace kiss_icp {
Expand Down Expand Up @@ -90,39 +94,46 @@ VoxelHashMap::Vector3dVectorTuple VoxelHashMap::GetCorrespondences(

return closest_neighbor;
};

using points_iterator = std::vector<Eigen::Vector3d>::const_iterator;
const auto [source, target] = tbb::parallel_reduce(
// Range
tbb::blocked_range<points_iterator>{points.cbegin(), points.cend()},
// Identity
ResultTuple(points.size()),
// 1st lambda: Parallel computation
[max_correspondance_distance, &GetClosestNeighboor](
const tbb::blocked_range<points_iterator> &r, ResultTuple res) -> ResultTuple {
auto &[src, tgt] = res;
src.reserve(r.size());
tgt.reserve(r.size());
for (const auto &point : r) {
Eigen::Vector3d closest_neighboors = GetClosestNeighboor(point);
if ((closest_neighboors - point).norm() < max_correspondance_distance) {
src.emplace_back(point);
tgt.emplace_back(closest_neighboors);
ResultTuple correspondences(points.size());
tbb::task_arena limited_arena(NUM_THREADS_);
limited_arena.execute([&]() -> void {
correspondences = tbb::parallel_reduce(
// Range
tbb::blocked_range<points_iterator>{points.cbegin(), points.cend()},
// Identity
ResultTuple(points.size()),
// 1st lambda: Parallel computation
[max_correspondance_distance, &GetClosestNeighboor](
const tbb::blocked_range<points_iterator> &r, ResultTuple res) -> ResultTuple {
auto &[src, tgt] = res;
src.reserve(r.size());
tgt.reserve(r.size());
for (const auto &point : r) {
Eigen::Vector3d closest_neighboors = GetClosestNeighboor(point);
if ((closest_neighboors - point).norm() < max_correspondance_distance) {
src.emplace_back(point);
tgt.emplace_back(closest_neighboors);
}
}
}
return res;
},
// 2nd lambda: Parallel reduction
[](ResultTuple a, const ResultTuple &b) -> ResultTuple {
auto &[src, tgt] = a;
const auto &[srcp, tgtp] = b;
src.insert(src.end(), //
std::make_move_iterator(srcp.begin()), std::make_move_iterator(srcp.end()));
tgt.insert(tgt.end(), //
std::make_move_iterator(tgtp.begin()), std::make_move_iterator(tgtp.end()));
return a;
});
return res;
},
// 2nd lambda: Parallel reduction
[](ResultTuple a, const ResultTuple &b) -> ResultTuple {
auto &[src, tgt] = a;
const auto &[srcp, tgtp] = b;
src.insert(src.end(), //
std::make_move_iterator(srcp.begin()),
std::make_move_iterator(srcp.end()));
tgt.insert(tgt.end(), //
std::make_move_iterator(tgtp.begin()),
std::make_move_iterator(tgtp.end()));
return a;
});
});

return std::make_tuple(source, target);
return std::make_tuple(correspondences.source, correspondences.target);
}

std::vector<Eigen::Vector3d> VoxelHashMap::Pointcloud() const {
Expand Down

0 comments on commit 5e08b68

Please sign in to comment.