Skip to content

Commit

Permalink
Add host for_each APIs for multiset and multimap
Browse files Browse the repository at this point in the history
  • Loading branch information
PointKernel committed Oct 31, 2024
1 parent 63f26dd commit a9c69b9
Show file tree
Hide file tree
Showing 4 changed files with 376 additions and 110 deletions.
67 changes: 67 additions & 0 deletions include/cuco/detail/static_multimap/static_multimap.inl
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,73 @@ void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
impl_->find_async(first, last, output_begin, ref(op::find), stream);
}

// Blocking host-side `for_each` over the container: enqueues the traversal on `stream` and
// returns only after waiting on that stream.
template <class Key,
          class T,
          class Extent,
          cuda::thread_scope Scope,
          class KeyEqual,
          class ProbingScheme,
          class Allocator,
          class Storage>
template <typename CallbackOp>
void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::for_each(
  CallbackOp&& callback_op, cuda::stream_ref stream) const
{
  // Enqueue through the callback-only asynchronous overload, preserving the value category of
  // the callback, ...
  this->for_each_async(std::forward<CallbackOp>(callback_op), stream);
  // ... then block the calling host thread until the stream has drained.
  stream.wait();
}

// Non-blocking host-side `for_each` over the container: hands the callback and the stream to the
// underlying implementation object and returns without waiting on `stream`.
template <class Key,
          class T,
          class Extent,
          cuda::thread_scope Scope,
          class KeyEqual,
          class ProbingScheme,
          class Allocator,
          class Storage>
template <typename CallbackOp>
void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::
  for_each_async(CallbackOp&& callback_op, cuda::stream_ref stream) const
{
  // Perfect-forward the callback so that its value category is preserved.
  this->impl_->for_each_async(std::forward<CallbackOp>(callback_op), stream);
}

// Blocking host-side `for_each` for the keys in [first, last): enqueues the lookup/callback work
// on `stream` through the implementation object and waits on the stream before returning.
template <class Key,
          class T,
          class Extent,
          cuda::thread_scope Scope,
          class KeyEqual,
          class ProbingScheme,
          class Allocator,
          class Storage>
template <typename InputIt, typename CallbackOp>
void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::for_each(
  InputIt first, InputIt last, CallbackOp&& callback_op, cuda::stream_ref stream) const
{
  // Container reference object carrying the `for_each` operator tag.
  auto const for_each_ref = this->ref(op::for_each);

  this->impl_->for_each_async(
    first, last, std::forward<CallbackOp>(callback_op), for_each_ref, stream);

  // Make the call synchronous with respect to the calling host thread.
  stream.wait();
}

// Non-blocking host-side `for_each` for the keys in [first, last): forwards the range, the
// callback and a `for_each`-tagged container reference to the implementation object. No wait on
// `stream` is performed here.
template <class Key,
          class T,
          class Extent,
          cuda::thread_scope Scope,
          class KeyEqual,
          class ProbingScheme,
          class Allocator,
          class Storage>
template <typename InputIt, typename CallbackOp>
void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::
  for_each_async(InputIt first,
                 InputIt last,
                 CallbackOp&& callback_op,
                 cuda::stream_ref stream) const noexcept
{
  // Container reference object carrying the `for_each` operator tag.
  auto const for_each_ref = this->ref(op::for_each);

  this->impl_->for_each_async(
    first, last, std::forward<CallbackOp>(callback_op), for_each_ref, stream);
}

template <class Key,
class T,
class Extent,
Expand Down
151 changes: 107 additions & 44 deletions include/cuco/detail/static_multiset/static_multiset.inl
Original file line number Diff line number Diff line change
Expand Up @@ -284,17 +284,12 @@ template <class Key,
class ProbingScheme,
class Allocator,
class Storage>
template <class InputProbeIt, class OutputProbeIt, class OutputMatchIt>
std::pair<OutputProbeIt, OutputMatchIt>
static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::retrieve(
InputProbeIt first,
InputProbeIt last,
OutputProbeIt output_probe,
OutputMatchIt output_match,
cuda::stream_ref stream) const
template <typename CallbackOp>
void static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::for_each(
CallbackOp&& callback_op, cuda::stream_ref stream) const
{
return this->impl_->retrieve(
first, last, output_probe, output_match, this->ref(op::retrieve), stream);
impl_->for_each_async(std::forward<CallbackOp>(callback_op), stream);
stream.wait();
}

template <class Key,
Expand All @@ -304,24 +299,11 @@ template <class Key,
class ProbingScheme,
class Allocator,
class Storage>
template <class InputProbeIt,
class ProbeEqual,
class ProbeHash,
class OutputProbeIt,
class OutputMatchIt>
std::pair<OutputProbeIt, OutputMatchIt>
static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::retrieve(
InputProbeIt first,
InputProbeIt last,
ProbeEqual const& probe_equal,
ProbeHash const& probe_hash,
OutputProbeIt output_probe,
OutputMatchIt output_match,
cuda::stream_ref stream) const
template <typename CallbackOp>
void static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::
for_each_async(CallbackOp&& callback_op, cuda::stream_ref stream) const
{
auto const probe_ref =
this->ref(op::retrieve).rebind_key_eq(probe_equal).rebind_hash_function(probe_hash);
return this->impl_->retrieve(first, last, output_probe, output_match, probe_ref, stream);
impl_->for_each_async(std::forward<CallbackOp>(callback_op), stream);
}

template <class Key,
Expand All @@ -331,24 +313,31 @@ template <class Key,
class ProbingScheme,
class Allocator,
class Storage>
template <class InputProbeIt,
class ProbeEqual,
class ProbeHash,
class OutputProbeIt,
class OutputMatchIt>
std::pair<OutputProbeIt, OutputMatchIt>
static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::retrieve_outer(
InputProbeIt first,
InputProbeIt last,
ProbeEqual const& probe_equal,
ProbeHash const& probe_hash,
OutputProbeIt output_probe,
OutputMatchIt output_match,
cuda::stream_ref stream) const
template <typename InputIt, typename CallbackOp>
void static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::for_each(
InputIt first, InputIt last, CallbackOp&& callback_op, cuda::stream_ref stream) const
{
auto const probe_ref =
this->ref(op::retrieve).rebind_key_eq(probe_equal).rebind_hash_function(probe_hash);
return this->impl_->retrieve_outer(first, last, output_probe, output_match, probe_ref, stream);
impl_->for_each_async(
first, last, std::forward<CallbackOp>(callback_op), ref(op::for_each), stream);
stream.wait();
}

// Non-blocking host-side `for_each` for the keys in [first, last): forwards the range, the
// callback and a `for_each`-tagged container reference to the implementation object. No wait on
// `stream` is performed here.
template <class Key,
          class Extent,
          cuda::thread_scope Scope,
          class KeyEqual,
          class ProbingScheme,
          class Allocator,
          class Storage>
template <typename InputIt, typename CallbackOp>
void static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::
  for_each_async(InputIt first,
                 InputIt last,
                 CallbackOp&& callback_op,
                 cuda::stream_ref stream) const noexcept
{
  // Container reference object carrying the `for_each` operator tag.
  auto const for_each_ref = this->ref(op::for_each);

  this->impl_->for_each_async(
    first, last, std::forward<CallbackOp>(callback_op), for_each_ref, stream);
}

template <class Key,
Expand Down Expand Up @@ -412,6 +401,80 @@ static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>
stream);
}

// Host-side `retrieve` using the container's own key equality and hash: forwards the probe range
// and both output iterators to the implementation object and returns the pair it produces.
template <class Key,
          class Extent,
          cuda::thread_scope Scope,
          class KeyEqual,
          class ProbingScheme,
          class Allocator,
          class Storage>
template <class InputProbeIt, class OutputProbeIt, class OutputMatchIt>
std::pair<OutputProbeIt, OutputMatchIt>
static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::retrieve(
  InputProbeIt first,
  InputProbeIt last,
  OutputProbeIt output_probe,
  OutputMatchIt output_match,
  cuda::stream_ref stream) const
{
  // Container reference object carrying the `retrieve` operator tag.
  auto const retrieve_ref = this->ref(op::retrieve);

  return this->impl_->retrieve(first, last, output_probe, output_match, retrieve_ref, stream);
}

// Host-side `retrieve` with caller-supplied probe equality and probe hash: a `retrieve`-tagged
// container reference is rebound to `probe_equal` / `probe_hash` and handed, together with the
// probe range and both output iterators, to the implementation object.
template <class Key,
          class Extent,
          cuda::thread_scope Scope,
          class KeyEqual,
          class ProbingScheme,
          class Allocator,
          class Storage>
template <class InputProbeIt,
          class ProbeEqual,
          class ProbeHash,
          class OutputProbeIt,
          class OutputMatchIt>
std::pair<OutputProbeIt, OutputMatchIt>
static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::retrieve(
  InputProbeIt first,
  InputProbeIt last,
  ProbeEqual const& probe_equal,
  ProbeHash const& probe_hash,
  OutputProbeIt output_probe,
  OutputMatchIt output_match,
  cuda::stream_ref stream) const
{
  // Swap in the caller's comparison and hashing callables for this query only.
  const auto rebound_ref =
    this->ref(op::retrieve).rebind_key_eq(probe_equal).rebind_hash_function(probe_hash);

  return this->impl_->retrieve(first, last, output_probe, output_match, rebound_ref, stream);
}

// Host-side `retrieve_outer` with caller-supplied probe equality and probe hash: a
// `retrieve`-tagged container reference is rebound to `probe_equal` / `probe_hash` and handed,
// together with the probe range and both output iterators, to the implementation object's
// `retrieve_outer`.
template <class Key,
          class Extent,
          cuda::thread_scope Scope,
          class KeyEqual,
          class ProbingScheme,
          class Allocator,
          class Storage>
template <class InputProbeIt,
          class ProbeEqual,
          class ProbeHash,
          class OutputProbeIt,
          class OutputMatchIt>
std::pair<OutputProbeIt, OutputMatchIt>
static_multiset<Key, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::retrieve_outer(
  InputProbeIt first,
  InputProbeIt last,
  ProbeEqual const& probe_equal,
  ProbeHash const& probe_hash,
  OutputProbeIt output_probe,
  OutputMatchIt output_match,
  cuda::stream_ref stream) const
{
  // Swap in the caller's comparison and hashing callables for this query only.
  const auto rebound_ref =
    this->ref(op::retrieve).rebind_key_eq(probe_equal).rebind_hash_function(probe_hash);

  return this->impl_->retrieve_outer(
    first, last, output_probe, output_match, rebound_ref, stream);
}

template <class Key,
class Extent,
cuda::thread_scope Scope,
Expand Down
68 changes: 68 additions & 0 deletions include/cuco/static_multimap.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,74 @@ class static_multimap {
OutputIt output_begin,
cuda::stream_ref stream = {}) const;

/**
 * @brief Applies the given function object `callback_op` to the copy of every filled slot in the
 * container
 *
 * @note The return value of `callback_op`, if any, is ignored.
 * @note This function waits on `stream` before returning. Use `for_each_async` to enqueue the
 * operation without waiting.
 *
 * @tparam CallbackOp Type of unary callback function object
 *
 * @param callback_op Function to apply to the copy of the filled slot
 * @param stream CUDA stream used for this operation; a default-constructed `cuda::stream_ref`
 * is used when omitted
 */
template <typename CallbackOp>
void for_each(CallbackOp&& callback_op, cuda::stream_ref stream = {}) const;

/**
 * @brief Asynchronously applies the given function object `callback_op` to the copy of every
 * filled slot in the container
 *
 * @note The return value of `callback_op`, if any, is ignored.
 * @note Unlike `for_each`, this function does not itself wait on `stream`; the caller is
 * presumably responsible for synchronizing `stream` before relying on the effects of
 * `callback_op`.
 *
 * @tparam CallbackOp Type of unary callback function object
 *
 * @param callback_op Function to apply to the copy of the filled slot
 * @param stream CUDA stream used for this operation; a default-constructed `cuda::stream_ref`
 * is used when omitted
 */
template <typename CallbackOp>
void for_each_async(CallbackOp&& callback_op, cuda::stream_ref stream = {}) const;

/**
 * @brief For each key in the range [first, last), applies the function object `callback_op` to
 * the copy of all corresponding matches found in the container.
 *
 * @note The return value of `callback_op`, if any, is ignored.
 * @note This function waits on `stream` before returning. Use `for_each_async` to enqueue the
 * operation without waiting.
 *
 * @tparam InputIt Device accessible random access input iterator
 * @tparam CallbackOp Type of unary callback function object
 *
 * @param first Beginning of the sequence of keys
 * @param last End of the sequence of keys
 * @param callback_op Function to apply to the copy of the matched slot
 * @param stream CUDA stream used for this operation; a default-constructed `cuda::stream_ref`
 * is used when omitted
 */
template <typename InputIt, typename CallbackOp>
void for_each(InputIt first,
InputIt last,
CallbackOp&& callback_op,
cuda::stream_ref stream = {}) const;

/**
 * @brief For each key in the range [first, last), asynchronously applies the function object
 * `callback_op` to the copy of all corresponding matches found in the container.
 *
 * @note The return value of `callback_op`, if any, is ignored.
 * @note Unlike `for_each`, this function does not itself wait on `stream`; the caller is
 * presumably responsible for synchronizing `stream` before relying on the effects of
 * `callback_op`.
 * @note This overload is declared `noexcept`, whereas the callback-only `for_each_async`
 * overload is not.
 *
 * @tparam InputIt Device accessible random access input iterator
 * @tparam CallbackOp Type of unary callback function object
 *
 * @param first Beginning of the sequence of keys
 * @param last End of the sequence of keys
 * @param callback_op Function to apply to the copy of the matched slot
 * @param stream CUDA stream used for this operation; a default-constructed `cuda::stream_ref`
 * is used when omitted
 */
template <typename InputIt, typename CallbackOp>
void for_each_async(InputIt first,
InputIt last,
CallbackOp&& callback_op,
cuda::stream_ref stream = {}) const noexcept;

/**
* @brief Counts the occurrences of keys in `[first, last)` contained in the multimap
*
Expand Down
Loading

0 comments on commit a9c69b9

Please sign in to comment.