From b120f7e73e882b4eaa6b5a2cb91aeed20bf1198d Mon Sep 17 00:00:00 2001
From: Yunsong Wang <yunsongw@nvidia.com>
Date: Wed, 4 Oct 2023 14:23:24 -0700
Subject: [PATCH] Improve `contains_column` by invoking `contains_table`
 (#14238)

Part of #https://github.com/rapidsai/cudf/issues/12261

This PR simplifies the `contains_column` implementation by invoking `contains_table` and gets rid of the use of the cudf `unordered_multiset`. It also removes the `unordered_multiset` header file from libcudf.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/14238
---
 cpp/src/hash/unordered_multiset.cuh | 159 ----------------------------
 cpp/src/search/contains_column.cu   |  67 +-----------
 2 files changed, 1 insertion(+), 225 deletions(-)
 delete mode 100644 cpp/src/hash/unordered_multiset.cuh
diff --git a/cpp/src/hash/unordered_multiset.cuh b/cpp/src/hash/unordered_multiset.cuh
deleted file mode 100644
index 183042fc0f4..00000000000
--- a/cpp/src/hash/unordered_multiset.cuh
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <cudf/column/column_device_view.cuh>
-#include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/hashing/detail/default_hash.cuh>
-#include <cudf/hashing/detail/helper_functions.cuh>
-#include <cudf/utilities/span.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/copy.h>
-#include <thrust/for_each.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/scan.h>
-
-#include <cuda/atomic>
-
-namespace cudf {
-namespace detail {
-/*
- *  Device view of the unordered multiset
- */
-template <typename Element,
-          typename Hasher   = cudf::hashing::detail::default_hash<Element>,
-          typename Equality = equal_to<Element>>
-class unordered_multiset_device_view {
- public:
-  unordered_multiset_device_view(size_type hash_size,
-                                 size_type const* hash_begin,
-                                 Element const* hash_data)
-    : hash_size{hash_size}, hash_begin{hash_begin}, hash_data{hash_data}, hasher(), equals()
-  {
-  }
-
-  bool __device__ contains(Element e) const
-  {
-    size_type loc = hasher(e) % (2 * hash_size);
-
-    for (size_type i = hash_begin[loc]; i < hash_begin[loc + 1]; ++i) {
-      if (equals(hash_data[i], e)) return true;
-    }
-
-    return false;
-  }
-
- private:
-  Hasher hasher;
-  Equality equals;
-  size_type hash_size;
-  size_type const* hash_begin;
-  Element const* hash_data;
-};
-
-/*
- * Fixed size set on a device.
- */
-template <typename Element,
-          typename Hasher   = cudf::hashing::detail::default_hash<Element>,
-          typename Equality = equal_to<Element>>
-class unordered_multiset {
- public:
-  /**
-   * @brief Factory to construct a new unordered_multiset
-   */
-  static unordered_multiset<Element> create(column_view const& col, rmm::cuda_stream_view stream)
-  {
-    auto d_column = column_device_view::create(col, stream);
-    auto d_col    = *d_column;
-
-    auto hash_bins_start = cudf::detail::make_zeroed_device_uvector_async<size_type>(
-      2 * d_col.size() + 1, stream, rmm::mr::get_current_device_resource());
-    auto hash_bins_end = cudf::detail::make_zeroed_device_uvector_async<size_type>(
-      2 * d_col.size() + 1, stream, rmm::mr::get_current_device_resource());
-    auto hash_data = rmm::device_uvector<Element>(d_col.size(), stream);
-
-    Hasher hasher;
-    size_type* d_hash_bins_start = hash_bins_start.data();
-    size_type* d_hash_bins_end   = hash_bins_end.data();
-    Element* d_hash_data         = hash_data.data();
-
-    thrust::for_each(
-      rmm::exec_policy(stream),
-      thrust::make_counting_iterator<size_type>(0),
-      thrust::make_counting_iterator<size_type>(col.size()),
-      [d_hash_bins_start, d_col, hasher] __device__(size_t idx) {
-        if (!d_col.is_null(idx)) {
-          Element e     = d_col.element<Element>(idx);
-          size_type tmp = hasher(e) % (2 * d_col.size());
-          cuda::atomic_ref<size_type, cuda::thread_scope_device> ref{*(d_hash_bins_start + tmp)};
-          ref.fetch_add(1, cuda::std::memory_order_relaxed);
-        }
-      });
-
-    thrust::exclusive_scan(rmm::exec_policy(stream),
-                           hash_bins_start.begin(),
-                           hash_bins_start.end(),
-                           hash_bins_end.begin());
-
-    thrust::copy(rmm::exec_policy(stream),
-                 hash_bins_end.begin(),
-                 hash_bins_end.end(),
-                 hash_bins_start.begin());
-
-    thrust::for_each(
-      rmm::exec_policy(stream),
-      thrust::make_counting_iterator<size_type>(0),
-      thrust::make_counting_iterator<size_type>(col.size()),
-      [d_hash_bins_end, d_hash_data, d_col, hasher] __device__(size_t idx) {
-        if (!d_col.is_null(idx)) {
-          Element e     = d_col.element<Element>(idx);
-          size_type tmp = hasher(e) % (2 * d_col.size());
-          cuda::atomic_ref<size_type, cuda::thread_scope_device> ref{*(d_hash_bins_end + tmp)};
-          size_type offset    = ref.fetch_add(1, cuda::std::memory_order_relaxed);
-          d_hash_data[offset] = e;
-        }
-      });
-
-    return unordered_multiset(d_col.size(), std::move(hash_bins_start), std::move(hash_data));
-  }
-
-  unordered_multiset_device_view<Element, Hasher, Equality> to_device() const
-  {
-    return unordered_multiset_device_view<Element, Hasher, Equality>(
-      size, hash_bins.data(), hash_data.data());
-  }
-
- private:
-  unordered_multiset(size_type size,
-                     rmm::device_uvector<size_type>&& hash_bins,
-                     rmm::device_uvector<Element>&& hash_data)
-    : size{size}, hash_bins{std::move(hash_bins)}, hash_data{std::move(hash_data)}
-  {
-  }
-
-  size_type size;
-  rmm::device_uvector<size_type> hash_bins;
-  rmm::device_uvector<Element> hash_data;
-};
-
-}  // namespace detail
-}  // namespace cudf
diff --git a/cpp/src/search/contains_column.cu b/cpp/src/search/contains_column.cu
index 4363bd212fe..85971647434 100644
--- a/cpp/src/search/contains_column.cu
+++ b/cpp/src/search/contains_column.cu
@@ -14,23 +14,14 @@
  * limitations under the License.
  */
 
-#include <hash/unordered_multiset.cuh>
-
-#include <cudf/column/column_factories.hpp>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/search.hpp>
 #include <cudf/dictionary/detail/search.hpp>
 #include <cudf/dictionary/detail/update_keys.hpp>
 #include <cudf/table/table_view.hpp>
-#include <cudf/utilities/default_stream.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/transform.h>
-#include <thrust/uninitialized_fill.h>
 
 namespace cudf {
 namespace detail {
@@ -38,61 +29,7 @@ namespace detail {
 namespace {
 
 struct contains_column_dispatch {
-  template <typename Element, typename Haystack>
-  struct contains_fn {
-    bool __device__ operator()(size_type const idx) const
-    {
-      if (needles_have_nulls && needles.is_null_nocheck(idx)) {
-        // Exit early. The value doesn't matter, and will be masked as a null element.
-        return true;
-      }
-
-      return haystack.contains(needles.template element<Element>(idx));
-    }
-
-    Haystack const haystack;
-    column_device_view const needles;
-    bool const needles_have_nulls;
-  };
-
-  template <typename Element, CUDF_ENABLE_IF(!is_nested<Element>())>
-  std::unique_ptr<column> operator()(column_view const& haystack,
-                                     column_view const& needles,
-                                     rmm::cuda_stream_view stream,
-                                     rmm::mr::device_memory_resource* mr) const
-  {
-    auto result = make_numeric_column(data_type{type_to_id<bool>()},
-                                      needles.size(),
-                                      copy_bitmask(needles, stream, mr),
-                                      needles.null_count(),
-                                      stream,
-                                      mr);
-    if (needles.is_empty()) { return result; }
-
-    auto const out_begin = result->mutable_view().template begin<bool>();
-    if (haystack.is_empty()) {
-      thrust::uninitialized_fill(
-        rmm::exec_policy(stream), out_begin, out_begin + needles.size(), false);
-      return result;
-    }
-
-    auto const haystack_set = cudf::detail::unordered_multiset<Element>::create(haystack, stream);
-    auto const haystack_set_dv = haystack_set.to_device();
-    auto const needles_cdv_ptr = column_device_view::create(needles, stream);
-
-    thrust::transform(rmm::exec_policy(stream),
-                      thrust::make_counting_iterator<size_type>(0),
-                      thrust::make_counting_iterator<size_type>(needles.size()),
-                      out_begin,
-                      contains_fn<Element, decltype(haystack_set_dv)>{
-                        haystack_set_dv, *needles_cdv_ptr, needles.has_nulls()});
-
-    result->set_null_count(needles.null_count());
-
-    return result;
-  }
-
-  template <typename Element, CUDF_ENABLE_IF(is_nested<Element>())>
+  template <typename Element>
   std::unique_ptr<column> operator()(column_view const& haystack,
                                      column_view const& needles,
                                      rmm::cuda_stream_view stream,
@@ -144,8 +81,6 @@ std::unique_ptr<column> contains(column_view const& haystack,
                                  rmm::cuda_stream_view stream,
                                  rmm::mr::device_memory_resource* mr)
 {
-  CUDF_EXPECTS(haystack.type() == needles.type(), "DTYPE mismatch");
-
   return cudf::type_dispatcher(
     haystack.type(), contains_column_dispatch{}, haystack, needles, stream, mr);
 }