Skip to content

Commit

Permalink
merge conflict and address review
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Dec 4, 2024
2 parents 68f36a6 + 1b82963 commit b4b76e4
Show file tree
Hide file tree
Showing 64 changed files with 465 additions and 328 deletions.
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- cramjam
- cubinlinker
- cuda-nvtx=11.8
- cuda-python>=11.7.1,<12.0a0,<=11.8.3
- cuda-python>=11.7.1,<12.0a0
- cuda-sanitizer-api=11.8.86
- cuda-version=11.8
- cudatoolkit
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
- cuda-python>=12.0,<13.0a0,<=12.6.0
- cuda-python>=12.0,<13.0a0
- cuda-sanitizer-api
- cuda-version=12.5
- cupy>=12.0.0
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ requirements:
- cudatoolkit
- ptxcompiler >=0.7.0
- cubinlinker # CUDA enhanced compatibility.
- cuda-python >=11.7.1,<12.0a0,<=11.8.3
- cuda-python >=11.7.1,<12.0a0
{% else %}
- cuda-cudart
- libcufile # [linux64]
Expand All @@ -100,7 +100,7 @@ requirements:
# TODO: Add nvjitlink here
# xref: https://github.com/rapidsai/cudf/issues/12822
- cuda-nvrtc
- cuda-python >=12.0,<13.0a0,<=12.6.0
- cuda-python >=12.0,<13.0a0
- pynvjitlink
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/pylibcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ requirements:
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
- cuda-python >=11.7.1,<12.0a0,<=11.8.3
- cuda-python >=11.7.1,<12.0a0
{% else %}
- cuda-python >=12.0,<13.0a0,<=12.6.0
- cuda-python >=12.0,<13.0a0
{% endif %}
- nvtx >=0.2.1
- packaging
Expand Down
5 changes: 0 additions & 5 deletions cpp/cmake/thirdparty/patches/cccl_override.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@
"packages" : {
"CCCL" : {
"patches" : [
{
"file" : "${current_json_dir}/cccl_symbol_visibility.diff",
"issue" : "Correct symbol visibility issues in libcudacxx [https://github.com/NVIDIA/cccl/pull/1832/]",
"fixed_in" : "2.6"
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
Expand Down
27 changes: 0 additions & 27 deletions cpp/cmake/thirdparty/patches/cccl_symbol_visibility.diff

This file was deleted.

66 changes: 50 additions & 16 deletions cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
Original file line number Diff line number Diff line change
@@ -1,25 +1,59 @@
diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h
index 2a3cc4e33..8fb337b26 100644
index 971b93d62..0d6b25b07 100644
--- a/thrust/thrust/system/cuda/detail/dispatch.h
+++ b/thrust/thrust/system/cuda/detail/dispatch.h
@@ -44,8 +44,7 @@
} \
else \
{ \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
- status = call arguments; \
@@ -36,16 +36,15 @@
* that callables for both branches consist of the same tokens, and is intended to be used with Thrust-style dispatch
* interfaces, that always deduce the size type from the arguments.
*/
-#define THRUST_INDEX_TYPE_DISPATCH(status, call, count, arguments) \
- if (count <= thrust::detail::integer_traits<std::int32_t>::const_max) \
- { \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<std::int32_t>(count); \
- status = call arguments; \
- } \
- else \
- { \
- auto THRUST_PP_CAT2(count, _fixed) = static_cast<std::int64_t>(count); \
- status = call arguments; \
+#define THRUST_INDEX_TYPE_DISPATCH(status, call, count, arguments) \
+ if (count <= thrust::detail::integer_traits<std::int32_t>::const_max) \
+ { \
+ auto THRUST_PP_CAT2(count, _fixed) = static_cast<std::int32_t>(count); \
+ status = call arguments; \
+ } \
+ else \
+ { \
+ throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}

/**
@@ -66,9 +65,7 @@
} \
else \
{ \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
- status = call arguments; \
@@ -55,18 +54,16 @@
*
* This version of the macro supports providing two count variables, which is necessary for set algorithms.
*/
-#define THRUST_DOUBLE_INDEX_TYPE_DISPATCH(status, call, count1, count2, arguments) \
- if (count1 + count2 <= thrust::detail::integer_traits<std::int32_t>::const_max) \
- { \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<std::int32_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<std::int32_t>(count2); \
- status = call arguments; \
- } \
- else \
- { \
- auto THRUST_PP_CAT2(count1, _fixed) = static_cast<std::int64_t>(count1); \
- auto THRUST_PP_CAT2(count2, _fixed) = static_cast<std::int64_t>(count2); \
- status = call arguments; \
+#define THRUST_DOUBLE_INDEX_TYPE_DISPATCH(status, call, count1, count2, arguments) \
+ if (count1 + count2 <= thrust::detail::integer_traits<std::int32_t>::const_max) \
+ { \
+ auto THRUST_PP_CAT2(count1, _fixed) = static_cast<std::int32_t>(count1); \
+ auto THRUST_PP_CAT2(count2, _fixed) = static_cast<std::int32_t>(count2); \
+ status = call arguments; \
+ } \
+ else \
+ { \
+ throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
}

/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
index eb76ebb0b..c6c529a50 100644
index 29510db5e..cf57e5786 100644
--- a/cub/cub/block/block_merge_sort.cuh
+++ b/cub/cub/block/block_merge_sort.cuh
@@ -95,7 +95,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void SerialMerge(
KeyT key1 = keys_shared[keys1_beg];
KeyT key2 = keys_shared[keys2_beg];

-#pragma unroll
+#pragma unroll 1
for (int item = 0; item < ITEMS_PER_THREAD; ++item)
{
bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
@@ -376,7 +376,7 @@ public:
const bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
@@ -374,7 +374,7 @@ public:
//
KeyT max_key = oob_default;

-#pragma unroll
+#pragma unroll 1
for (int item = 1; item < ITEMS_PER_THREAD; ++item)
Expand All @@ -27,7 +27,7 @@ index 7d9e8622f..da5627306 100644
@@ -87,10 +87,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], ValueT (&items)[ITEMS_PER_THRE
{
constexpr bool KEYS_ONLY = ::cuda::std::is_same<ValueT, NullType>::value;

-#pragma unroll
+#pragma unroll 1
for (int i = 0; i < ITEMS_PER_THREAD; ++i)
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -460,18 +460,18 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
*/
struct index_element_fn {
template <typename IndexType,
CUDF_ENABLE_IF(is_index_type<IndexType>() and std::is_unsigned_v<IndexType>)>
CUDF_ENABLE_IF(is_index_type<IndexType>() and std::is_signed_v<IndexType>)>
__device__ size_type operator()(column_device_view const& indices, size_type index)
{
return static_cast<size_type>(indices.element<IndexType>(index));
}

template <typename IndexType,
typename... Args,
CUDF_ENABLE_IF(not(is_index_type<IndexType>() and std::is_unsigned_v<IndexType>))>
CUDF_ENABLE_IF(not(is_index_type<IndexType>() and std::is_signed_v<IndexType>))>
__device__ size_type operator()(Args&&... args)
{
CUDF_UNREACHABLE("dictionary indices must be an unsigned integral type");
CUDF_UNREACHABLE("dictionary indices must be a signed integral type");
}
};

Expand Down
8 changes: 4 additions & 4 deletions cpp/include/cudf/detail/copy_range.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,15 @@ CUDF_KERNEL void copy_range_kernel(SourceValueIterator source_value_begin,
constexpr cudf::size_type leader_lane{0};
int const lane_id = threadIdx.x % warp_size;

cudf::size_type const tid = threadIdx.x + blockIdx.x * blockDim.x;
int const warp_id = tid / warp_size;
auto const tid = cudf::detail::grid_1d::global_thread_id();
auto const warp_id = tid / warp_size;

cudf::size_type const offset = target.offset();
cudf::size_type const begin_mask_idx = cudf::word_index(offset + target_begin);
cudf::size_type const end_mask_idx = cudf::word_index(offset + target_end);

cudf::size_type mask_idx = begin_mask_idx + warp_id;
cudf::size_type const masks_per_grid = gridDim.x * blockDim.x / warp_size;
cudf::size_type const masks_per_grid = cudf::detail::grid_1d::grid_stride() / warp_size;

cudf::size_type target_offset = begin_mask_idx * warp_size - (offset + target_begin);
cudf::size_type source_idx = tid + target_offset;
Expand Down Expand Up @@ -92,7 +92,7 @@ CUDF_KERNEL void copy_range_kernel(SourceValueIterator source_value_begin,
}
}

source_idx += blockDim.x * gridDim.x;
source_idx += cudf::detail::grid_1d::grid_stride();
mask_idx += masks_per_grid;
}

Expand Down
9 changes: 4 additions & 5 deletions cpp/include/cudf/detail/null_mask.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@ CUDF_KERNEL void offset_bitmask_binop(Binop op,
size_type source_size_bits,
size_type* count_ptr)
{
auto const tid = threadIdx.x + blockIdx.x * blockDim.x;
auto const tid = cudf::detail::grid_1d::global_thread_id();

auto const last_bit_index = source_size_bits - 1;
auto const last_word_index = cudf::word_index(last_bit_index);

size_type thread_count = 0;

for (size_type destination_word_index = tid; destination_word_index < destination.size();
destination_word_index += blockDim.x * gridDim.x) {
destination_word_index += cudf::detail::grid_1d::grid_stride()) {
bitmask_type destination_word =
detail::get_mask_offset_word(source[0],
destination_word_index,
Expand Down Expand Up @@ -214,8 +214,7 @@ CUDF_KERNEL void subtract_set_bits_range_boundaries_kernel(bitmask_type const* b
{
constexpr size_type const word_size_in_bits{detail::size_in_bits<bitmask_type>()};

size_type const tid = threadIdx.x + blockIdx.x * blockDim.x;
size_type range_id = tid;
auto range_id = cudf::detail::grid_1d::global_thread_id();

while (range_id < num_ranges) {
size_type const first_bit_index = *(first_bit_indices + range_id);
Expand Down Expand Up @@ -243,7 +242,7 @@ CUDF_KERNEL void subtract_set_bits_range_boundaries_kernel(bitmask_type const* b
// Update the null count with the computed delta.
size_type updated_null_count = *(null_counts + range_id) + delta;
*(null_counts + range_id) = updated_null_count;
range_id += blockDim.x * gridDim.x;
range_id += cudf::detail::grid_1d::grid_stride();
}
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/dictionary/encode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace dictionary {
*
* The null mask and null count are copied from the input column to the output column.
*
* @throw cudf::logic_error if indices type is not an unsigned integer type
* @throw cudf::logic_error if indices type is not a signed integer type
* @throw cudf::logic_error if the column to encode is already a DICTIONARY type
*
* @code{.pseudo}
Expand All @@ -58,7 +58,7 @@ namespace dictionary {
*/
std::unique_ptr<column> encode(
column_view const& column,
data_type indices_type = data_type{type_id::UINT32},
data_type indices_type = data_type{type_id::INT32},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

Expand Down
23 changes: 23 additions & 0 deletions cpp/include/cudf/utilities/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,29 @@ constexpr inline bool is_index_type()
*/
bool is_index_type(data_type type);

/**
* @brief Indicates whether the type `T` is a signed numeric type.
*
* @tparam T The type to verify
* @return true `T` is signed numeric
*/
template <typename T>
constexpr inline bool is_signed()
{
return std::is_signed_v<T>;
}

/**
* @brief Indicates whether `type` is a signed numeric `data_type`.
*
* "Signed Numeric" types include fundamental integral types such as `INT*`
* but can also be `FLOAT*` types.
*
* @param type The `data_type` to verify
* @return true `type` is signed numeric
*/
bool is_signed(data_type type);

/**
* @brief Indicates whether the type `T` is a unsigned numeric type.
*
Expand Down
8 changes: 4 additions & 4 deletions cpp/include/cudf_test/column_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -974,7 +974,7 @@ class dictionary_column_wrapper : public detail::column_wrapper {
{
wrapped =
cudf::dictionary::encode(fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end),
cudf::data_type{type_id::UINT32},
cudf::data_type{type_id::INT32},
cudf::test::get_default_stream());
}

Expand Down Expand Up @@ -1009,7 +1009,7 @@ class dictionary_column_wrapper : public detail::column_wrapper {
{
wrapped = cudf::dictionary::encode(
fixed_width_column_wrapper<KeyElementTo, SourceElementT>(begin, end, v),
cudf::data_type{type_id::UINT32},
cudf::data_type{type_id::INT32},
cudf::test::get_default_stream());
}

Expand Down Expand Up @@ -1173,7 +1173,7 @@ class dictionary_column_wrapper<std::string> : public detail::column_wrapper {
dictionary_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
{
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end),
cudf::data_type{type_id::UINT32},
cudf::data_type{type_id::INT32},
cudf::test::get_default_stream());
}

Expand Down Expand Up @@ -1210,7 +1210,7 @@ class dictionary_column_wrapper<std::string> : public detail::column_wrapper {
: column_wrapper{}
{
wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v),
cudf::data_type{type_id::UINT32},
cudf::data_type{type_id::INT32},
cudf::test::get_default_stream());
}

Expand Down
2 changes: 2 additions & 0 deletions cpp/include/nvtext/byte_pair_encoding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,15 @@ std::unique_ptr<bpe_merge_pairs> load_merge_pairs(
* @param merges_pairs Created by a call to @ref nvtext::load_merge_pairs.
* @param separator String used to build the output after encoding.
* Default is a space.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Memory resource to allocate any returned objects.
* @return An encoded column of strings.
*/
std::unique_ptr<cudf::column> byte_pair_encoding(
cudf::strings_column_view const& input,
bpe_merge_pairs const& merges_pairs,
cudf::string_scalar const& separator = cudf::string_scalar(" "),
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/column/column_factories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ std::unique_ptr<column> make_dictionary_from_scalar(scalar const& s,
CUDF_EXPECTS(s.is_valid(stream), "cannot create a dictionary with a null key");
return make_dictionary_column(
make_column_from_scalar(s, 1, stream, mr),
make_column_from_scalar(numeric_scalar<uint32_t>(0, true, stream), size, stream, mr),
make_column_from_scalar(numeric_scalar<int32_t>(0, true, stream), size, stream, mr),
rmm::device_buffer{0, stream, mr},
0);
}
Expand Down
Loading

0 comments on commit b4b76e4

Please sign in to comment.