diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp
index 6351a84e38f..55a494cab42 100644
--- a/cpp/include/cudf/utilities/type_dispatcher.hpp
+++ b/cpp/include/cudf/utilities/type_dispatcher.hpp
@@ -53,7 +53,7 @@ namespace CUDF_EXPORT cudf {
  * @return The `cudf::type_id` corresponding to the specified type
  */
 template <typename T>
-inline constexpr type_id type_to_id()
+CUDF_HOST_DEVICE inline constexpr type_id type_to_id()
 {
   return type_id::EMPTY;
 };
diff --git a/cpp/src/text/edit_distance.cu b/cpp/src/text/edit_distance.cu
index b04e9961e01..be8d199cd77 100644
--- a/cpp/src/text/edit_distance.cu
+++ b/cpp/src/text/edit_distance.cu
@@ -30,6 +30,7 @@
 #include
 #include

+#include
 #include
 #include
 #include
@@ -64,10 +65,10 @@ __device__ cudf::size_type compute_distance(cudf::string_view const& d_str,
   if (str_length == 0) return tgt_length;
   if (tgt_length == 0) return str_length;

-  auto begin = str_length < tgt_length ? d_str.begin() : d_tgt.begin();
-  auto itr   = str_length < tgt_length ? d_tgt.begin() : d_str.begin();
-  // .first is min and .second is max
-  auto const [n, m] = std::minmax(str_length, tgt_length);
+  auto begin   = str_length < tgt_length ? d_str.begin() : d_tgt.begin();
+  auto itr     = str_length < tgt_length ? d_tgt.begin() : d_str.begin();
+  auto const n = cuda::std::min(str_length, tgt_length);
+  auto const m = cuda::std::max(str_length, tgt_length);
   // setup compute buffer pointers
   auto v0 = buffer;
   auto v1 = v0 + n + 1;
@@ -81,7 +82,7 @@
       auto sub_cost = v0[j] + (*itr != *itr_tgt);
       auto del_cost = v0[j + 1] + 1;
       auto ins_cost = v1[j] + 1;
-      v1[j + 1]    = std::min(std::min(sub_cost, del_cost), ins_cost);
+      v1[j + 1]    = cuda::std::min(cuda::std::min(sub_cost, del_cost), ins_cost);
     }
     thrust::swap(v0, v1);
   }
@@ -170,7 +171,7 @@ std::unique_ptr<cudf::column> edit_distance(cudf::strings_column_view const& str
         auto d_tgt = d_targets.size() == 1 ? d_targets.element<cudf::string_view>(0)
                                            : d_targets.element<cudf::string_view>(idx);
         // just need 2 integers for each character of the shorter string
-        return (std::min(d_str.length(), d_tgt.length()) + 1) * 2;
+        return (cuda::std::min(d_str.length(), d_tgt.length()) + 1) * 2;
       });

   // get the total size of the temporary compute buffer
@@ -241,7 +242,7 @@ std::unique_ptr<cudf::column> edit_distance_matrix(cudf::strings_column_view con
       if (d_str1.empty() || d_str2.empty()) { return; }
       // the temp size needed is 2 integers per character of the shorter string
       d_offsets[idx - ((row + 1) * (row + 2)) / 2] =
-        (std::min(d_str1.length(), d_str2.length()) + 1) * 2;
+        (cuda::std::min(d_str1.length(), d_str2.length()) + 1) * 2;
     });

   // get the total size for the compute buffer
diff --git a/cpp/src/text/jaccard.cu b/cpp/src/text/jaccard.cu
index 2de94a4eb59..7cd44196e8a 100644
--- a/cpp/src/text/jaccard.cu
+++ b/cpp/src/text/jaccard.cu
@@ -36,6 +36,7 @@
 #include
 #include

+#include
 #include
 #include
 #include
@@ -243,7 +244,7 @@ CUDF_KERNEL void count_substrings_kernel(cudf::column_device_view const d_string
     }
   }
   auto const char_count = warp_reduce(temp_storage).Sum(count);
-  if (lane_idx == 0) { d_counts[str_idx] = std::max(1, char_count - width + 1); }
+  if (lane_idx == 0) { d_counts[str_idx] = cuda::std::max(1, char_count - width + 1); }
 }

 /**
diff --git a/cpp/src/text/minhash.cu b/cpp/src/text/minhash.cu
index 9a44d9477ab..75b1acd356f 100644
--- a/cpp/src/text/minhash.cu
+++ b/cpp/src/text/minhash.cu
@@ -40,14 +40,13 @@
 #include
 #include

+#include <cuda/std/limits>
 #include
 #include
 #include
 #include
 #include

-#include <limits>
-
 namespace nvtext {
 namespace detail {
 namespace {
@@ -156,7 +155,7 @@ CUDF_KERNEL void minhash_seed_kernel(cudf::column_device_view const d_strings,
     // initialize the output -- only needed for wider strings
     auto d_output = d_results + (str_idx * param_count);
     for (auto i = lane_idx; i < param_count; i += tile_size) {
-      d_output[i] = std::numeric_limits<hash_value_type>::max();
+      d_output[i] = cuda::std::numeric_limits<hash_value_type>::max();
     }
   }
 }
@@ -226,7 +225,7 @@ CUDF_KERNEL void minhash_kernel(cudf::column_device_view const d_strings,
                       ? section_size
                       : cuda::std::max(static_cast<cudf::size_type>(size_bytes > 0), section_size - width + 1);

-  auto const init     = size_bytes == 0 ? 0 : std::numeric_limits<hash_value_type>::max();
+  auto const init     = size_bytes == 0 ? 0 : cuda::std::numeric_limits<hash_value_type>::max();
   auto const lane_idx = block.thread_rank();
   auto const d_output = d_results + (str_idx * parameter_a.size());

@@ -235,7 +234,7 @@

   // constants used in the permutation calculations
   constexpr uint64_t mersenne_prime = (1UL << 61) - 1;
-  constexpr hash_value_type hash_max = std::numeric_limits<hash_value_type>::max();
+  constexpr hash_value_type hash_max = cuda::std::numeric_limits<hash_value_type>::max();

   // found to be an efficient shared memory size for both hash types
   __shared__ hash_value_type block_values[block_size * params_per_thread];
diff --git a/cpp/src/text/replace.cu b/cpp/src/text/replace.cu
index 943bcbe9b3a..1a6b571efde 100644
--- a/cpp/src/text/replace.cu
+++ b/cpp/src/text/replace.cu
@@ -35,6 +35,7 @@
 #include
 #include

+#include
 #include
 #include
 #include
@@ -196,7 +197,7 @@ struct sub_offset_fn {
   {
     // keep delimiter search within this sub-block
     auto const end =
-      d_input_chars + std::min(last_offset, ((idx + 2) * LS_SUB_BLOCK_SIZE) + first_offset);
+      d_input_chars + cuda::std::min(last_offset, ((idx + 2) * LS_SUB_BLOCK_SIZE) + first_offset);
     // starting point of this sub-block
     auto itr = d_input_chars + first_offset + ((idx + 1) * LS_SUB_BLOCK_SIZE);
     while ((itr < end) &&
diff --git a/cpp/src/text/subword/data_normalizer.cu b/cpp/src/text/subword/data_normalizer.cu
index a3bed45e4bd..45649e4f8f0 100644
--- a/cpp/src/text/subword/data_normalizer.cu
+++ b/cpp/src/text/subword/data_normalizer.cu
@@ -26,6 +26,7 @@
 #include
 #include

+#include
 #include
 #include
 #include
@@ -134,8 +135,8 @@ extract_code_points_from_utf8(unsigned char const* strings,
   constexpr uint8_t max_utf8_blocks_for_char = 4;
   uint8_t utf8_blocks[max_utf8_blocks_for_char] = {0};

-  for (int i = 0; i < std::min(static_cast<uint32_t>(max_utf8_blocks_for_char),
-                               total_bytes - start_byte_for_thread);
+  for (int i = 0; i < cuda::std::min(static_cast<uint32_t>(max_utf8_blocks_for_char),
+                                     total_bytes - start_byte_for_thread);
        ++i) {
     utf8_blocks[i] = strings[start_byte_for_thread + i];
   }
diff --git a/cpp/src/text/subword/wordpiece_tokenizer.cu b/cpp/src/text/subword/wordpiece_tokenizer.cu
index dd1e8ddb027..e8f482ce982 100644
--- a/cpp/src/text/subword/wordpiece_tokenizer.cu
+++ b/cpp/src/text/subword/wordpiece_tokenizer.cu
@@ -27,6 +27,8 @@
 #include
 #include

+#include
+#include <cuda/std/limits>
 #include
 #include
 #include
@@ -87,7 +89,7 @@ CUDF_KERNEL void init_data_and_mark_word_start_and_ends(uint32_t const* code_poi

   // Deal with the start_word_indices array
   if (char_for_thread < num_code_points) {
-    uint32_t val_to_write = std::numeric_limits<uint32_t>::max();
+    uint32_t val_to_write = cuda::std::numeric_limits<uint32_t>::max();
     if ((code_points[char_for_thread] != SPACE_CODE_POINT) && (char_for_thread > 0) &&
         (code_points[char_for_thread - 1] == SPACE_CODE_POINT)) {
       val_to_write = char_for_thread;
@@ -95,7 +97,7 @@
     start_word_indices[char_for_thread] = val_to_write;

     // Deal with the end_word_indices_array
-    val_to_write = std::numeric_limits<uint32_t>::max();
+    val_to_write = cuda::std::numeric_limits<uint32_t>::max();
     if ((code_points[char_for_thread] != SPACE_CODE_POINT) &&
         (char_for_thread + 1 < num_code_points) &&
         (code_points[char_for_thread + 1] == SPACE_CODE_POINT)) {
@@ -103,7 +105,7 @@
     }
     end_word_indices[char_for_thread] = val_to_write;

-    token_ids[char_for_thread]       = std::numeric_limits<uint32_t>::max();
+    token_ids[char_for_thread]       = cuda::std::numeric_limits<uint32_t>::max();
     tokens_per_word[char_for_thread] = 0;
   }
 }
@@ -214,7 +216,7 @@ struct mark_special_tokens {
   __device__ void operator()(size_t idx) const
   {
     uint32_t const start_index = start_word_indices[idx];
-    if ((start_index == std::numeric_limits<uint32_t>::max()) ||
+    if ((start_index == cuda::std::numeric_limits<uint32_t>::max()) ||
         ((start_index + MIN_ST_WIDTH + 2) > num_code_points))
       return;
     if (code_points[start_index] != '[') return;
@@ -225,12 +227,12 @@
     uint32_t const end_index = [&] {
       auto const begin = start_word_indices + start_pos;
       auto const width =
-        std::min(static_cast<size_t>(MAX_ST_WIDTH + 1), (num_code_points - start_pos));
+        cuda::std::min(static_cast<size_t>(MAX_ST_WIDTH + 1), (num_code_points - start_pos));
       auto const end = begin + width;
       // checking the next start-word is more reliable than arbitrarily searching for ']'
       // in case the text is split across string rows
       auto const iter = thrust::find_if(thrust::seq, begin + 1, end, [](auto swi) {
-        return swi != std::numeric_limits<uint32_t>::max();
+        return swi != cuda::std::numeric_limits<uint32_t>::max();
       });
       return iter == end ? start_index : static_cast<uint32_t>(iter - start_word_indices);
     }();
@@ -254,11 +256,11 @@
     thrust::fill(thrust::seq,
                  start_word_indices + start_index + 1,  // keep the first one
                  start_word_indices + end_index + 1,
-                 std::numeric_limits<uint32_t>::max());
+                 cuda::std::numeric_limits<uint32_t>::max());
     thrust::fill(thrust::seq,
                  end_word_indices + start_index,
                  end_word_indices + end_index + 1,
-                 std::numeric_limits<uint32_t>::max());
+                 cuda::std::numeric_limits<uint32_t>::max());

     // reset the new end-word index
     end_word_indices[end_pos] = end_pos + 1;
@@ -382,7 +384,7 @@ CUDF_KERNEL void kernel_wordpiece_tokenizer(uint32_t const* code_points,
       // We need to clean up the global array. This case is very uncommon.
       // Only 0.016% of words cannot be resolved to a token from the squad dev set.
       for (uint32_t i = 1; i < num_values_tokenized; ++i) {
-        token_ids[token_start + i] = std::numeric_limits<uint32_t>::max();
+        token_ids[token_start + i] = cuda::std::numeric_limits<uint32_t>::max();
       }
       num_values_tokenized = 0;
     }
@@ -423,7 +425,10 @@ uvector_pair wordpiece_tokenizer::tokenize(cudf::strings_column_view const& inpu
 }

 struct copy_if_fn {  // inline lambda not allowed in private or protected member function
-  __device__ bool operator()(uint32_t cp) { return cp != std::numeric_limits<uint32_t>::max(); }
+  __device__ bool operator()(uint32_t cp)
+  {
+    return cp != cuda::std::numeric_limits<uint32_t>::max();
+  }
 };

 struct tranform_fn {  // just converting uint8 value to uint32
@@ -487,7 +492,7 @@ void wordpiece_tokenizer::tokenize(uvector_pair& cps_and_offsets, rmm::cuda_stre
   auto itr_end = thrust::remove(rmm::exec_policy(stream),
                                 device_word_indices.begin(),
                                 device_word_indices.end(),
-                                std::numeric_limits<uint32_t>::max());
+                                cuda::std::numeric_limits<uint32_t>::max());

   // The number of tokens selected will be double the number of words since we
   // select from both the start and end index arrays.
@@ -523,7 +528,8 @@
   // token so this will always have enough memory to store the contiguous tokens.
   uint32_t* contiguous_token_ids = device_code_points;
   auto const copy_size =  // thrust::copy_if limited to copying int-max values
-    std::min(device_token_ids.size(), static_cast<std::size_t>(std::numeric_limits<int>::max()));
+    cuda::std::min(device_token_ids.size(),
+                   static_cast<std::size_t>(cuda::std::numeric_limits<int>::max()));
   auto ids_itr = device_token_ids.begin();
   auto const ids_end = device_token_ids.end();
   while (ids_itr != ids_end) {
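
The pattern throughout this diff is mechanical: every std:: facility reached from __device__ code is swapped for its libcu++ counterpart in cuda::std::. The motivation is that std::min, std::max, and std::numeric_limits<T>::max() are host-side constexpr functions, and calling them from device code generally compiles only under nvcc's experimental --expt-relaxed-constexpr flag, whereas the cuda::std:: versions are declared host/device by design. Below is a minimal standalone sketch of the sentinel-initialization idiom the minhash and wordpiece kernels rely on; the kernel and its host driver are hypothetical, written only to illustrate the cuda::std::numeric_limits usage, and are not part of this change.

#include <cuda/std/limits>
#include <cstdio>

// Each thread starts from the largest representable int -- the same
// sentinel value the kernels above write via cuda::std::numeric_limits --
// then folds in its strided slice of the input.
__global__ void min_reduce_kernel(int const* data, int n, int* result)
{
  int local_min = cuda::std::numeric_limits<int>::max();  // valid in device code
  for (int i = threadIdx.x; i < n; i += blockDim.x) {
    if (data[i] < local_min) { local_min = data[i]; }
  }
  atomicMin(result, local_min);  // combine the per-thread minima
}

int main()
{
  int h_data[8] = {42, 7, 19, 3, 88, 5, 61, 23};
  int* d_data;
  int* d_result;
  // the same expression also works on the host, unlike a __device__-only helper
  int h_init = cuda::std::numeric_limits<int>::max();
  cudaMalloc(&d_data, sizeof(h_data));
  cudaMalloc(&d_result, sizeof(int));
  cudaMemcpy(d_data, h_data, sizeof(h_data), cudaMemcpyHostToDevice);
  cudaMemcpy(d_result, &h_init, sizeof(int), cudaMemcpyHostToDevice);
  min_reduce_kernel<<<1, 32>>>(d_data, 8, d_result);
  int h_result = 0;
  cudaMemcpy(&h_result, d_result, sizeof(int), cudaMemcpyDeviceToHost);
  std::printf("min = %d\n", h_result);  // prints: min = 3
  cudaFree(d_data);
  cudaFree(d_result);
  return 0;
}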