From f3f159ae166426125347e7d6f8dd7210d4075179 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 13 Dec 2024 08:46:57 -0500 Subject: [PATCH 01/32] Use no-sync copy for fixed-width types in cudf::concatenate (#17584) Replacing `thrust::copy` with `cudaMemcpyAsync` improves performance up to 2x in specific cases in `cudf::concatenate`. The `thrust::copy` does a sync for device-to-device copy though it is not necessary. Using `rmm::exec_policy_nosync` had no effect. Will work with CCCL to determine if this is a bug in `thrust::copy` since computing the return value does not require a sync. Also moved the benchmark for concatenate from googlebench to nvbench. Closes #17172 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/17584 --- cpp/benchmarks/CMakeLists.txt | 5 +- cpp/benchmarks/column/concatenate.cpp | 169 ------------------------- cpp/benchmarks/copying/concatenate.cpp | 84 ++++++++++++ cpp/src/copying/concatenate.cu | 6 +- 4 files changed, 92 insertions(+), 172 deletions(-) delete mode 100644 cpp/benchmarks/column/concatenate.cpp create mode 100644 cpp/benchmarks/copying/concatenate.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 8e5ea900efa..b1456600c95 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -140,8 +140,9 @@ function(ConfigureNVBench CMAKE_BENCH_NAME) endfunction() # ################################################################################################## -# * column benchmarks ----------------------------------------------------------------------------- -ConfigureBench(COLUMN_CONCAT_BENCH column/concatenate.cpp) +# * copying benchmarks +# ----------------------------------------------------------------------------- +ConfigureNVBench(COPYING_NVBENCH copying/concatenate.cpp) 
# ################################################################################################## # * gather benchmark ------------------------------------------------------------------------------ diff --git a/cpp/benchmarks/column/concatenate.cpp b/cpp/benchmarks/column/concatenate.cpp deleted file mode 100644 index 51106c72137..00000000000 --- a/cpp/benchmarks/column/concatenate.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include - -class Concatenate : public cudf::benchmark {}; - -template -static void BM_concatenate(benchmark::State& state) -{ - cudf::size_type const num_rows = state.range(0); - cudf::size_type const num_cols = state.range(1); - - auto input = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, num_cols), - row_count{num_rows}, - Nullable ? 
std::optional{2.0 / 3.0} : std::nullopt); - auto input_columns = input->view(); - std::vector column_views(input_columns.begin(), input_columns.end()); - - CUDF_CHECK_CUDA(0); - - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - auto result = cudf::concatenate(column_views); - } - - state.SetBytesProcessed(state.iterations() * num_cols * num_rows * sizeof(T)); -} - -#define CONCAT_BENCHMARK_DEFINE(type, nullable) \ - BENCHMARK_DEFINE_F(Concatenate, BM_concatenate##_##nullable_##nullable) \ - (::benchmark::State & st) { BM_concatenate(st); } \ - BENCHMARK_REGISTER_F(Concatenate, BM_concatenate##_##nullable_##nullable) \ - ->RangeMultiplier(8) \ - ->Ranges({{1 << 6, 1 << 18}, {2, 1024}}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); - -CONCAT_BENCHMARK_DEFINE(int64_t, false) -CONCAT_BENCHMARK_DEFINE(int64_t, true) - -template -static void BM_concatenate_tables(benchmark::State& state) -{ - cudf::size_type const num_rows = state.range(0); - cudf::size_type const num_cols = state.range(1); - cudf::size_type const num_tables = state.range(2); - - std::vector> tables(num_tables); - std::generate_n(tables.begin(), num_tables, [&]() { - return create_sequence_table(cycle_dtypes({cudf::type_to_id()}, num_cols), - row_count{num_rows}, - Nullable ? 
std::optional{2.0 / 3.0} : std::nullopt); - }); - - // Generate table views - std::vector table_views(num_tables); - std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) mutable { - return table->view(); - }); - - CUDF_CHECK_CUDA(0); - - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - auto result = cudf::concatenate(table_views); - } - - state.SetBytesProcessed(state.iterations() * num_cols * num_rows * num_tables * sizeof(T)); -} - -#define CONCAT_TABLES_BENCHMARK_DEFINE(type, nullable) \ - BENCHMARK_DEFINE_F(Concatenate, BM_concatenate_tables##_##nullable_##nullable) \ - (::benchmark::State & st) { BM_concatenate_tables(st); } \ - BENCHMARK_REGISTER_F(Concatenate, BM_concatenate_tables##_##nullable_##nullable) \ - ->RangeMultiplier(8) \ - ->Ranges({{1 << 8, 1 << 12}, {2, 32}, {2, 128}}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); - -CONCAT_TABLES_BENCHMARK_DEFINE(int64_t, false) -CONCAT_TABLES_BENCHMARK_DEFINE(int64_t, true) - -class ConcatenateStrings : public cudf::benchmark {}; - -template -static void BM_concatenate_strings(benchmark::State& state) -{ - using column_wrapper = cudf::test::strings_column_wrapper; - - auto const num_rows = state.range(0); - auto const num_chars = state.range(1); - auto const num_cols = state.range(2); - - std::string str(num_chars, 'a'); - - // Create owning columns - std::vector columns; - columns.reserve(num_cols); - std::generate_n(std::back_inserter(columns), num_cols, [num_rows, c_str = str.c_str()]() { - auto iter = thrust::make_constant_iterator(c_str); - if (Nullable) { - auto count_it = thrust::make_counting_iterator(0); - auto valid_iter = - thrust::make_transform_iterator(count_it, [](auto i) { return i % 3 == 0; }); - return column_wrapper(iter, iter + num_rows, valid_iter); - } else { - return column_wrapper(iter, iter + num_rows); - } - }); - - // Generate column views - std::vector column_views; - 
column_views.reserve(columns.size()); - std::transform( - columns.begin(), columns.end(), std::back_inserter(column_views), [](auto const& col) { - return static_cast(col); - }); - - CUDF_CHECK_CUDA(0); - - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - auto result = cudf::concatenate(column_views); - } - - state.SetBytesProcessed(state.iterations() * num_cols * num_rows * - (sizeof(int32_t) + num_chars)); // offset + chars -} - -#define CONCAT_STRINGS_BENCHMARK_DEFINE(nullable) \ - BENCHMARK_DEFINE_F(Concatenate, BM_concatenate_strings##_##nullable_##nullable) \ - (::benchmark::State & st) { BM_concatenate_strings(st); } \ - BENCHMARK_REGISTER_F(Concatenate, BM_concatenate_strings##_##nullable_##nullable) \ - ->RangeMultiplier(8) \ - ->Ranges({{1 << 8, 1 << 14}, {8, 128}, {2, 256}}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); - -CONCAT_STRINGS_BENCHMARK_DEFINE(false) -CONCAT_STRINGS_BENCHMARK_DEFINE(true) diff --git a/cpp/benchmarks/copying/concatenate.cpp b/cpp/benchmarks/copying/concatenate.cpp new file mode 100644 index 00000000000..586b479d0ad --- /dev/null +++ b/cpp/benchmarks/copying/concatenate.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +#include +#include +#include +#include + +#include + +#include + +static void bench_concatenate(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_cols = static_cast(state.get_int64("num_cols")); + auto const nulls = static_cast(state.get_float64("nulls")); + + auto input = create_sequence_table( + cycle_dtypes({cudf::type_to_id()}, num_cols), row_count{num_rows}, nulls); + auto input_columns = input->view(); + auto column_views = std::vector(input_columns.begin(), input_columns.end()); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_global_memory_reads(num_rows * num_cols); + state.add_global_memory_writes(num_rows * num_cols); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch&) { auto result = cudf::concatenate(column_views); }); +} + +NVBENCH_BENCH(bench_concatenate) + .set_name("concatenate") + .add_int64_axis("num_rows", {64, 512, 4096, 32768, 262144}) + .add_int64_axis("num_cols", {2, 8, 64, 512, 1024}) + .add_float64_axis("nulls", {0.0, 0.3}); + +static void bench_concatenate_strings(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_cols = static_cast(state.get_int64("num_cols")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const nulls = static_cast(state.get_float64("nulls")); + + data_profile const profile = + data_profile_builder() + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .null_probability(nulls); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + auto const input = column->view(); + + auto column_views = std::vector(num_cols, input); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto const sv = cudf::strings_column_view(input); + 
state.add_global_memory_reads(sv.chars_size(stream) * num_cols); + state.add_global_memory_writes(sv.chars_size(stream) * num_cols); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch&) { auto result = cudf::concatenate(column_views); }); +} + +NVBENCH_BENCH(bench_concatenate_strings) + .set_name("concatenate_strings") + .add_int64_axis("num_rows", {256, 512, 4096, 16384}) + .add_int64_axis("num_cols", {2, 8, 64, 256}) + .add_int64_axis("row_width", {32, 128}) + .add_float64_axis("nulls", {0.0, 0.3}); diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index d8419760120..6fc49afd7ac 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -308,7 +308,11 @@ std::unique_ptr for_each_concatenate(host_span views, auto count = 0; for (auto& v : views) { - thrust::copy(rmm::exec_policy(stream), v.begin(), v.end(), m_view.begin() + count); + cudaMemcpyAsync(m_view.begin() + count, + v.begin(), + v.size() * sizeof(T), + cudaMemcpyDeviceToDevice, + stream.value()); count += v.size(); } From a0957273a686875c8c3da19dfb80f4048e472e19 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 13 Dec 2024 08:47:35 -0500 Subject: [PATCH 02/32] Allow large strings in nvtext benchmarks (#17579) Removes the 2GB limit check from the nvtext benchmarks and adjusts the parameters to be consistent across the benchmarks. Also converts the subword-tokenizer to nvbench and removes the unused `word_minhash.cpp` source file. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17579 --- cpp/benchmarks/CMakeLists.txt | 15 ++++-- cpp/benchmarks/text/edit_distance.cpp | 15 +++--- cpp/benchmarks/text/hash_ngrams.cpp | 15 +++--- cpp/benchmarks/text/jaccard.cpp | 13 ++--- cpp/benchmarks/text/normalize.cpp | 15 +++--- cpp/benchmarks/text/replace.cpp | 9 +--- cpp/benchmarks/text/subword.cpp | 58 +++++++++----------- cpp/benchmarks/text/tokenize.cpp | 15 +++--- cpp/benchmarks/text/vocab.cpp | 17 +++--- cpp/benchmarks/text/word_minhash.cpp | 77 --------------------------- 10 files changed, 74 insertions(+), 175 deletions(-) delete mode 100644 cpp/benchmarks/text/word_minhash.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index b1456600c95..749e1b628ee 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -352,11 +352,18 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary # ################################################################################################## # * nvtext benchmark ------------------------------------------------------------------- -ConfigureBench(TEXT_BENCH text/subword.cpp) - ConfigureNVBench( - TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp - text/ngrams.cpp text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp + TEXT_NVBENCH + text/edit_distance.cpp + text/hash_ngrams.cpp + text/jaccard.cpp + text/minhash.cpp + text/ngrams.cpp + text/normalize.cpp + text/replace.cpp + text/subword.cpp + text/tokenize.cpp + text/vocab.cpp ) # ################################################################################################## diff --git a/cpp/benchmarks/text/edit_distance.cpp b/cpp/benchmarks/text/edit_distance.cpp index 6ffa90edb8f..0ad1ae30f8c 100644 --- 
a/cpp/benchmarks/text/edit_distance.cpp +++ b/cpp/benchmarks/text/edit_distance.cpp @@ -27,15 +27,11 @@ static void bench_edit_distance(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const strings_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const strings_table = create_random_table( {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, strings_profile); cudf::strings_column_view input1(strings_table->view().column(0)); @@ -55,5 +51,6 @@ static void bench_edit_distance(nvbench::state& state) NVBENCH_BENCH(bench_edit_distance) .set_name("edit_distance") - .add_int64_axis("num_rows", {1024, 4096, 8192, 16364, 32768, 262144}) - .add_int64_axis("row_width", {8, 16, 32, 64, 128, 256}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144}); diff --git a/cpp/benchmarks/text/hash_ngrams.cpp b/cpp/benchmarks/text/hash_ngrams.cpp index 4e5daf83a3c..7577cf00c0f 100644 --- a/cpp/benchmarks/text/hash_ngrams.cpp +++ b/cpp/benchmarks/text/hash_ngrams.cpp @@ -27,16 +27,12 @@ static void bench_hash_ngrams(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const ngrams = 
static_cast(state.get_int64("ngrams")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const strings_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const strings_table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, strings_profile); cudf::strings_column_view input(strings_table->view().column(0)); @@ -55,6 +51,7 @@ static void bench_hash_ngrams(nvbench::state& state) NVBENCH_BENCH(bench_hash_ngrams) .set_name("hash_ngrams") - .add_int64_axis("num_rows", {1024, 4096, 8192, 16364, 32768, 262144}) - .add_int64_axis("row_width", {128, 512, 2048}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {128, 512, 2048}) + .add_int64_axis("num_rows", {16384, 32768, 262144}) .add_int64_axis("ngrams", {5, 10}); diff --git a/cpp/benchmarks/text/jaccard.cpp b/cpp/benchmarks/text/jaccard.cpp index d5b74da6773..5506501138b 100644 --- a/cpp/benchmarks/text/jaccard.cpp +++ b/cpp/benchmarks/text/jaccard.cpp @@ -28,17 +28,13 @@ static void bench_jaccard(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const substring_width = static_cast(state.get_int64("substring_width")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const strings_profile = data_profile_builder() - .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .distribution(cudf::type_id::STRING, 
distribution_id::NORMAL, min_width, max_width) .no_validity(); auto const input_table = create_random_table( {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, strings_profile); @@ -59,6 +55,7 @@ static void bench_jaccard(nvbench::state& state) NVBENCH_BENCH(bench_jaccard) .set_name("jaccard") + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {128, 512, 1024, 2048}) .add_int64_axis("num_rows", {32768, 131072, 262144}) - .add_int64_axis("row_width", {128, 512, 1024, 2048}) .add_int64_axis("substring_width", {5, 10}); diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp index 71bccd80d39..594dc0de28a 100644 --- a/cpp/benchmarks/text/normalize.cpp +++ b/cpp/benchmarks/text/normalize.cpp @@ -28,16 +28,12 @@ static void bench_normalize(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const normalize_type = state.get_string("type"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); @@ -60,6 +56,7 @@ static void bench_normalize(nvbench::state& state) NVBENCH_BENCH(bench_normalize) .set_name("normalize") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 
64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"spaces", "characters", "to_lower"}); diff --git a/cpp/benchmarks/text/replace.cpp b/cpp/benchmarks/text/replace.cpp index 767ebab3eee..24ca4e5dfd7 100644 --- a/cpp/benchmarks/text/replace.cpp +++ b/cpp/benchmarks/text/replace.cpp @@ -31,11 +31,6 @@ static void bench_replace(nvbench::state& state) auto const num_rows = static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - std::vector words{" ", "one ", "two ", "three ", "four ", "five ", "six ", "sevén ", "eight ", "nine ", "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ", @@ -71,5 +66,5 @@ static void bench_replace(nvbench::state& state) NVBENCH_BENCH(bench_replace) .set_name("replace") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp index dd8df695d3e..0b4e3bdefa5 100644 --- a/cpp/benchmarks/text/subword.cpp +++ b/cpp/benchmarks/text/subword.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,6 @@ * limitations under the License. 
*/ -#include -#include - #include #include @@ -24,6 +21,8 @@ #include +#include + #include #include #include @@ -54,40 +53,33 @@ static std::string create_hash_vocab_file() return hash_file; } -static void BM_subword_tokenizer(benchmark::State& state) +static void bench_subword_tokenizer(nvbench::state& state) { - auto const nrows = static_cast(state.range(0)); - std::vector h_strings(nrows, "This is a test "); + auto const num_rows = static_cast(state.get_int64("num_rows")); + + std::vector h_strings(num_rows, "This is a test "); cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end()); static std::string hash_file = create_hash_vocab_file(); std::vector offsets{14}; - uint32_t max_sequence_length = 64; - uint32_t stride = 48; - uint32_t do_truncate = 0; - uint32_t do_lower = 1; - // - auto vocab = nvtext::load_vocabulary_file(hash_file); - for (auto _ : state) { - cuda_event_timer raii(state, true); - auto result = nvtext::subword_tokenize(cudf::strings_column_view{strings}, - *vocab, - max_sequence_length, - stride, - do_lower, - do_truncate); - } -} + uint32_t max_sequence = 64; + uint32_t stride = 48; + uint32_t do_truncate = 0; + uint32_t do_lower = 1; -class Subword : public cudf::benchmark {}; + auto input = cudf::strings_column_view{strings}; -#define SUBWORD_BM_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(Subword, name)(::benchmark::State & state) { BM_subword_tokenizer(state); } \ - BENCHMARK_REGISTER_F(Subword, name) \ - ->RangeMultiplier(2) \ - ->Range(1 << 10, 1 << 17) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + auto chars_size = input.chars_size(cudf::get_default_stream()); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows * max_sequence); -SUBWORD_BM_BENCHMARK_DEFINE(BM_subword_tokenizer); + auto vocab = nvtext::load_vocabulary_file(hash_file); + state.exec(nvbench::exec_tag::sync, 
[&](nvbench::launch& launch) { + auto result = + nvtext::subword_tokenize(input, *vocab, max_sequence, stride, do_lower, do_truncate); + }); +} -// BENCHMARK_MAIN(); +NVBENCH_BENCH(bench_subword_tokenizer) + .set_name("subword_tokenize") + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp index e83310e0343..b9590c5539f 100644 --- a/cpp/benchmarks/text/tokenize.cpp +++ b/cpp/benchmarks/text/tokenize.cpp @@ -31,17 +31,13 @@ static void bench_tokenize(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const tokenize_type = state.get_string("type"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder() - .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width) .no_validity(); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); @@ -82,6 +78,7 @@ static void bench_tokenize(nvbench::state& state) NVBENCH_BENCH(bench_tokenize) .set_name("tokenize") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"whitespace", "multi", "count", "count_multi", "ngrams", "characters"}); diff --git a/cpp/benchmarks/text/vocab.cpp 
b/cpp/benchmarks/text/vocab.cpp index 523d277df18..0502f375d99 100644 --- a/cpp/benchmarks/text/vocab.cpp +++ b/cpp/benchmarks/text/vocab.cpp @@ -33,16 +33,12 @@ static void bench_vocab_tokenize(nvbench::state& state) { auto const stream = cudf::get_default_stream(); auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - - auto const column = [num_rows, row_width] { + auto const column = [num_rows, min_width, max_width] { data_profile const profile = data_profile_builder().no_validity().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const col = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); return cudf::strings::filter_characters_of_type( cudf::strings_column_view(col->view()), @@ -85,5 +81,6 @@ static void bench_vocab_tokenize(nvbench::state& state) NVBENCH_BENCH(bench_vocab_tokenize) .set_name("vocab_tokenize") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {262144, 524288, 1048576, 2097152, 4194304, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/text/word_minhash.cpp b/cpp/benchmarks/text/word_minhash.cpp deleted file mode 100644 index adc3dddc59c..00000000000 --- a/cpp/benchmarks/text/word_minhash.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include -#include - -#include - -#include - -#include - -static void bench_word_minhash(nvbench::state& state) -{ - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - auto const seed_count = static_cast(state.get_int64("seed_count")); - auto const base64 = state.get_int64("hash_type") == 64; - - data_profile const strings_profile = - data_profile_builder().distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, 5); - auto strings_table = - create_random_table({cudf::type_id::STRING}, row_count{num_rows}, strings_profile); - - auto const num_offsets = (num_rows / row_width) + 1; - auto offsets = cudf::sequence(num_offsets, - cudf::numeric_scalar(0), - cudf::numeric_scalar(row_width)); - - auto source = cudf::make_lists_column(num_offsets - 1, - std::move(offsets), - std::move(strings_table->release().front()), - 0, - rmm::device_buffer{}); - - data_profile const seeds_profile = data_profile_builder().no_validity().distribution( - cudf::type_to_id(), distribution_id::NORMAL, 0, 256); - auto const seed_type = base64 ? 
cudf::type_id::UINT64 : cudf::type_id::UINT32; - auto const seeds_table = create_random_table({seed_type}, row_count{seed_count}, seeds_profile); - auto seeds = seeds_table->get_column(0); - - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - - cudf::strings_column_view input(cudf::lists_column_view(source->view()).child()); - auto chars_size = input.chars_size(cudf::get_default_stream()); - state.add_global_memory_reads(chars_size); - state.add_global_memory_writes(num_rows); // output are hashes - - state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - auto result = base64 ? nvtext::word_minhash64(source->view(), seeds.view()) - : nvtext::word_minhash(source->view(), seeds.view()); - }); -} - -NVBENCH_BENCH(bench_word_minhash) - .set_name("word_minhash") - .add_int64_axis("num_rows", {131072, 262144, 524288, 1048576, 2097152}) - .add_int64_axis("row_width", {10, 100, 1000}) - .add_int64_axis("seed_count", {2, 25}) - .add_int64_axis("hash_type", {32, 64}); From 62669e04cc11bd53dab1102e83aba76804f4dbde Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:10:02 -0500 Subject: [PATCH 03/32] Fix ctest fail running libcudf tests in a Debug build (#17576) Fixes libcudf gtest failures when running with ctest on a Debug build. 
The error from `LastTest.log` indicates: ``` 1/106 Testing: COLUMN_TEST 1/106 Test: COLUMN_TEST Command: "/conda/envs/rapids/bin/cmake" "-Dcommand_to_run=/cudf/cpp/build/gtests/COLUMN_TEST" "-Dcommand_args=" "-P=/cudf/cpp/build/rapids-cmake/./run_gpu_test.cmake" Directory: /cudf/cpp/build/tests "COLUMN_TEST" start time: Dec 11 15:46 UTC Output: ---------------------------------------------------------- /conda/envs/rapids/bin/cmake: symbol lookup error: /cudf/cpp/build/libcudf_identify_stream_usage_mode_cudf.so: undefined symbol: _ZN3rmm6loggerD1Ev Test time = 0.00 sec ---------------------------------------------------------- Test Failed. "COLUMN_TEST" end time: Dec 11 15:46 UTC "COLUMN_TEST" time elapsed: 00:00:00 ``` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17576 --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2f17b57b0a4..78f529a44d3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1105,7 +1105,7 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) ${_tgt} PRIVATE "$:${CUDF_CXX_FLAGS}>>" ) target_include_directories(${_tgt} PRIVATE "$") - target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm) + target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm rmm::rmm_logger rmm::rmm_logger_impl) if(CUDF_BUILD_STACKTRACE_DEBUG) target_link_libraries(${_tgt} PRIVATE cudf_backtrace) endif() From 4d6925ce1b83e10ea249346436ff8fdc4d28d73d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:30:45 -0800 Subject: [PATCH 04/32] Remove unused masked keyword in column_empty (#17530) Follow up to https://github.com/rapidsai/cudf/pull/16715. 
Now that the usages of the `masked` keyword in RAPIDS have been addressed (https://github.com/rapidsai/cuspatial/pull/1496 is the only one I could find), I think we can remove this keyword altogether in this method. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17530 --- python/cudf/cudf/core/column/categorical.py | 2 +- python/cudf/cudf/core/column/column.py | 12 ++---- python/cudf/cudf/core/column/datetime.py | 6 +-- .../cudf/cudf/core/column/numerical_base.py | 2 +- python/cudf/cudf/core/column/string.py | 2 +- python/cudf/cudf/core/column/timedelta.py | 2 +- python/cudf/cudf/core/dataframe.py | 39 +++++++------------ python/cudf/cudf/core/dtypes.py | 4 +- python/cudf/cudf/core/groupby/groupby.py | 7 ++-- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/indexed_frame.py | 1 - python/cudf/cudf/io/parquet.py | 1 - 12 files changed, 28 insertions(+), 52 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 71ec11e75af..a0cf38c6f51 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1193,7 +1193,7 @@ def _concat( f"size > {libcudf.MAX_COLUMN_SIZE_STR}" ) elif newsize == 0: - codes_col = column.column_empty(0, head.codes.dtype, masked=True) + codes_col = column.column_empty(0, head.codes.dtype) else: codes_col = column.concat_columns(codes) # type: ignore[arg-type] diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 42b4fda8be2..624a3ac95ed 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -551,7 +551,7 @@ def slice(self, start: int, stop: int, stride: int | None = None) -> Self: if stop < 0 and not (stride < 0 and stop == -1): stop = stop + len(self) if (stride > 0 and start >= stop) or (stride < 0 and start <= stop): - 
return cast(Self, column_empty(0, self.dtype, masked=True)) + return cast(Self, column_empty(0, self.dtype)) # compute mask slice if stride == 1: return libcudf.copying.column_slice(self, [start, stop])[ @@ -1054,7 +1054,7 @@ def astype(self, dtype: Dtype, copy: bool = False) -> ColumnBase: if self.dtype == dtype: result = self else: - result = column_empty(0, dtype=dtype, masked=self.nullable) + result = column_empty(0, dtype=dtype) elif dtype == "category": # TODO: Figure out why `cudf.dtype("category")` # astype's different than just the string @@ -1625,7 +1625,6 @@ def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool: def column_empty( row_count: int, dtype: Dtype = "object", - masked: bool = False, for_numba: bool = False, ) -> ColumnBase: """ @@ -1642,9 +1641,6 @@ def column_empty( dtype : Dtype Type of the column. - masked : bool - Unused. - for_numba : bool, default False If True, don't allocate a mask as it's not supported by numba. """ @@ -2420,7 +2416,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: """Concatenate a sequence of columns.""" if len(objs) == 0: dtype = cudf.dtype(None) - return column_empty(0, dtype=dtype, masked=True) + return column_empty(0, dtype=dtype) # If all columns are `NumericalColumn` with different dtypes, # we cast them to a common dtype. @@ -2467,7 +2463,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: f"size > {libcudf.MAX_COLUMN_SIZE_STR}" ) elif newsize == 0: - return column_empty(0, head.dtype, masked=True) + return column_empty(0, head.dtype) # Filter out inputs that have 0 length, then concatenate. 
objs_with_len = [o for o in objs if len(o)] diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b526a6efa51..81b82040b8d 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -598,14 +598,12 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn: if len(self) == 0: return cast( cudf.core.column.StringColumn, - column.column_empty(0, dtype="object", masked=False), + column.column_empty(0, dtype="object"), ) if format in _DATETIME_SPECIAL_FORMATS: names = as_column(_DATETIME_NAMES) else: - names = cudf.core.column.column_empty( - 0, dtype="object", masked=False - ) + names = column.column_empty(0, dtype="object") return string._datetime_to_str_typecast_functions[self.dtype]( self, format, names ) diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index e06a0447f5c..7a39355dd50 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -139,7 +139,7 @@ def quantile( result = cast( NumericalBaseColumn, cudf.core.column.column_empty( - row_count=len(q), dtype=self.dtype, masked=True + row_count=len(q), dtype=self.dtype ), ) else: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index c021554f3bd..d76caa5c3b8 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5855,7 +5855,7 @@ def strptime( f"dtype must be datetime or timedelta type, not {dtype}" ) elif self.null_count == len(self): - return column.column_empty(len(self), dtype=dtype, masked=True) # type: ignore[return-value] + return column.column_empty(len(self), dtype=dtype) # type: ignore[return-value] elif (self == "None").any(): raise ValueError( "Cannot convert `None` value to datetime or timedelta." 
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index f3a7916aa35..8b1515acae2 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -294,7 +294,7 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn: if len(self) == 0: return cast( cudf.core.column.StringColumn, - column.column_empty(0, dtype="object", masked=False), + column.column_empty(0, dtype="object"), ) else: return string._timedelta_to_str_typecast_functions[self.dtype]( diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8cdc45e12da..fce361e18ea 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -774,9 +774,7 @@ def __init__( label_dtype = getattr(columns, "dtype", None) self._data = ColumnAccessor( { - k: column.column_empty( - len(self), dtype="object", masked=True - ) + k: column_empty(len(self), dtype="object") for k in columns }, level_names=tuple(columns.names) @@ -979,8 +977,8 @@ def _init_from_series_list(self, data, columns, index): if columns is not None: for col_name in columns: if col_name not in self._data: - self._data[col_name] = column.column_empty( - row_count=len(self), dtype=None, masked=True + self._data[col_name] = column_empty( + row_count=len(self), dtype=None ) self._data._level_names = ( tuple(columns.names) @@ -1031,11 +1029,7 @@ def _init_from_list_like(self, data, index=None, columns=None): data = list(itertools.zip_longest(*data)) if columns is not None and len(data) == 0: - data = [ - cudf.core.column.column_empty(row_count=0, dtype=None) - for _ in columns - ] - + data = [column_empty(row_count=0, dtype=None) for _ in columns] for col_name, col in enumerate(data): self._data[col_name] = column.as_column(col) self._data.rangeindex = True @@ -1074,9 +1068,8 @@ def _init_from_dict_like( # the provided index, so we need to return a masked # array of nulls if an index is given. 
empty_column = functools.partial( - cudf.core.column.column_empty, - row_count=(0 if index is None else len(index)), - masked=index is not None, + column_empty, + row_count=0 if index is None else len(index), ) data = { @@ -1421,7 +1414,7 @@ def __setitem__(self, arg, value): new_columns = ( value if key == arg - else column.column_empty( + else column_empty( row_count=length, dtype=col.dtype ) for key, col in self._column_labels_and_values @@ -3373,7 +3366,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): if num_cols != 0: ca = self._data._from_columns_like_self( ( - column.column_empty(row_count=length, dtype=dtype) + column_empty(row_count=length, dtype=dtype) for _, dtype in self._dtypes ), verify=False, @@ -3479,7 +3472,7 @@ def diff(self, periods=1, axis=0): if abs(periods) > len(self): df = cudf.DataFrame._from_data( { - name: column_empty(len(self), dtype=dtype, masked=True) + name: column_empty(len(self), dtype=dtype) for name, dtype in zip(self._column_names, self.dtypes) } ) @@ -3859,9 +3852,7 @@ def agg(self, aggs, axis=None): result = DataFrame(index=idxs, columns=cols) for key in aggs.keys(): col = self[key] - col_empty = column_empty( - len(idxs), dtype=col.dtype, masked=True - ) + col_empty = column_empty(len(idxs), dtype=col.dtype) ans = cudf.Series._from_column( col_empty, index=cudf.Index(idxs) ) @@ -6177,9 +6168,7 @@ def quantile( quant_index=False, )._column if len(res) == 0: - res = column.column_empty( - row_count=len(qs), dtype=ser.dtype - ) + res = column_empty(row_count=len(qs), dtype=ser.dtype) result[k] = res result = DataFrame._from_data(result) @@ -7333,9 +7322,7 @@ def unnamed_group_generator(): ) all_nulls = functools.cache( - functools.partial( - column_empty, self.shape[0], common_type, masked=True - ) + functools.partial(column_empty, self.shape[0], common_type) ) # homogenize the dtypes of the columns @@ -8582,7 +8569,7 @@ def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories): 
# If column not in this df, fill with an all-null column if idx >= len(cols) or cols[idx] is None: n = len(next(x for x in cols if x is not None)) - cols[idx] = column_empty(row_count=n, dtype=dtype, masked=True) + cols[idx] = column_empty(row_count=n, dtype=dtype) else: # If column is categorical, rebase the codes with the # combined categories, and cast the new codes to the diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 9bb29f1920a..971f0be77f8 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -189,9 +189,7 @@ def categories(self) -> cudf.Index: Index(['b', 'a'], dtype='object') """ if self._categories is None: - col = cudf.core.column.column_empty( - 0, dtype="object", masked=False - ) + col = cudf.core.column.column_empty(0, dtype="object") else: col = self._categories return cudf.Index._from_column(col) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index d4f3394833a..a8d82f977d5 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -493,9 +493,7 @@ def size(self): """ Return the size of each group. 
""" - col = cudf.core.column.column_empty( - len(self.obj), "int8", masked=False - ) + col = cudf.core.column.column_empty(len(self.obj), "int8") result = ( cudf.Series._from_column(col, name=getattr(self.obj, "name", None)) .groupby(self.grouping, sort=self._sort, dropna=self._dropna) @@ -523,7 +521,8 @@ def cumcount(self, ascending: bool = True): return ( cudf.Series._from_column( cudf.core.column.column_empty( - len(self.obj), "int8", masked=False + len(self.obj), + "int8", ), index=self.obj.index, ) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index eeb6e3bd547..8d3ef1036d1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -336,7 +336,7 @@ def _values(self) -> ColumnBase: if len(self) > 0: return column.as_column(self._range, dtype=self.dtype) else: - return column.column_empty(0, masked=False, dtype=self.dtype) + return column.column_empty(0, dtype=self.dtype) def _clean_nulls_from_index(self) -> Self: return self diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 0e6a5e03ea6..81d954960e2 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3851,7 +3851,6 @@ def _reindex( if name in df._data else cudf.core.column.column.column_empty( dtype=dtypes.get(name, np.float64), - masked=True, row_count=len(index), ) ) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 66095d4a155..153ee0fa01a 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -1139,7 +1139,6 @@ def _parquet_to_frame( dfs[-1][name] = column_empty( row_count=_len, dtype=_dtype, - masked=True, ) else: dfs[-1][name] = as_column( From 1a67646fa3998788757b05a08eae1c8d1ee73eb2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 13 Dec 2024 12:23:30 -0800 Subject: [PATCH 05/32] Move cudf._lib.sort to cudf.core._internals (#17488) 
Contributes to https://github.com/rapidsai/cudf/issues/17317 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/17488 --- python/cudf/cudf/_lib/CMakeLists.txt | 4 +- python/cudf/cudf/_lib/__init__.py | 1 - python/cudf/cudf/_lib/sort.pyx | 365 ------------------ python/cudf/cudf/core/_internals/sorting.py | 205 ++++++++++ python/cudf/cudf/core/column/column.py | 23 +- python/cudf/cudf/core/column/numerical.py | 65 ++-- .../cudf/cudf/core/column/numerical_base.py | 4 +- python/cudf/cudf/core/frame.py | 3 +- python/cudf/cudf/core/groupby/groupby.py | 25 +- python/cudf/cudf/core/indexed_frame.py | 44 ++- python/cudf/cudf/core/join/join.py | 5 +- python/cudf/cudf/core/multiindex.py | 3 +- python/cudf/cudf/core/series.py | 7 +- 13 files changed, 324 insertions(+), 430 deletions(-) delete mode 100644 python/cudf/cudf/_lib/sort.pyx create mode 100644 python/cudf/cudf/core/_internals/sorting.py diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 8cec8af3c67..427ffcc8c12 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -12,8 +12,8 @@ # the License. 
# ============================================================================= -set(cython_sources column.pyx copying.pyx groupby.pyx interop.pyx scalar.pyx sort.pyx - stream_compaction.pyx string_casting.pyx strings_udf.pyx types.pyx utils.pyx +set(cython_sources column.pyx copying.pyx groupby.pyx interop.pyx scalar.pyx stream_compaction.pyx + string_casting.pyx strings_udf.pyx types.pyx utils.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index 001e5cbb676..26afdd62caf 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -5,7 +5,6 @@ copying, groupby, interop, - sort, stream_compaction, string_casting, strings_udf, diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx deleted file mode 100644 index eefe37d9880..00000000000 --- a/python/cudf/cudf/_lib/sort.pyx +++ /dev/null @@ -1,365 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from itertools import repeat - -from cudf.core.buffer import acquire_spill_lock - -from libcpp cimport bool - -from pylibcudf.libcudf.aggregation cimport rank_method -from cudf._lib.column cimport Column -from cudf._lib.utils cimport columns_from_pylibcudf_table - -import pylibcudf - - -@acquire_spill_lock() -def is_sorted( - list source_columns, object ascending=None, object null_position=None -): - """ - Checks whether the rows of a `table` are sorted in lexicographical order. - - Parameters - ---------- - source_columns : list of columns - columns to be checked for sort order - ascending : None or list-like of booleans - None or list-like of boolean values indicating expected sort order of - each column. If list-like, size of list-like must be len(columns). If - None, all columns expected sort order is set to ascending. False (0) - - descending, True (1) - ascending. 
- null_position : None or list-like of booleans - None or list-like of boolean values indicating desired order of nulls - compared to other elements. If list-like, size of list-like must be - len(columns). If None, null order is set to before. False (0) - after, - True (1) - before. - - Returns - ------- - returns : boolean - Returns True, if sorted as expected by ``ascending`` and - ``null_position``, False otherwise. - """ - - if ascending is None: - column_order = [pylibcudf.types.Order.ASCENDING] * len(source_columns) - else: - if len(ascending) != len(source_columns): - raise ValueError( - f"Expected a list-like of length {len(source_columns)}, " - f"got length {len(ascending)} for `ascending`" - ) - column_order = [pylibcudf.types.Order.DESCENDING] * len(source_columns) - for idx, val in enumerate(ascending): - if val: - column_order[idx] = pylibcudf.types.Order.ASCENDING - - if null_position is None: - null_precedence = [pylibcudf.types.NullOrder.AFTER] * len(source_columns) - else: - if len(null_position) != len(source_columns): - raise ValueError( - f"Expected a list-like of length {len(source_columns)}, " - f"got length {len(null_position)} for `null_position`" - ) - null_precedence = [pylibcudf.types.NullOrder.AFTER] * len(source_columns) - for idx, val in enumerate(null_position): - if val: - null_precedence[idx] = pylibcudf.types.NullOrder.BEFORE - - return pylibcudf.sorting.is_sorted( - pylibcudf.Table( - [c.to_pylibcudf(mode="read") for c in source_columns] - ), - column_order, - null_precedence - ) - - -def ordering(column_order, null_precedence): - """ - Construct order and null order vectors - - Parameters - ---------- - column_order - Iterable of bool (True for ascending order, False for descending) - null_precedence - Iterable string for null positions ("first" for start, "last" for end) - - Both iterables must be the same length (not checked) - - Returns - ------- - pair of vectors (order, and null_order) - """ - c_column_order = [] - 
c_null_precedence = [] - for asc, null in zip(column_order, null_precedence): - c_column_order.append( - pylibcudf.types.Order.ASCENDING if asc else pylibcudf.types.Order.DESCENDING - ) - if asc ^ (null == "first"): - c_null_precedence.append(pylibcudf.types.NullOrder.AFTER) - elif asc ^ (null == "last"): - c_null_precedence.append(pylibcudf.types.NullOrder.BEFORE) - else: - raise ValueError(f"Invalid null precedence {null}") - return c_column_order, c_null_precedence - - -@acquire_spill_lock() -def order_by( - list columns_from_table, - object ascending, - str na_position, - *, - bool stable -): - """ - Get index to sort the table in ascending/descending order. - - Parameters - ---------- - columns_from_table : list[Column] - Columns from the table which will be sorted - ascending : sequence[bool] - Sequence of boolean values which correspond to each column - in the table to be sorted signifying the order of each column - True - Ascending and False - Descending - na_position : str - Whether null values should show up at the "first" or "last" - position of **all** sorted column. - stable : bool - Should the sort be stable? (no default) - - Returns - ------- - Column of indices that sorts the table - """ - order = ordering(ascending, repeat(na_position)) - func = getattr(pylibcudf.sorting, f"{'stable_' if stable else ''}sorted_order") - - return Column.from_pylibcudf( - func( - pylibcudf.Table( - [c.to_pylibcudf(mode="read") for c in columns_from_table], - ), - order[0], - order[1], - ) - ) - - -@acquire_spill_lock() -def sort( - list values, - list column_order=None, - list null_precedence=None, -): - """ - Sort the table in ascending/descending order. - - Parameters - ---------- - values : list[Column] - Columns of the table which will be sorted - column_order : list[bool], optional - Sequence of boolean values which correspond to each column in - keys providing the sort order (default all True). - With True <=> ascending; False <=> descending. 
- null_precedence : list[str], optional - Sequence of "first" or "last" values (default "first") - indicating the position of null values when sorting the keys. - """ - ncol = len(values) - order = ordering( - column_order or repeat(True, ncol), - null_precedence or repeat("first", ncol), - ) - return columns_from_pylibcudf_table( - pylibcudf.sorting.sort( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in values]), - order[0], - order[1], - ) - ) - - -@acquire_spill_lock() -def sort_by_key( - list values, - list keys, - object ascending, - object na_position, - *, - bool stable, -): - """ - Sort a table by given keys - - Parameters - ---------- - values : list[Column] - Columns of the table which will be sorted - keys : list[Column] - Columns making up the sort key - ascending : list[bool] - Sequence of boolean values which correspond to each column - in the table to be sorted signifying the order of each column - True - Ascending and False - Descending - na_position : list[str] - Sequence of "first" or "last" values (default "first") - indicating the position of null values when sorting the keys. - stable : bool - Should the sort be stable? 
(no default) - - Returns - ------- - list[Column] - list of value columns sorted by keys - """ - order = ordering(ascending, na_position) - func = getattr(pylibcudf.sorting, f"{'stable_' if stable else ''}sort_by_key") - return columns_from_pylibcudf_table( - func( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in values]), - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in keys]), - order[0], - order[1], - ) - ) - - -@acquire_spill_lock() -def segmented_sort_by_key( - list values, - list keys, - Column segment_offsets, - list column_order=None, - list null_precedence=None, - *, - bool stable, -): - """ - Sort segments of a table by given keys - - Parameters - ---------- - values : list[Column] - Columns of the table which will be sorted - keys : list[Column] - Columns making up the sort key - offsets : Column - Segment offsets - column_order : list[bool], optional - Sequence of boolean values which correspond to each column in - keys providing the sort order (default all True). - With True <=> ascending; False <=> descending. - null_precedence : list[str], optional - Sequence of "first" or "last" values (default "first") - indicating the position of null values when sorting the keys. - stable : bool - Should the sort be stable? 
(no default) - - Returns - ------- - list[Column] - list of value columns sorted by keys - """ - ncol = len(values) - order = ordering( - column_order or repeat(True, ncol), - null_precedence or repeat("first", ncol), - ) - func = getattr( - pylibcudf.sorting, - f"{'stable_' if stable else ''}segmented_sort_by_key" - ) - return columns_from_pylibcudf_table( - func( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in values]), - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in keys]), - segment_offsets.to_pylibcudf(mode="read"), - order[0], - order[1], - ) - ) - - -@acquire_spill_lock() -def digitize(list source_columns, list bins, bool right=False): - """ - Return the indices of the bins to which each value in source_table belongs. - - Parameters - ---------- - source_columns : Input columns to be binned. - bins : List containing columns of bins - right : Indicating whether the intervals include the - right or the left bin edge. - """ - return Column.from_pylibcudf( - getattr(pylibcudf.search, "lower_bound" if right else "upper_bound")( - pylibcudf.Table( - [c.to_pylibcudf(mode="read") for c in bins] - ), - pylibcudf.Table( - [c.to_pylibcudf(mode="read") for c in source_columns] - ), - [pylibcudf.types.Order.ASCENDING]*len(bins), - [pylibcudf.types.NullOrder.BEFORE]*len(bins) - ) - ) - - -@acquire_spill_lock() -def rank_columns(list source_columns, rank_method method, str na_option, - bool ascending, bool pct - ): - """ - Compute numerical data ranks (1 through n) of each column in the dataframe - """ - column_order = ( - pylibcudf.types.Order.ASCENDING - if ascending - else pylibcudf.types.Order.DESCENDING - ) - # ascending - # #top = na_is_smallest - # #bottom = na_is_largest - # #keep = na_is_largest - # descending - # #top = na_is_largest - # #bottom = na_is_smallest - # #keep = na_is_smallest - if ascending: - if na_option == 'top': - null_precedence = pylibcudf.types.NullOrder.BEFORE - else: - null_precedence = pylibcudf.types.NullOrder.AFTER - else: 
- if na_option == 'top': - null_precedence = pylibcudf.types.NullOrder.AFTER - else: - null_precedence = pylibcudf.types.NullOrder.BEFORE - c_null_handling = ( - pylibcudf.types.NullPolicy.EXCLUDE - if na_option == 'keep' - else pylibcudf.types.NullPolicy.INCLUDE - ) - - return [ - Column.from_pylibcudf( - pylibcudf.sorting.rank( - col.to_pylibcudf(mode="read"), - method, - column_order, - c_null_handling, - null_precedence, - pct, - ) - ) - for col in source_columns - ] diff --git a/python/cudf/cudf/core/_internals/sorting.py b/python/cudf/cudf/core/_internals/sorting.py new file mode 100644 index 00000000000..69f9e7664b1 --- /dev/null +++ b/python/cudf/cudf/core/_internals/sorting.py @@ -0,0 +1,205 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +from __future__ import annotations + +import itertools +from typing import TYPE_CHECKING, Literal + +import pylibcudf as plc + +from cudf._lib.column import Column +from cudf.core.buffer import acquire_spill_lock + +if TYPE_CHECKING: + from collections.abc import Iterable + + from cudf.core.column import ColumnBase + + +@acquire_spill_lock() +def is_sorted( + source_columns: list[ColumnBase], + ascending: list[bool] | None = None, + null_position: list[bool] | None = None, +) -> bool: + """ + Checks whether the rows of a `table` are sorted in lexicographical order. + + Parameters + ---------- + source_columns : list of columns + columns to be checked for sort order + ascending : None or list-like of booleans + None or list-like of boolean values indicating expected sort order of + each column. If list-like, size of list-like must be len(columns). If + None, all columns expected sort order is set to ascending. False (0) - + descending, True (1) - ascending. + null_position : None or list-like of booleans + None or list-like of boolean values indicating desired order of nulls + compared to other elements. If list-like, size of list-like must be + len(columns). If None, null order is set to before. 
False (0) - after, + True (1) - before. + + Returns + ------- + returns : boolean + Returns True, if sorted as expected by ``ascending`` and + ``null_position``, False otherwise. + """ + if ascending is None: + column_order = [plc.types.Order.ASCENDING] * len(source_columns) + else: + if len(ascending) != len(source_columns): + raise ValueError( + f"Expected a list-like of length {len(source_columns)}, " + f"got length {len(ascending)} for `ascending`" + ) + column_order = [ + plc.types.Order.ASCENDING if asc else plc.types.Order.DESCENDING + for asc in ascending + ] + + if null_position is None: + null_precedence = [plc.types.NullOrder.AFTER] * len(source_columns) + else: + if len(null_position) != len(source_columns): + raise ValueError( + f"Expected a list-like of length {len(source_columns)}, " + f"got length {len(null_position)} for `null_position`" + ) + null_precedence = [ + plc.types.NullOrder.BEFORE if null else plc.types.NullOrder.AFTER + for null in null_position + ] + + return plc.sorting.is_sorted( + plc.Table([col.to_pylibcudf(mode="read") for col in source_columns]), + column_order, + null_precedence, + ) + + +def ordering( + column_order: list[bool], + null_precedence: Iterable[Literal["first", "last"]], +) -> tuple[list[plc.types.Order], list[plc.types.NullOrder]]: + """ + Construct order and null order vectors + + Parameters + ---------- + column_order + Iterable of bool (True for ascending order, False for descending) + null_precedence + Iterable string for null positions ("first" for start, "last" for end) + + Both iterables must be the same length (not checked) + + Returns + ------- + pair of vectors (order, and null_order) + """ + c_column_order = [] + c_null_precedence = [] + for asc, null in zip(column_order, null_precedence): + c_column_order.append( + plc.types.Order.ASCENDING if asc else plc.types.Order.DESCENDING + ) + if asc ^ (null == "first"): + c_null_precedence.append(plc.types.NullOrder.AFTER) + elif asc ^ (null == "last"): + 
c_null_precedence.append(plc.types.NullOrder.BEFORE) + else: + raise ValueError(f"Invalid null precedence {null}") + return c_column_order, c_null_precedence + + +@acquire_spill_lock() +def order_by( + columns_from_table: list[ColumnBase], + ascending: list[bool], + na_position: Literal["first", "last"], + *, + stable: bool, +): + """ + Get index to sort the table in ascending/descending order. + + Parameters + ---------- + columns_from_table : list[Column] + Columns from the table which will be sorted + ascending : sequence[bool] + Sequence of boolean values which correspond to each column + in the table to be sorted signifying the order of each column + True - Ascending and False - Descending + na_position : str + Whether null values should show up at the "first" or "last" + position of **all** sorted column. + stable : bool + Should the sort be stable? (no default) + + Returns + ------- + Column of indices that sorts the table + """ + order = ordering(ascending, itertools.repeat(na_position)) + func = ( + plc.sorting.stable_sorted_order if stable else plc.sorting.sorted_order + ) + return Column.from_pylibcudf( + func( + plc.Table( + [col.to_pylibcudf(mode="read") for col in columns_from_table], + ), + order[0], + order[1], + ) + ) + + +@acquire_spill_lock() +def sort_by_key( + values: list[ColumnBase], + keys: list[ColumnBase], + ascending: list[bool], + na_position: list[Literal["first", "last"]], + *, + stable: bool, +) -> list[ColumnBase]: + """ + Sort a table by given keys + + Parameters + ---------- + values : list[Column] + Columns of the table which will be sorted + keys : list[Column] + Columns making up the sort key + ascending : list[bool] + Sequence of boolean values which correspond to each column + in the table to be sorted signifying the order of each column + True - Ascending and False - Descending + na_position : list[str] + Sequence of "first" or "last" values (default "first") + indicating the position of null values when sorting the keys. 
+ stable : bool + Should the sort be stable? (no default) + + Returns + ------- + list[Column] + list of value columns sorted by keys + """ + order = ordering(ascending, na_position) + func = ( + plc.sorting.stable_sort_by_key if stable else plc.sorting.sort_by_key + ) + return [ + Column.from_pylibcudf(col) + for col in func( + plc.Table([col.to_pylibcudf(mode="read") for col in values]), + plc.Table([col.to_pylibcudf(mode="read") for col in keys]), + order[0], + order[1], + ).columns() + ] diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 624a3ac95ed..cc07af0f669 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -42,7 +42,7 @@ is_string_dtype, ) from cudf.core._compat import PANDAS_GE_210 -from cudf.core._internals import aggregation, unary +from cudf.core._internals import aggregation, sorting, unary from cudf.core._internals.timezones import get_compatible_timezone from cudf.core.abc import Serializable from cudf.core.buffer import ( @@ -996,13 +996,13 @@ def is_unique(self) -> bool: @cached_property def is_monotonic_increasing(self) -> bool: - return not self.has_nulls(include_nan=True) and libcudf.sort.is_sorted( + return not self.has_nulls(include_nan=True) and sorting.is_sorted( [self], [True], None ) @cached_property def is_monotonic_decreasing(self) -> bool: - return not self.has_nulls(include_nan=True) and libcudf.sort.is_sorted( + return not self.has_nulls(include_nan=True) and sorting.is_sorted( [self], [False], None ) @@ -1026,15 +1026,20 @@ def contains(self, other: ColumnBase) -> ColumnBase: def sort_values( self: Self, ascending: bool = True, - na_position: str = "last", + na_position: Literal["first", "last"] = "last", ) -> Self: if (not ascending and self.is_monotonic_decreasing) or ( ascending and self.is_monotonic_increasing ): return self.copy() - return libcudf.sort.sort( - [self], column_order=[ascending], null_precedence=[na_position] - )[0] + 
order = sorting.ordering([ascending], [na_position]) + with acquire_spill_lock(): + plc_table = plc.sorting.sort( + plc.Table([self.to_pylibcudf(mode="read")]), + order[0], + order[1], + ) + return type(self).from_pylibcudf(plc_table.columns()[0]) # type: ignore[return-value] def distinct_count(self, dropna: bool = True) -> int: try: @@ -1204,7 +1209,7 @@ def argsort( as_column(range(len(self) - 1, -1, -1)), ) else: - return libcudf.sort.order_by( + return sorting.order_by( [self], [ascending], na_position, stable=True ) @@ -1511,7 +1516,7 @@ def _return_sentinel_column(): del right_rows # reorder `codes` so that its values correspond to the # values of `self`: - (codes,) = libcudf.sort.sort_by_key( + (codes,) = sorting.sort_by_key( codes, [left_gather_map], [True], ["last"], stable=True ) return codes.fillna(na_sentinel.value) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 28a2bd7fa6c..f099cef3331 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -718,6 +718,40 @@ def _reduction_result_dtype(self, reduction_op: str) -> Dtype: return super()._reduction_result_dtype(reduction_op) + @acquire_spill_lock() + def digitize(self, bins: np.ndarray, right: bool = False) -> Self: + """Return the indices of the bins to which each value in column belongs. + + Parameters + ---------- + bins : np.ndarray + 1-D column-like object of bins with same type as `column`, should be + monotonically increasing. + right : bool + Indicates whether interval contains the right or left bin edge. + + Returns + ------- + A column containing the indices + """ + if self.dtype != bins.dtype: + raise ValueError( + "digitize() expects bins and input column have the same dtype." 
+ ) + + bin_col = as_column(bins, dtype=bins.dtype) + if bin_col.nullable: + raise ValueError("`bins` cannot contain null entries.") + + return type(self).from_pylibcudf( # type: ignore[return-value] + getattr(plc.search, "lower_bound" if right else "upper_bound")( + plc.Table([bin_col.to_pylibcudf(mode="read")]), + plc.Table([self.to_pylibcudf(mode="read")]), + [plc.types.Order.ASCENDING], + [plc.types.NullOrder.BEFORE], + ) + ) + def _normalize_find_and_replace_input( input_column_dtype: DtypeObj, col_to_normalize: ColumnBase | list @@ -772,34 +806,3 @@ def _normalize_find_and_replace_input( if not normalized_column.can_cast_safely(input_column_dtype): return normalized_column return normalized_column.astype(input_column_dtype) - - -def digitize( - column: ColumnBase, bins: np.ndarray, right: bool = False -) -> ColumnBase: - """Return the indices of the bins to which each value in column belongs. - - Parameters - ---------- - column : Column - Input column. - bins : Column-like - 1-D column-like object of bins with same type as `column`, should be - monotonically increasing. - right : bool - Indicates whether interval contains the right or left bin edge. - - Returns - ------- - A column containing the indices - """ - if not column.dtype == bins.dtype: - raise ValueError( - "Digitize() expects bins and input column have the same dtype." 
- ) - - bin_col = as_column(bins, dtype=bins.dtype) - if bin_col.nullable: - raise ValueError("`bins` cannot contain null entries.") - - return as_column(libcudf.sort.digitize([column], [bin_col], right)) diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index 7a39355dd50..aaf2239a71e 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -10,7 +10,7 @@ import pylibcudf as plc import cudf -from cudf import _lib as libcudf +from cudf.core._internals import sorting from cudf.core.buffer import Buffer, acquire_spill_lock from cudf.core.column.column import ColumnBase from cudf.core.missing import NA @@ -144,7 +144,7 @@ def quantile( ) else: # get sorted indices and exclude nulls - indices = libcudf.sort.order_by( + indices = sorting.order_by( [self], [True], "first", stable=True ).slice(self.null_count, len(self)) with acquire_spill_lock(): diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 00199cca828..4f40ba0bd92 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -22,6 +22,7 @@ from cudf import _lib as libcudf from cudf.api.types import is_dtype_equal, is_scalar from cudf.core._compat import PANDAS_LT_300 +from cudf.core._internals import sorting from cudf.core._internals.search import search_sorted from cudf.core.abc import Serializable from cudf.core.buffer import acquire_spill_lock @@ -1476,7 +1477,7 @@ def _get_sorted_inds( else: ascending_lst = list(ascending) - return libcudf.sort.order_by( + return sorting.order_by( list(to_sort), ascending_lst, na_position, diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index a8d82f977d5..b772d35846d 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -18,11 +18,11 @@ import cudf from cudf import _lib as libcudf from cudf._lib import groupby 
as libgroupby -from cudf._lib.sort import segmented_sort_by_key from cudf._lib.types import size_type_dtype from cudf.api.extensions import no_default from cudf.api.types import is_list_like, is_numeric_dtype from cudf.core._compat import PANDAS_LT_300 +from cudf.core._internals import sorting from cudf.core.abc import Serializable from cudf.core.buffer import acquire_spill_lock from cudf.core.column.column import ColumnBase, StructDtype, as_column @@ -792,7 +792,7 @@ def agg(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): # want, and right order is a matching gather map for # the result table. Get the correct order by sorting # the right gather map. - (right_order,) = libcudf.sort.sort_by_key( + (right_order,) = sorting.sort_by_key( [right_order], [left_order], [True], @@ -1248,15 +1248,20 @@ def sample( for off, size in zip(group_offsets, size_per_group): rs.shuffle(indices[off : off + size]) else: - rng = cp.random.default_rng(seed=random_state) - (indices,) = segmented_sort_by_key( - [as_column(indices)], - [as_column(rng.random(size=nrows))], - as_column(group_offsets), - [], - [], - stable=True, + keys = cp.random.default_rng(seed=random_state).random( + size=nrows ) + with acquire_spill_lock(): + plc_table = plc.sorting.stable_segmented_sort_by_key( + plc.Table( + [as_column(indices).to_pylibcudf(mode="read")] + ), + plc.Table([as_column(keys).to_pylibcudf(mode="read")]), + as_column(group_offsets).to_pylibcudf(mode="read"), + [plc.types.Order.ASCENDING], + [plc.types.NullOrder.AFTER], + ) + indices = ColumnBase.from_pylibcudf(plc_table.columns()[0]) indices = cp.asarray(indices.data_array_view(mode="read")) # Which indices are we going to want? 
want = np.arange(samples_per_group.sum(), dtype=size_type_dtype) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 81d954960e2..1a667e24bef 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -6367,9 +6367,49 @@ def rank( elif source._num_columns != num_cols: dropped_cols = True - result_columns = libcudf.sort.rank_columns( - [*source._columns], method_enum, na_option, ascending, pct + column_order = ( + plc.types.Order.ASCENDING + if ascending + else plc.types.Order.DESCENDING ) + # ascending + # #top = na_is_smallest + # #bottom = na_is_largest + # #keep = na_is_largest + # descending + # #top = na_is_largest + # #bottom = na_is_smallest + # #keep = na_is_smallest + if ascending: + if na_option == "top": + null_precedence = plc.types.NullOrder.BEFORE + else: + null_precedence = plc.types.NullOrder.AFTER + else: + if na_option == "top": + null_precedence = plc.types.NullOrder.AFTER + else: + null_precedence = plc.types.NullOrder.BEFORE + c_null_handling = ( + plc.types.NullPolicy.EXCLUDE + if na_option == "keep" + else plc.types.NullPolicy.INCLUDE + ) + + with acquire_spill_lock(): + result_columns = [ + libcudf.column.Column.from_pylibcudf( + plc.sorting.rank( + col.to_pylibcudf(mode="read"), + method_enum, + column_order, + c_null_handling, + null_precedence, + pct, + ) + ) + for col in source._columns + ] if dropped_cols: result = type(source)._from_data( diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 5c224176730..e7ea91c1f21 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -9,6 +9,7 @@ import cudf from cudf import _lib as libcudf from cudf._lib.types import size_type_dtype +from cudf.core._internals import sorting from cudf.core.buffer import acquire_spill_lock from cudf.core.copy_types import GatherMap from cudf.core.join._join_helpers import ( @@ -256,7 +257,7 @@ def 
_gather_maps(self, left_cols, right_cols): for map_, n, null in zip(maps, lengths, nullify) ) ) - return libcudf.sort.sort_by_key( + return sorting.sort_by_key( list(maps), # If how is right, right map is primary sort key. key_order[:: -1 if self.how == "right" else 1], @@ -426,7 +427,7 @@ def _sort_result(self, result: cudf.DataFrame) -> cudf.DataFrame: else: to_sort = [*result._columns] index_names = None - result_columns = libcudf.sort.sort_by_key( + result_columns = sorting.sort_by_key( to_sort, by, [True] * len(by), diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index f5ee36f851c..a99e06e4a8e 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -22,6 +22,7 @@ from cudf.api.types import is_integer, is_list_like, is_object_dtype, is_scalar from cudf.core import column from cudf.core._base_index import _return_get_indexer_result +from cudf.core._internals import sorting from cudf.core.algorithms import factorize from cudf.core.buffer import acquire_spill_lock from cudf.core.column_accessor import ColumnAccessor @@ -1677,7 +1678,7 @@ def _is_sorted(self, ascending=None, null_position=None) -> bool: f"Expected a list-like or None for `null_position`, got " f"{type(null_position)}" ) - return libcudf.sort.is_sorted( + return sorting.is_sorted( [*self._columns], ascending=ascending, null_position=null_position ) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 647e20fc16b..961e5e11bc0 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -3410,7 +3410,7 @@ def describe( ) @_performance_tracking - def digitize(self, bins, right=False): + def digitize(self, bins: np.ndarray, right: bool = False) -> Self: """Return the indices of the bins to which each value belongs. 
Notes @@ -3441,9 +3441,8 @@ def digitize(self, bins, right=False): 3 2 dtype: int32 """ - return Series._from_column( - cudf.core.column.numerical.digitize(self._column, bins, right), - name=self.name, + return type(self)._from_column( + self._column.digitize(bins, right), name=self.name ) @_performance_tracking From 34e20451cf5452ecea74092dae3c6f5078ade0bd Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 13 Dec 2024 15:36:55 -0800 Subject: [PATCH 06/32] Mark more constexpr functions as device-available (#17545) Contributes to #7795. Also contributes to https://github.com/rapidsai/build-planning/issues/76. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Yunsong Wang (https://github.com/PointKernel) - Bradley Dice (https://github.com/bdice) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/17545 --- ci/build_docs.sh | 6 + .../cudf/column/column_device_view.cuh | 18 ++- .../cudf/detail/aggregation/aggregation.cuh | 2 +- cpp/include/cudf/detail/utilities/cuda.cuh | 11 +- .../detail/utilities/device_operators.cuh | 30 ++-- .../cudf/detail/utilities/integer_utils.hpp | 4 +- .../detail/floating_conversion.hpp | 7 +- .../cudf/hashing/detail/hash_functions.cuh | 5 +- cpp/include/cudf/hashing/detail/hashing.hpp | 2 +- cpp/include/cudf/strings/detail/utf8.hpp | 21 +-- cpp/include/cudf/strings/string_view.cuh | 8 +- .../cudf/table/experimental/row_operators.cuh | 74 +++++----- cpp/include/cudf/types.hpp | 9 +- cpp/include/cudf/utilities/span.hpp | 138 ++++++++++++------ cpp/include/cudf/utilities/traits.hpp | 42 +++--- cpp/src/binaryop/compiled/binary_ops.cuh | 6 +- cpp/src/copying/contiguous_split.cu | 3 +- cpp/src/groupby/sort/group_rank_scan.cu | 3 +- cpp/src/hash/murmurhash3_x64_128.cu | 4 +- cpp/src/hash/sha_hash.cuh | 4 +- cpp/src/hash/xxhash_64.cu | 3 +- cpp/src/io/avro/avro_common.hpp | 2 +- cpp/src/io/comp/unsnap.cu | 3 +- cpp/src/io/fst/agent_dfa.cuh 
| 14 +- cpp/src/io/statistics/byte_array_view.cuh | 33 +++-- .../io/statistics/typed_statistics_chunk.cuh | 5 +- cpp/src/io/utilities/parsing_utils.cuh | 19 ++- cpp/src/io/utilities/trie.cuh | 4 +- cpp/src/quantiles/quantiles_util.hpp | 9 +- cpp/src/strings/search/find.cu | 3 +- cpp/src/strings/slice.cu | 7 +- docs/cudf/source/conf.py | 2 + 32 files changed, 302 insertions(+), 199 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 4290d013fe4..52d8f659611 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -35,6 +35,10 @@ rapids-mamba-retry install \ export RAPIDS_DOCS_DIR="$(mktemp -d)" +EXITCODE=0 +trap "EXITCODE=1" ERR +set +e + rapids-logger "Build CPP docs" pushd cpp/doxygen aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_MAJOR_MINOR}/rmm.tag . || echo "Failed to download rmm Doxygen tag" @@ -58,3 +62,5 @@ mv build/dirhtml/* "${RAPIDS_DOCS_DIR}/dask-cudf/html" popd RAPIDS_VERSION_NUMBER="${RAPIDS_VERSION_MAJOR_MINOR}" rapids-upload-docs + +exit ${EXITCODE} diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index db6d5255616..ea480b133dc 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -33,11 +33,13 @@ #include #include +#include #include #include #include #include +#include /** * @file column_device_view.cuh @@ -56,8 +58,8 @@ namespace CUDF_EXPORT cudf { * */ struct nullate { - struct YES : std::bool_constant {}; - struct NO : std::bool_constant {}; + struct YES : cuda::std::bool_constant {}; + struct NO : cuda::std::bool_constant {}; /** * @brief `nullate::DYNAMIC` defers the determination of nullability to run time rather than * compile time. 
The calling code is responsible for specifying whether or not nulls are @@ -80,7 +82,7 @@ struct nullate { * @return `true` if nulls are expected in the operation in which this object is applied, * otherwise false */ - constexpr operator bool() const noexcept { return value; } + CUDF_HOST_DEVICE constexpr operator bool() const noexcept { return value; } bool value; ///< True if nulls are expected }; }; @@ -319,14 +321,14 @@ class alignas(16) column_device_view_base { } template - struct has_element_accessor_impl : std::false_type {}; + struct has_element_accessor_impl : cuda::std::false_type {}; template struct has_element_accessor_impl< C, T, - void_t().template element(std::declval()))>> - : std::true_type {}; + void_t().template element(cuda::std::declval()))>> + : cuda::std::true_type {}; }; // @cond // Forward declaration @@ -534,7 +536,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @return `true` if `column_device_view::element()` has a valid overload, `false` otherwise */ template - static constexpr bool has_element_accessor() + CUDF_HOST_DEVICE static constexpr bool has_element_accessor() { return has_element_accessor_impl::value; } @@ -1044,7 +1046,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @return `true` if `mutable_column_device_view::element()` has a valid overload, `false` */ template - static constexpr bool has_element_accessor() + CUDF_HOST_DEVICE static constexpr bool has_element_accessor() { return has_element_accessor_impl::value; } diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index de53e7586cd..c30c3d6f4bd 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -36,7 +36,7 @@ namespace cudf { namespace detail { template -constexpr bool is_product_supported() +CUDF_HOST_DEVICE constexpr bool is_product_supported() { return 
is_numeric(); } diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh index 61a8e9f7ec3..72cdc3d8067 100644 --- a/cpp/include/cudf/detail/utilities/cuda.cuh +++ b/cpp/include/cudf/detail/utilities/cuda.cuh @@ -74,9 +74,10 @@ class grid_1d { * @param num_threads_per_block The number of threads per block * @return thread_index_type The global thread index */ - static constexpr thread_index_type global_thread_id(thread_index_type thread_id, - thread_index_type block_id, - thread_index_type num_threads_per_block) + __device__ static constexpr thread_index_type global_thread_id( + thread_index_type thread_id, + thread_index_type block_id, + thread_index_type num_threads_per_block) { return thread_id + block_id * num_threads_per_block; } @@ -114,8 +115,8 @@ class grid_1d { * @param num_threads_per_block The number of threads per block * @return thread_index_type The global thread index */ - static constexpr thread_index_type grid_stride(thread_index_type num_threads_per_block, - thread_index_type num_blocks_per_grid) + __device__ static constexpr thread_index_type grid_stride(thread_index_type num_threads_per_block, + thread_index_type num_blocks_per_grid) { return num_threads_per_block * num_blocks_per_grid; } diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index d16be5e22dd..923cd04479d 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -29,6 +29,8 @@ #include #include +#include + #include namespace cudf { @@ -42,7 +44,7 @@ template ()>* = nullptr> CUDF_HOST_DEVICE inline auto min(LHS const& lhs, RHS const& rhs) { - return std::min(lhs, rhs); + return cuda::std::min(lhs, rhs); } /** @@ -53,7 +55,7 @@ template ()>* = nullptr> CUDF_HOST_DEVICE inline auto max(LHS const& lhs, RHS const& rhs) { - return std::max(lhs, rhs); + return cuda::std::max(lhs, rhs); } } // 
namespace detail @@ -68,20 +70,20 @@ struct DeviceSum { } template ()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { return T{typename T::duration{0}}; } template () && !cudf::is_fixed_point()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { return T{0}; } template ()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { #ifndef __CUDA_ARCH__ CUDF_FAIL("fixed_point does not yet support device operator identity"); @@ -109,7 +111,7 @@ struct DeviceCount { } template - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { return T{}; } @@ -129,7 +131,7 @@ struct DeviceMin { template && !cudf::is_dictionary() && !cudf::is_fixed_point()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { // chrono types do not have std::numeric_limits specializations and should use T::max() // https://eel.is/c++draft/numeric.limits.general#6 @@ -143,7 +145,7 @@ struct DeviceMin { } template ()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { #ifndef __CUDA_ARCH__ CUDF_FAIL("fixed_point does not yet support DeviceMin identity"); @@ -161,7 +163,7 @@ struct DeviceMin { } template ()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { return static_cast(T::max_value()); } @@ -181,7 +183,7 @@ struct DeviceMax { template && !cudf::is_dictionary() && !cudf::is_fixed_point()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { // chrono types do not have std::numeric_limits specializations and should use T::min() // https://eel.is/c++draft/numeric.limits.general#6 @@ -195,7 +197,7 @@ struct DeviceMax { } template ()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { #ifndef __CUDA_ARCH__ CUDF_FAIL("fixed_point 
does not yet support DeviceMax identity"); @@ -212,7 +214,7 @@ struct DeviceMax { } template ()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { return static_cast(T::lowest_value()); } @@ -229,13 +231,13 @@ struct DeviceProduct { } template ()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { return T{1}; } template ()>* = nullptr> - static constexpr T identity() + CUDF_HOST_DEVICE static constexpr T identity() { #ifndef __CUDA_ARCH__ CUDF_FAIL("fixed_point does not yet support DeviceProduct identity"); diff --git a/cpp/include/cudf/detail/utilities/integer_utils.hpp b/cpp/include/cudf/detail/utilities/integer_utils.hpp index 957b6b70fe2..2e3d71815c0 100644 --- a/cpp/include/cudf/detail/utilities/integer_utils.hpp +++ b/cpp/include/cudf/detail/utilities/integer_utils.hpp @@ -86,7 +86,7 @@ constexpr S round_down_safe(S number_to_round, S modulus) noexcept * `modulus` is positive and does not check for overflow. */ template -constexpr S round_up_unsafe(S number_to_round, S modulus) noexcept +CUDF_HOST_DEVICE constexpr S round_up_unsafe(S number_to_round, S modulus) noexcept { auto remainder = number_to_round % modulus; if (remainder == 0) { return number_to_round; } @@ -187,7 +187,7 @@ constexpr bool is_a_power_of_two(I val) noexcept * @return Absolute value if value type is signed. 
*/ template -constexpr auto absolute_value(T value) -> T +CUDF_HOST_DEVICE constexpr auto absolute_value(T value) -> T { if constexpr (cuda::std::is_signed()) return numeric::detail::abs(value); return value; diff --git a/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp index fce08b4a5c4..9e68bafb09a 100644 --- a/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp +++ b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -183,7 +184,7 @@ struct floating_converter { * @param integer_rep The bit-casted floating value to extract the exponent from * @return The stored base-2 exponent and significand, shifted for denormals */ - CUDF_HOST_DEVICE inline static std::pair get_significand_and_pow2( + CUDF_HOST_DEVICE inline static cuda::std::pair get_significand_and_pow2( IntegralType integer_rep) { // Extract the significand @@ -1008,7 +1009,7 @@ CUDF_HOST_DEVICE inline auto shift_to_binary_pospow(DecimalRep decimal_rep, int } // Our shifting_rep is now the integer mantissa, return it and the powers of 2 - return std::pair{shifting_rep, pow2}; + return cuda::std::pair{shifting_rep, pow2}; } /** @@ -1075,7 +1076,7 @@ CUDF_HOST_DEVICE inline auto shift_to_binary_negpow(DecimalRep decimal_rep, int } // Our shifting_rep is now the integer mantissa, return it and the powers of 2 - return std::pair{shifting_rep, pow2}; + return cuda::std::pair{shifting_rep, pow2}; } /** diff --git a/cpp/include/cudf/hashing/detail/hash_functions.cuh b/cpp/include/cudf/hashing/detail/hash_functions.cuh index 0ec41a20ef1..fd3455e761d 100644 --- a/cpp/include/cudf/hashing/detail/hash_functions.cuh +++ b/cpp/include/cudf/hashing/detail/hash_functions.cuh @@ -18,7 +18,8 @@ #include -#include +#include +#include namespace cudf::hashing::detail { @@ -29,7 +30,7 @@ template T __device__ inline normalize_nans(T const& key) { if constexpr 
(cudf::is_floating_point()) { - if (std::isnan(key)) { return std::numeric_limits::quiet_NaN(); } + if (cuda::std::isnan(key)) { return cuda::std::numeric_limits::quiet_NaN(); } } return key; } diff --git a/cpp/include/cudf/hashing/detail/hashing.hpp b/cpp/include/cudf/hashing/detail/hashing.hpp index a978e54a1b9..7cb80081a95 100644 --- a/cpp/include/cudf/hashing/detail/hashing.hpp +++ b/cpp/include/cudf/hashing/detail/hashing.hpp @@ -82,7 +82,7 @@ std::unique_ptr xxhash_64(table_view const& input, * @param rhs The second hash value * @return Combined hash value */ -constexpr uint32_t hash_combine(uint32_t lhs, uint32_t rhs) +CUDF_HOST_DEVICE constexpr uint32_t hash_combine(uint32_t lhs, uint32_t rhs) { return lhs ^ (rhs + 0x9e37'79b9 + (lhs << 6) + (lhs >> 2)); } diff --git a/cpp/include/cudf/strings/detail/utf8.hpp b/cpp/include/cudf/strings/detail/utf8.hpp index 85349a421b1..84957ab9f1d 100644 --- a/cpp/include/cudf/strings/detail/utf8.hpp +++ b/cpp/include/cudf/strings/detail/utf8.hpp @@ -31,7 +31,7 @@ namespace strings::detail { * @param chr Any single byte from a valid UTF-8 character * @return true if this is not the first byte of the character */ -constexpr bool is_utf8_continuation_char(unsigned char chr) +CUDF_HOST_DEVICE constexpr bool is_utf8_continuation_char(unsigned char chr) { // The (0xC0 & 0x80) bit pattern identifies a continuation byte of a character. 
return (chr & 0xC0) == 0x80; @@ -43,7 +43,10 @@ constexpr bool is_utf8_continuation_char(unsigned char chr) * @param chr Any single byte from a valid UTF-8 character * @return true if this the first byte of the character */ -constexpr bool is_begin_utf8_char(unsigned char chr) { return not is_utf8_continuation_char(chr); } +CUDF_HOST_DEVICE constexpr bool is_begin_utf8_char(unsigned char chr) +{ + return not is_utf8_continuation_char(chr); +} /** * @brief This will return true if the passed in byte could be the start of @@ -55,7 +58,7 @@ constexpr bool is_begin_utf8_char(unsigned char chr) { return not is_utf8_contin * @param byte The byte to be tested * @return true if this can be the first byte of a character */ -constexpr bool is_valid_begin_utf8_char(uint8_t byte) +CUDF_HOST_DEVICE constexpr bool is_valid_begin_utf8_char(uint8_t byte) { // to be the first byte of a valid (up to 4 byte) UTF-8 char, byte must be one of: // 0b0vvvvvvv a 1 byte character @@ -72,7 +75,7 @@ constexpr bool is_valid_begin_utf8_char(uint8_t byte) * @param character Single character * @return Number of bytes */ -constexpr size_type bytes_in_char_utf8(char_utf8 character) +CUDF_HOST_DEVICE constexpr size_type bytes_in_char_utf8(char_utf8 character) { return 1 + static_cast((character & 0x0000'FF00u) > 0) + static_cast((character & 0x00FF'0000u) > 0) + @@ -89,7 +92,7 @@ constexpr size_type bytes_in_char_utf8(char_utf8 character) * @param byte Byte from an encoded character. * @return Number of bytes. */ -constexpr size_type bytes_in_utf8_byte(uint8_t byte) +CUDF_HOST_DEVICE constexpr size_type bytes_in_utf8_byte(uint8_t byte) { return 1 + static_cast((byte & 0xF0) == 0xF0) // 4-byte character prefix + static_cast((byte & 0xE0) == 0xE0) // 3-byte character prefix @@ -104,7 +107,7 @@ constexpr size_type bytes_in_utf8_byte(uint8_t byte) * @param[out] character Single char_utf8 value. 
* @return The number of bytes in the character */ -constexpr size_type to_char_utf8(char const* str, char_utf8& character) +CUDF_HOST_DEVICE constexpr size_type to_char_utf8(char const* str, char_utf8& character) { size_type const chr_width = bytes_in_utf8_byte(static_cast(*str)); @@ -131,7 +134,7 @@ constexpr size_type to_char_utf8(char const* str, char_utf8& character) * @param[out] str Output array. * @return The number of bytes in the character */ -constexpr inline size_type from_char_utf8(char_utf8 character, char* str) +CUDF_HOST_DEVICE constexpr inline size_type from_char_utf8(char_utf8 character, char* str) { size_type const chr_width = bytes_in_char_utf8(character); for (size_type idx = 0; idx < chr_width; ++idx) { @@ -148,7 +151,7 @@ constexpr inline size_type from_char_utf8(char_utf8 character, char* str) * @param utf8_char Single UTF-8 character to convert. * @return Code-point for the UTF-8 character. */ -constexpr uint32_t utf8_to_codepoint(cudf::char_utf8 utf8_char) +CUDF_HOST_DEVICE constexpr uint32_t utf8_to_codepoint(cudf::char_utf8 utf8_char) { uint32_t unchr = 0; if (utf8_char < 0x0000'0080) // single-byte pass thru @@ -178,7 +181,7 @@ constexpr uint32_t utf8_to_codepoint(cudf::char_utf8 utf8_char) * @param unchr Character code-point to convert. * @return Single UTF-8 character. */ -constexpr cudf::char_utf8 codepoint_to_utf8(uint32_t unchr) +CUDF_HOST_DEVICE constexpr cudf::char_utf8 codepoint_to_utf8(uint32_t unchr) { cudf::char_utf8 utf8 = 0; if (unchr < 0x0000'0080) // single byte utf8 diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index 1ae4c3703b2..f0040e069d8 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -31,6 +31,8 @@ #include #endif +#include + #include // This file should only include device code logic. 
@@ -75,8 +77,8 @@ __device__ inline size_type characters_in_string(char const* str, size_type byte * @param pos Character position to count to * @return The number of bytes and the left over non-counted position value */ -__device__ inline std::pair bytes_to_character_position(string_view d_str, - size_type pos) +__device__ inline cuda::std::pair bytes_to_character_position( + string_view d_str, size_type pos) { size_type bytes = 0; auto ptr = d_str.data(); @@ -303,7 +305,7 @@ __device__ inline char_utf8 string_view::operator[](size_type pos) const __device__ inline size_type string_view::byte_offset(size_type pos) const { if (length() == size_bytes()) return pos; - return std::get<0>(strings::detail::bytes_to_character_position(*this, pos)); + return cuda::std::get<0>(strings::detail::bytes_to_character_position(*this, pos)); } __device__ inline int string_view::compare(string_view const& in) const diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 3f33c70c29a..8214ea6e83b 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -33,6 +33,8 @@ #include #include +#include +#include #include #include #include @@ -48,11 +50,8 @@ #include #include -#include #include -#include #include -#include namespace CUDF_EXPORT cudf { @@ -287,15 +286,16 @@ class device_row_comparator { * `null_order::BEFORE` for all columns. * @param comparator Physical element relational comparison functor. 
*/ - device_row_comparator(Nullate check_nulls, - table_device_view lhs, - table_device_view rhs, - device_span l_dremel_device_views, - device_span r_dremel_device_views, - std::optional> depth = std::nullopt, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + device_row_comparator( + Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + device_span l_dremel_device_views, + device_span r_dremel_device_views, + cuda::std::optional> depth = cuda::std::nullopt, + cuda::std::optional> column_order = cuda::std::nullopt, + cuda::std::optional> null_precedence = cuda::std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _l_dremel(l_dremel_device_views), @@ -331,9 +331,9 @@ class device_row_comparator { Nullate check_nulls, table_device_view lhs, table_device_view rhs, - std::optional> column_order = std::nullopt, - std::optional> null_precedence = std::nullopt, - PhysicalElementComparator comparator = {}) noexcept + cuda::std::optional> column_order = cuda::std::nullopt, + cuda::std::optional> null_precedence = cuda::std::nullopt, + PhysicalElementComparator comparator = {}) noexcept : _lhs{lhs}, _rhs{rhs}, _l_dremel{}, @@ -410,7 +410,7 @@ class device_row_comparator { return cuda::std::pair(_comparator(_lhs.element(lhs_element_index), _rhs.element(rhs_element_index)), - std::numeric_limits::max()); + cuda::std::numeric_limits::max()); } /** @@ -455,7 +455,7 @@ class device_row_comparator { } if (lcol.num_child_columns() == 0) { - return cuda::std::pair(weak_ordering::EQUIVALENT, std::numeric_limits::max()); + return cuda::std::pair(weak_ordering::EQUIVALENT, cuda::std::numeric_limits::max()); } // Non-empty structs have been modified to only have 1 child when using this. 
@@ -607,7 +607,7 @@ class device_row_comparator { __device__ constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept { - int last_null_depth = std::numeric_limits::max(); + int last_null_depth = cuda::std::numeric_limits::max(); size_type list_column_index{-1}; for (size_type i = 0; i < _lhs.num_columns(); ++i) { if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; } @@ -626,9 +626,9 @@ class device_row_comparator { // here, otherwise the current code would be failing. auto const [l_dremel_i, r_dremel_i] = _lhs.column(i).type().id() == type_id::LIST - ? std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]), - optional_dremel_view(_r_dremel[list_column_index])) - : std::make_tuple(optional_dremel_view{}, optional_dremel_view{}); + ? cuda::std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]), + optional_dremel_view(_r_dremel[list_column_index])) + : cuda::std::make_tuple(optional_dremel_view{}, optional_dremel_view{}); auto element_comp = element_comparator{_check_nulls, _lhs.column(i), @@ -658,9 +658,9 @@ class device_row_comparator { device_span const _l_dremel; device_span const _r_dremel; Nullate const _check_nulls; - std::optional> const _depth; - std::optional> const _column_order; - std::optional> const _null_precedence; + cuda::std::optional> const _depth; + cuda::std::optional> const _column_order; + cuda::std::optional> const _null_precedence; PhysicalElementComparator const _comparator; }; // class device_row_comparator @@ -882,10 +882,10 @@ struct preprocessed_table { * @return Device array containing respective column orders. If no explicit column orders were * specified during the creation of this object then this will be `nullopt`. */ - [[nodiscard]] std::optional> column_order() const + [[nodiscard]] cuda::std::optional> column_order() const { - return _column_order.size() ? std::optional>(_column_order) - : std::nullopt; + return _column_order.size() ? 
cuda::std::optional>(_column_order) + : cuda::std::nullopt; } /** @@ -895,10 +895,11 @@ struct preprocessed_table { * @return Device array containing respective column null precedence. If no explicit column null * precedences were specified during the creation of this object then this will be `nullopt`. */ - [[nodiscard]] std::optional> null_precedence() const + [[nodiscard]] cuda::std::optional> null_precedence() const { - return _null_precedence.size() ? std::optional>(_null_precedence) - : std::nullopt; + return _null_precedence.size() + ? cuda::std::optional>(_null_precedence) + : cuda::std::nullopt; } /** @@ -909,9 +910,10 @@ struct preprocessed_table { * @return std::optional> Device array containing respective column depths. * If there are no nested columns in the table then this will be `nullopt`. */ - [[nodiscard]] std::optional> depths() const + [[nodiscard]] cuda::std::optional> depths() const { - return _depths.size() ? std::optional>(_depths) : std::nullopt; + return _depths.size() ? cuda::std::optional>(_depths) + : cuda::std::nullopt; } [[nodiscard]] device_span dremel_device_views() const @@ -940,8 +942,8 @@ struct preprocessed_table { rmm::device_uvector const _depths; // Dremel encoding of list columns used for the comparison algorithm - std::optional> _dremel_data; - std::optional> _dremel_device_views; + cuda::std::optional> _dremel_data; + cuda::std::optional> _dremel_device_views; // Intermediate columns generated from transforming nested children columns into // integers columns using `cudf::rank()`, need to be kept alive. @@ -1808,7 +1810,7 @@ class element_hasher { __device__ element_hasher( Nullate nulls, uint32_t seed = DEFAULT_HASH_SEED, - hash_value_type null_hash = std::numeric_limits::max()) noexcept + hash_value_type null_hash = cuda::std::numeric_limits::max()) noexcept : _check_nulls(nulls), _seed(seed), _null_hash(null_hash) { } @@ -1892,7 +1894,7 @@ class device_row_hasher { */ template