From e4de8e42c3595d8b93eb1c501225f45948561721 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Mon, 18 Nov 2024 11:13:32 -0500 Subject: [PATCH] Move strings translate benchmarks to nvbench (#17325) Moves the `cpp/benchmarks/string/translate.cpp` implementation from google-bench to nvbench. This is the benchmark for the `cudf::strings::translate` API. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17325 --- cpp/benchmarks/CMakeLists.txt | 4 +- cpp/benchmarks/string/translate.cpp | 66 +++++++++++------------------ 2 files changed, 27 insertions(+), 43 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index ae78b206810..5754994f412 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -355,8 +355,7 @@ ConfigureNVBench( # ################################################################################################## # * strings benchmark ------------------------------------------------------------------- ConfigureBench( - STRINGS_BENCH string/factory.cu string/repeat_strings.cpp string/replace.cpp string/translate.cpp - string/url_decode.cu + STRINGS_BENCH string/factory.cu string/repeat_strings.cpp string/replace.cpp string/url_decode.cu ) ConfigureNVBench( @@ -386,6 +385,7 @@ ConfigureNVBench( string/slice.cpp string/split.cpp string/split_re.cpp + string/translate.cpp ) # ################################################################################################## diff --git a/cpp/benchmarks/string/translate.cpp b/cpp/benchmarks/string/translate.cpp index dc3c8c71488..020ab3ca965 100644 --- a/cpp/benchmarks/string/translate.cpp +++ b/cpp/benchmarks/string/translate.cpp @@ -14,13 +14,7 @@ * limitations under the License. 
*/ -#include "string_bench_args.hpp" - #include -#include -#include - -#include #include #include @@ -28,20 +22,24 @@ #include -#include +#include -class StringTranslate : public cudf::benchmark {}; +#include +#include using entry_type = std::pair; -static void BM_translate(benchmark::State& state, int entry_count) +static void bench_translate(nvbench::state& state) { - cudf::size_type const n_rows{static_cast(state.range(0))}; - cudf::size_type const max_str_length{static_cast(state.range(1))}; + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); + auto const entry_count = static_cast(state.get_int64("entries")); + data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); - cudf::strings_column_view input(column->view()); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); + auto const input = cudf::strings_column_view(column->view()); std::vector entries(entry_count); std::transform(thrust::counting_iterator(0), @@ -51,33 +49,19 @@ static void BM_translate(benchmark::State& state, int entry_count) return entry_type{'!' 
+ idx, '~' - idx}; }); - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::strings::translate(input, entries); - } + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = input.chars_size(stream); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(chars_size); - state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::translate(input, entries); }); } -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); -} - -#define STRINGS_BENCHMARK_DEFINE(name, entries) \ - BENCHMARK_DEFINE_F(StringTranslate, name) \ - (::benchmark::State & st) { BM_translate(st, entries); } \ - BENCHMARK_REGISTER_F(StringTranslate, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -STRINGS_BENCHMARK_DEFINE(translate_small, 5) -STRINGS_BENCHMARK_DEFINE(translate_medium, 25) -STRINGS_BENCHMARK_DEFINE(translate_large, 50) +NVBENCH_BENCH(bench_translate) + .set_name("translate") + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) + .add_int64_axis("entries", {5, 25, 50});