forked from rapidsai/cudf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind.cpp
112 lines (100 loc) · 4.89 KB
/
find.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf/filling.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/find.hpp>
#include <cudf/strings/find_multiple.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <nvbench/nvbench.cuh>
// Forward declaration of the helper that produces the benchmark's input
// column; bench_find_string wraps the returned column in a
// cudf::strings_column_view. The definition is not visible in this file.
// NOTE(review): presumably generates n_rows strings of roughly row_width
// characters, with about hit_rate percent of the rows containing the search
// targets -- confirm against the definition.
std::unique_ptr<cudf::column> build_input_column(cudf::size_type n_rows,
cudf::size_type row_width,
int32_t hit_rate);
/**
 * @brief nvbench driver for the cudf::strings search APIs.
 *
 * Reads the "num_rows", "row_width", "hit_rate" and "api" axes from @p state,
 * builds an input strings column with build_input_column(), records the
 * element count and global-memory traffic estimates, and then times the API
 * selected by the "api" axis ("find", "find_multi", "contains",
 * "multi-contains", "starts_with" or "ends_with"). An unrecognized "api"
 * value runs nothing.
 *
 * Configurations whose num_rows * row_width reaches the cudf::size_type
 * maximum are skipped without building any input.
 *
 * @param state nvbench state supplying the axis values and receiving results
 */
static void bench_find_string(nvbench::state& state)
{
  auto const n_rows    = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
  auto const hit_rate  = static_cast<cudf::size_type>(state.get_int64("hit_rate"));
  auto const api       = state.get_string("api");

  // The product is computed in std::size_t so the comparison itself cannot
  // overflow the 32-bit size_type.
  if (static_cast<std::size_t>(n_rows) * static_cast<std::size_t>(row_width) >=
      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
    state.skip("Skip benchmarks greater than size_type limit");
    // skip() only marks the state; return so the oversized input is never
    // built and the benchmark body is never executed.
    return;
  }

  auto const stream = cudf::get_default_stream();
  auto const col    = build_input_column(n_rows, row_width, hit_rate);
  auto const input  = cudf::strings_column_view(col->view());

  // Search targets: the longest one is used as the scalar target, and all
  // three form the column passed to find_multiple.
  std::vector<std::string> h_targets({"5W", "5W43", "0987 5W43"});
  cudf::string_scalar target(h_targets[2]);
  cudf::test::strings_column_wrapper targets(h_targets.begin(), h_targets.end());

  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
  auto const chars_size = input.chars_size(stream);
  state.add_element_count(chars_size, "chars_size");
  state.add_global_memory_reads<nvbench::int8_t>(chars_size);
  // APIs whose name starts with "find" are accounted as one int32 per row;
  // the remaining APIs are accounted as one int8 per row.
  if (api.substr(0, 4) == "find") {
    state.add_global_memory_writes<nvbench::int32_t>(input.size());
  } else {
    state.add_global_memory_writes<nvbench::int8_t>(input.size());
  }

  if (api == "find") {
    state.exec(nvbench::exec_tag::sync,
               [&](nvbench::launch& launch) { cudf::strings::find(input, target); });
  } else if (api == "find_multi") {
    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
      cudf::strings::find_multiple(input, cudf::strings_column_view(targets));
    });
  } else if (api == "contains") {
    state.exec(nvbench::exec_tag::sync,
               [&](nvbench::launch& launch) { cudf::strings::contains(input, target); });
  } else if (api == "multi-contains") {
    // Number of targets to search for; targets are taken cyclically from
    // match_targets, so iters may exceed match_targets.size().
    constexpr int iters = 10;
    std::vector<std::string> match_targets({" abc",
                                            "W43",
                                            "0987 5W43",
                                            "123 abc",
                                            "23 abc",
                                            "3 abc",
                                            "é",
                                            "7 5W43",
                                            "87 5W43",
                                            "987 5W43"});
    auto multi_targets = std::vector<std::string>{};
    multi_targets.reserve(iters);
    for (int i = 0; i < iters; i++) {
      multi_targets.emplace_back(match_targets[i % match_targets.size()]);
    }
    // Build the targets column once, outside the timed region, so that only
    // multi_contains itself is measured (not the column construction).
    cudf::test::strings_column_wrapper multi_targets_column(multi_targets.begin(),
                                                            multi_targets.end());
    auto const multi_targets_view = cudf::strings_column_view(multi_targets_column);
    state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
      cudf::strings::multi_contains(input, multi_targets_view);
    });
  } else if (api == "starts_with") {
    state.exec(nvbench::exec_tag::sync,
               [&](nvbench::launch& launch) { cudf::strings::starts_with(input, target); });
  } else if (api == "ends_with") {
    state.exec(nvbench::exec_tag::sync,
               [&](nvbench::launch& launch) { cudf::strings::ends_with(input, target); });
  }
}
// Registers bench_find_string with nvbench under the name "find_string" and
// declares the axes it reads via state.get_string / state.get_int64:
//   api       - selects which cudf::strings call is timed
//   row_width - forwarded to build_input_column as row_width
//   num_rows  - forwarded to build_input_column as n_rows
//   hit_rate  - forwarded to build_input_column as hit_rate (a percentage)
// bench_find_string calls state.skip() when num_rows * row_width reaches the
// cudf::size_type maximum; with the values below that is the case for
// num_rows = 16'777'216 combined with row_width >= 128.
NVBENCH_BENCH(bench_find_string)
.set_name("find_string")
.add_string_axis("api",
{"find", "find_multi", "contains", "starts_with", "ends_with", "multi-contains"})
.add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
.add_int64_axis("num_rows", {260'000, 1'953'000, 16'777'216})
.add_int64_axis("hit_rate", {20, 80}); // percentage