From 8cca1cac6d826dab2df7f09ed73338777b5a6461 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Wed, 30 Oct 2024 21:23:34 +0200 Subject: [PATCH 1/5] benchmark --- benchmarks/src/search.cpp | 56 ++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index ad026b994f..4540860c92 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -12,8 +12,9 @@ #include #include "lorem.hpp" -using namespace std::string_view_literals; +#include "skewed_allocator.hpp" +using namespace std::string_view_literals; template constexpr auto make_fill_pattern_array() { @@ -48,12 +49,18 @@ constexpr data_and_pattern patterns[] = { /* 5. Large, evil */ {fill_pattern_view<3000, false>, fill_pattern_view<20, true>}, }; +template +using not_highly_aligned_basic_string = std::basic_string, not_highly_aligned_allocator>; + +using not_highly_aligned_string = not_highly_aligned_basic_string; +using not_highly_aligned_wstring = not_highly_aligned_basic_string; + void c_strstr(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::string haystack(src_haystack); - const std::string needle(src_needle); + const not_highly_aligned_string haystack(src_haystack); + const not_highly_aligned_string needle(src_needle); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -68,8 +75,8 @@ void classic_search(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -84,8 +91,8 @@ void ranges_search(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -100,8 +107,8 @@ void search_default_searcher(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -132,8 +139,8 @@ void classic_find_end(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -148,8 +155,8 @@ void ranges_find_end(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(src_haystack.begin(), src_haystack.end()); - const std::vector needle(src_needle.begin(), src_needle.end()); + const std::vector> haystack(src_haystack.begin(), src_haystack.end()); + const std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -159,6 +166,22 @@ void ranges_find_end(benchmark::State& state) { } } +template +void member_rfind(benchmark::State& state) { + const auto& src_haystack = patterns[static_cast(state.range())].data; + const auto& src_needle = patterns[static_cast(state.range())].pattern; + + const T haystack(src_haystack.begin(), src_haystack.end()); + const T needle(src_needle.begin(), src_needle.end()); + + for (auto _ : state) { + benchmark::DoNotOptimize(haystack); + benchmark::DoNotOptimize(needle); + auto res = haystack.rfind(needle); + benchmark::DoNotOptimize(res); + } +} + void common_args(auto bm) { bm->DenseRange(0, std::size(patterns) - 1, 1); } @@ -174,8 +197,8 @@ BENCHMARK(ranges_search)->Apply(common_args); BENCHMARK(search_default_searcher)->Apply(common_args); BENCHMARK(search_default_searcher)->Apply(common_args); -BENCHMARK(member_find)->Apply(common_args); -BENCHMARK(member_find)->Apply(common_args); +BENCHMARK(member_find)->Apply(common_args); +BENCHMARK(member_find)->Apply(common_args); BENCHMARK(classic_find_end)->Apply(common_args); BENCHMARK(classic_find_end)->Apply(common_args); @@ -183,4 +206,7 @@ BENCHMARK(classic_find_end)->Apply(common_args); BENCHMARK(ranges_find_end)->Apply(common_args); BENCHMARK(ranges_find_end)->Apply(common_args); +BENCHMARK(member_rfind)->Apply(common_args); +BENCHMARK(member_rfind)->Apply(common_args); + BENCHMARK_MAIN(); From c37000088a0ab108024e1db4cd7a798eb6a258e8 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 31 Oct 2024 07:09:45 +0200 Subject: [PATCH 2/5] Promote find_end implementation to --- stl/inc/algorithm | 18 ------------------ stl/inc/xutility | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 1b66037563..38c66b0a0f 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -59,11 +59,6 @@ const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; -const void* __stdcall __std_find_end_1( - const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; -const void* __stdcall __std_find_end_2( - const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; - __declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept; __declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept; __declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* _First, const void* _Last) noexcept; @@ -194,19 +189,6 @@ _Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val } } -template -_Ty1* _Find_end_vectorized( - _Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, const size_t _Count2) noexcept { - _STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2)); - if constexpr (sizeof(_Ty1) == 1) { - return const_cast<_Ty1*>(static_cast(::__std_find_end_1(_First1, _Last1, _First2, _Count2))); - } else if constexpr (sizeof(_Ty1) == 2) { - return const_cast<_Ty1*>(static_cast(::__std_find_end_2(_First1, _Last1, _First2, _Count2))); - } else { - _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size - } -} - template __declspec(noalias) void _Replace_vectorized( _Ty* const _First, _Ty* const _Last, const _TVal1 _Old_val, const _TVal2 _New_val) noexcept { diff --git a/stl/inc/xutility b/stl/inc/xutility index e67be17a51..25f495e8ae 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -107,6 +107,11 @@ const void* __stdcall __std_search_1( const void* __stdcall __std_search_2( const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; +const void* __stdcall __std_find_end_1( + const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; +const void* __stdcall __std_find_end_2( + const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; + const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; @@ -248,6 +253,19 @@ _Ty1* _Search_vectorized(_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _F } } +template +_Ty1* _Find_end_vectorized( + _Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, const size_t _Count2) noexcept { + _STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2)); + if constexpr (sizeof(_Ty1) == 1) { + return const_cast<_Ty1*>(static_cast(::__std_find_end_1(_First1, _Last1, _First2, _Count2))); + } else if constexpr (sizeof(_Ty1) == 2) { + return const_cast<_Ty1*>(static_cast(::__std_find_end_2(_First1, _Last1, _First2, _Count2))); + } else { + _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size + } +} + template _Ty* _Min_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept { constexpr bool _Signed = is_signed_v<_Ty>; From 8558c08d8da9407a71ff727c4a33cb30fc4e5fab Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 31 Oct 2024 07:32:19 +0200 Subject: [PATCH 3/5] coverage --- .../VSO_0000000_vector_algorithms/test.cpp | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index bba38a1d3d..67f770bf98 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -1347,6 +1347,25 @@ void test_case_string_find_str(const basic_string& input_haystack, const basi assert(expected == actual); } +template +void test_case_string_rfind_str(const basic_string& input_haystack, const basic_string& input_needle) { + ptrdiff_t expected; + if (input_needle.empty()) { + expected = static_cast(input_haystack.size()); + } else { + const auto expected_iter = last_known_good_find_end( + input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end()); + + if (expected_iter != input_haystack.end()) { + expected = expected_iter - input_haystack.begin(); + } else { + expected = -1; + } + } + const auto actual = static_cast(input_haystack.rfind(input_needle)); + assert(expected == actual); +} + template void test_basic_string_dis(mt19937_64& gen, D& dis) { basic_string input_haystack; @@ -1362,12 +1381,14 @@ void test_basic_string_dis(mt19937_64& gen, D& dis) { test_case_string_find_first_of(input_haystack, input_needle); test_case_string_find_last_of(input_haystack, input_needle); test_case_string_find_str(input_haystack, input_needle); + test_case_string_rfind_str(input_haystack, input_needle); for (size_t attempts = 0; attempts < needleDataCount; ++attempts) { input_needle.push_back(static_cast(dis(gen))); test_case_string_find_first_of(input_haystack, input_needle); test_case_string_find_last_of(input_haystack, input_needle); test_case_string_find_str(input_haystack, input_needle); + test_case_string_rfind_str(input_haystack, input_needle); // For large needles the chance of a match is low, so test a guaranteed match if (input_haystack.size() > input_needle.size() * 2) { @@ -1377,6 +1398,7 @@ void test_basic_string_dis(mt19937_64& gen, D& dis) { temp.assign(overwritten_first, overwritten_first + static_cast(input_needle.size())); copy(input_needle.begin(), input_needle.end(), overwritten_first); test_case_string_find_str(input_haystack, input_needle); + test_case_string_rfind_str(input_haystack, input_needle); copy(temp.begin(), temp.end(), overwritten_first); } } From f4c7ddc4773d2b4ed5d323be3aeb2fd4fdd7e108 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 31 Oct 2024 08:08:12 +0200 Subject: [PATCH 4/5] vectorization --- stl/inc/__msvc_string_view.hpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/stl/inc/__msvc_string_view.hpp b/stl/inc/__msvc_string_view.hpp index a0a4d6f009..cbad70610d 100644 --- a/stl/inc/__msvc_string_view.hpp +++ b/stl/inc/__msvc_string_view.hpp @@ -683,7 +683,28 @@ constexpr size_t _Traits_rfind(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits return static_cast(-1); } - for (auto _Match_try = _Haystack + (_STD min)(_Start_at, _Hay_size - _Needle_size);; --_Match_try) { + const size_t _Actual_start_at = (_STD min)(_Start_at, _Hay_size - _Needle_size); + +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Is_implementation_handled_char_traits<_Traits> && sizeof(typename _Traits::char_type) <= 2) { + if (!_STD _Is_constant_evaluated()) { + // _Find_end_vectorized takes into accout the needle length when locates search start. + // As a potentially eearlier start position can be specified, need to take it into account, + // and pick between the maximum possible start position, and the specified one, + // and then add _Needle_size, so that it is subtracted back in _Find_end_vectorized. + const auto _End = _Haystack + _Actual_start_at + _Needle_size; + const auto _Ptr = _STD _Find_end_vectorized(_Haystack, _End, _Needle, _Needle_size); + + if (_Ptr != _End) { + return static_cast(_Ptr - _Haystack); + } else { + return static_cast(-1); + } + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS + + for (auto _Match_try = _Haystack + _Actual_start_at;; --_Match_try) { if (_Traits::eq(*_Match_try, *_Needle) && _Traits::compare(_Match_try, _Needle, _Needle_size) == 0) { return static_cast(_Match_try - _Haystack); // found a match } From 0921024d2a13818a2a6d8ab2d4ec5f0656a2f44c Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 13 Nov 2024 10:48:18 -0800 Subject: [PATCH 5/5] Fix comment typos. --- stl/inc/__msvc_string_view.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stl/inc/__msvc_string_view.hpp b/stl/inc/__msvc_string_view.hpp index cbad70610d..75ac8b6249 100644 --- a/stl/inc/__msvc_string_view.hpp +++ b/stl/inc/__msvc_string_view.hpp @@ -688,9 +688,9 @@ constexpr size_t _Traits_rfind(_In_reads_(_Hay_size) const _Traits_ptr_t<_Traits #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_implementation_handled_char_traits<_Traits> && sizeof(typename _Traits::char_type) <= 2) { if (!_STD _Is_constant_evaluated()) { - // _Find_end_vectorized takes into accout the needle length when locates search start. - // As a potentially eearlier start position can be specified, need to take it into account, - // and pick between the maximum possible start position, and the specified one, + // _Find_end_vectorized takes into account the needle length when locating the search start. + // As a potentially earlier start position can be specified, we need to take it into account, + // and pick between the maximum possible start position and the specified one, // and then add _Needle_size, so that it is subtracted back in _Find_end_vectorized. const auto _End = _Haystack + _Actual_start_at + _Needle_size; const auto _Ptr = _STD _Find_end_vectorized(_Haystack, _End, _Needle, _Needle_size);