From 51fa73e2e92c65cc9accff002e0016f08fe25c3b Mon Sep 17 00:00:00 2001 From: ThePhD Date: Tue, 6 Aug 2024 22:52:12 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20Remove=20tag=5Finvoke?= =?UTF-8?q?=20entirely?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 24 +++++++--- benchmarks/CMakeLists.txt | 2 +- benchmarks/barrier/CMakeLists.txt | 2 +- benchmarks/conversion_speed/CMakeLists.txt | 4 +- .../conversion_speed/source/cuneicode.cpp | 2 +- .../conversion_speed/source/standard_c++.cpp | 13 +++--- .../source/standard_c++.init.cpp | 13 +++--- .../conversion_speed/source/standard_c.cpp | 17 +++---- documentation/CMakeLists.txt | 4 ++ documentation/source/conf.py | 2 +- .../source/runtime_locale_encoding.cpp | 15 ++++-- include/ztd/text/code_point.hpp | 15 +++++- include/ztd/text/decode_view.hpp | 2 +- .../text/detail/encoding_iterator_storage.hpp | 16 +++---- include/ztd/text/detail/span_reconstruct.hpp | 2 +- include/ztd/text/encode_view.hpp | 2 +- include/ztd/text/impl/execution_cuchar.hpp | 46 +++++++++++++++++-- .../ztd/text/impl/wide_execution_cwchar.hpp | 36 ++++++++++++++- include/ztd/text/state.hpp | 24 ++++++++-- include/ztd/text/transcode_iterator.hpp | 13 +++--- single/CMakeLists.txt | 2 +- tests/CMakeLists.txt | 4 +- 22 files changed, 194 insertions(+), 66 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 151c7397..12a5ce30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,8 +28,7 @@ # # ============================================================================> -cmake_minimum_required(VERSION 3.21.0) -cmake_policy(VERSION 3.21) +cmake_minimum_required(VERSION 3.28.0) # # Project kickstart # Includes a bunch of basic flags and utilities shared across projects @@ -223,9 +222,9 @@ if (ZTD_TEXT_EXAMPLES OR ZTD_TEXT_BENCHMARKS) simdutf GIT_REPOSITORY https://github.com/simdutf/simdutf GIT_SHALLOW ON - GIT_TAG master) + GIT_TAG master + EXCLUDE_FROM_ALL) 
FetchContent_MakeAvailable(simdutf) - set_property(DIRECTORY "${simdutf_SOURCE_DIR}" PROPERTY EXCLUDE_FROM_ALL YES) endfunction() simdutf_dependency_jail() @@ -240,13 +239,26 @@ if (ZTD_TEXT_EXAMPLES OR ZTD_TEXT_BENCHMARKS) boost.text GIT_REPOSITORY https://github.com/ThePhD/text GIT_SHALLOW ON - GIT_TAG master) + GIT_TAG master + EXCLUDE_FROM_ALL) FetchContent_MakeAvailable(boost.text) - set_property(DIRECTORY "${boost.text_SOURCE_DIR}" PROPERTY EXCLUDE_FROM_ALL YES) endfunction() if (ZTD_TEXT_BOOST.TEXT) boost_text_dependency_jail() endif() + if (ZTD_TEXT_BENCHMARKS) + function(unicode_lipsum_dependency_jail) + # unicode_lipsum check + FetchContent_Declare( + unicode_lipsum + GIT_REPOSITORY https://github.com/lemire/unicode_lipsum + GIT_SHALLOW ON + GIT_TAG main + EXCLUDE_FROM_ALL) + FetchContent_MakeAvailable(unicode_lipsum) + endfunction() + unicode_lipsum_dependency_jail() + endif() endif() if(ZTD_TEXT_TESTS) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 79537ea0..ca2bcf4f 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -36,6 +36,7 @@ function (google_benchmark_dependency_jail) google_benchmark GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG main + EXCLUDE_FROM_ALL ) # Benchmark variables, internal setup set(BENCHMARK_ENABLE_EXCEPTIONS ON) @@ -55,7 +56,6 @@ function (google_benchmark_dependency_jail) set(HAVE_STD_REGEX ON) endif() FetchContent_MakeAvailable(google_benchmark) - set_property(DIRECTORY "${google_benchmark_SOURCE_DIR}" PROPERTY EXCLUDE_FROM_ALL YES) endfunction() google_benchmark_dependency_jail() diff --git a/benchmarks/barrier/CMakeLists.txt b/benchmarks/barrier/CMakeLists.txt index a4dafc68..d86bbcfa 100644 --- a/benchmarks/barrier/CMakeLists.txt +++ b/benchmarks/barrier/CMakeLists.txt @@ -27,7 +27,7 @@ # # =========================================================================== # -# # Benchmarks +# # Benchmarks data, compiled behind a shared data barrier file(GLOB_RECURSE 
ztd.text.benchmarks.barrier.sources LIST_DIRECTORIES FALSE CONFIGURE_DEPENDS diff --git a/benchmarks/conversion_speed/CMakeLists.txt b/benchmarks/conversion_speed/CMakeLists.txt index 4765ac5a..873c32b0 100644 --- a/benchmarks/conversion_speed/CMakeLists.txt +++ b/benchmarks/conversion_speed/CMakeLists.txt @@ -94,8 +94,8 @@ function (generate_converion_speed_benchmark_targets name data_name title) ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_CUNEICODE_SINGLE_FROM_BULK_BENCHMARKS=$,1,0> ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_CUNEICODE_BASIC_UNCHECKED_BENCHMARKS=$,1,0> ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_CUNEICODE_SINGLE_FROM_BULK_UNCHECKED_BENCHMARKS=$,1,0> - ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_ICU_BENCHMARKS=1 - ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_ICONV_BENCHMARKS=1 + ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_ICU_BENCHMARKS=$,1,0> + ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_ICONV_BENCHMARKS=$,1,0> ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_ENCODING_C_BENCHMARKS=1 ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_CTRE_BENCHMARKS=1 ZTD_TEXT_BENCHMARKS_CONVERSION_SPEED_SIMDUTF_BENCHMARKS=1 diff --git a/benchmarks/conversion_speed/source/cuneicode.cpp b/benchmarks/conversion_speed/source/cuneicode.cpp index 7c17c249..c3d27dcf 100644 --- a/benchmarks/conversion_speed/source/cuneicode.cpp +++ b/benchmarks/conversion_speed/source/cuneicode.cpp @@ -58,7 +58,7 @@ const ztd_char##FROM_N##_t* input = input_data.data(); \ size_t output_size = output_data.size(); \ ztd_char##TO_N##_t* output = output_data.data(); \ - cnc_mcerr err = cnc_c##FROM_N##sntoc##TO_N##sn(&output_size, &output, &input_size, &input, &cstate); \ + cnc_mcerr err = cnc_c##FROM_N##snrtoc##TO_N##sn(&output_size, &output, &input_size, &input, &cstate); \ if (err != cnc_mcerr_ok) { \ result = false; \ } \ diff --git a/benchmarks/conversion_speed/source/standard_c++.cpp b/benchmarks/conversion_speed/source/standard_c++.cpp index 07c6e5d5..0c89650a 100644 --- a/benchmarks/conversion_speed/source/standard_c++.cpp +++ 
b/benchmarks/conversion_speed/source/standard_c++.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -66,7 +67,7 @@ static void utf16_to_utf32_well_formed_standard_cpp(benchmark::State& state) { to_char_t* output = output_data.data(); to_char_t* const output_last = output_data.data() + output_data.size(); for (auto _ : state) { - std::mbstate_t state {}; + ztd_mbstate_t state {}; const char* char_input = reinterpret_cast(input); const char* char_input_last = reinterpret_cast(input_last); const char* char_input_next; @@ -110,7 +111,7 @@ static void utf32_to_utf16_well_formed_standard_cpp(benchmark::State& state) { to_char_t* output = output_data.data(); to_char_t* const output_last = output_data.data() + output_data.size(); for (auto _ : state) { - std::mbstate_t state {}; + ztd_mbstate_t state {}; char* char_output = reinterpret_cast(output); char* char_output_last = reinterpret_cast(output_last); const from_char_t* input_next; @@ -155,7 +156,7 @@ static void utf32_to_utf8_well_formed_standard_cpp(benchmark::State& state) { to_char_t* output = output_data.data(); to_char_t* const output_last = output_data.data() + output_data.size(); for (auto _ : state) { - std::mbstate_t state {}; + ztd_mbstate_t state {}; char* char_output = reinterpret_cast(output); char* char_output_last = reinterpret_cast(output_last); const from_char_t* input_next; @@ -201,7 +202,7 @@ static void utf8_to_utf32_well_formed_standard_cpp(benchmark::State& state) { to_char_t* output = output_data.data(); to_char_t* const output_last = output_data.data() + output_data.size(); for (auto _ : state) { - std::mbstate_t state {}; + ztd_mbstate_t state {}; const char* char_input = reinterpret_cast(input); const char* char_input_last = reinterpret_cast(input_last); const char* char_input_next; @@ -246,7 +247,7 @@ static void utf16_to_utf8_well_formed_standard_cpp(benchmark::State& state) { to_char_t* output = output_data.data(); to_char_t* const output_last = output_data.data() + 
output_data.size(); for (auto _ : state) { - std::mbstate_t state {}; + ztd_mbstate_t state {}; char* char_output = reinterpret_cast(output); char* char_output_last = reinterpret_cast(output_last); const from_char_t* input_next; @@ -292,7 +293,7 @@ static void utf8_to_utf16_well_formed_standard_cpp(benchmark::State& state) { to_char_t* output = output_data.data(); to_char_t* const output_last = output_data.data() + output_data.size(); for (auto _ : state) { - std::mbstate_t state {}; + ztd_mbstate_t state {}; const char* char_input = reinterpret_cast(input); const char* char_input_last = reinterpret_cast(input_last); const char* char_input_next; diff --git a/benchmarks/conversion_speed/source/standard_c++.init.cpp b/benchmarks/conversion_speed/source/standard_c++.init.cpp index fc8a03e5..8795ea35 100644 --- a/benchmarks/conversion_speed/source/standard_c++.init.cpp +++ b/benchmarks/conversion_speed/source/standard_c++.init.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -65,7 +66,7 @@ static void utf16_to_utf32_init_well_formed_standard_cpp(benchmark::State& state std::locale current_loc = {}; auto codecvt_ptr = new std::codecvt_utf16(); current_loc = std::locale(current_loc, codecvt_ptr); - std::mbstate_t state {}; + ztd_mbstate_t state {}; const char* char_input = reinterpret_cast(input); const char* char_input_last = reinterpret_cast(input_last); const char* char_input_next; @@ -108,7 +109,7 @@ static void utf32_to_utf16_init_well_formed_standard_cpp(benchmark::State& state std::locale current_loc = {}; auto codecvt_ptr = new std::codecvt_utf16(); current_loc = std::locale(current_loc, codecvt_ptr); - std::mbstate_t state {}; + ztd_mbstate_t state {}; char* char_output = reinterpret_cast(output); char* char_output_last = reinterpret_cast(output_last); const from_char_t* input_next; @@ -153,7 +154,7 @@ static void utf32_to_utf8_init_well_formed_standard_cpp(benchmark::State& state) std::locale current_loc = {}; auto codecvt_ptr = new 
std::codecvt_utf8(); current_loc = std::locale(current_loc, codecvt_ptr); - std::mbstate_t state {}; + ztd_mbstate_t state {}; char* char_output = reinterpret_cast(output); char* char_output_last = reinterpret_cast(output_last); const from_char_t* input_next; @@ -199,7 +200,7 @@ static void utf8_to_utf32_init_well_formed_standard_cpp(benchmark::State& state) std::locale current_loc = {}; auto codecvt_ptr = new std::codecvt_utf8(); current_loc = std::locale(current_loc, codecvt_ptr); - std::mbstate_t state {}; + ztd_mbstate_t state {}; const char* char_input = reinterpret_cast(input); const char* char_input_last = reinterpret_cast(input_last); const char* char_input_next; @@ -244,7 +245,7 @@ static void utf16_to_utf8_init_well_formed_standard_cpp(benchmark::State& state) std::locale current_loc = {}; auto codecvt_ptr = new std::codecvt_utf8_utf16(); current_loc = std::locale(current_loc, codecvt_ptr); - std::mbstate_t state {}; + ztd_mbstate_t state {}; char* char_output = reinterpret_cast(output); char* char_output_last = reinterpret_cast(output_last); const from_char_t* input_next; @@ -290,7 +291,7 @@ static void utf8_to_utf16_init_well_formed_standard_cpp(benchmark::State& state) std::locale current_loc = {}; auto codecvt_ptr = new std::codecvt_utf8_utf16(); current_loc = std::locale(current_loc, codecvt_ptr); - std::mbstate_t state {}; + ztd_mbstate_t state {}; const char* char_input = reinterpret_cast(input); const char* char_input_last = reinterpret_cast(input_last); const char* char_input_next; diff --git a/benchmarks/conversion_speed/source/standard_c.cpp b/benchmarks/conversion_speed/source/standard_c.cpp index 8e7e6043..e5b61e29 100644 --- a/benchmarks/conversion_speed/source/standard_c.cpp +++ b/benchmarks/conversion_speed/source/standard_c.cpp @@ -39,6 +39,7 @@ #include #include +#include #include #if ZTD_IS_ON(ZTD_CUCHAR) @@ -55,8 +56,8 @@ static void utf16_to_utf32_well_formed_standard_c(benchmark::State& state) { std::vector 
output_data(c_span_char32_t_size(u32_data)); bool result = true; for (auto _ : state) { - mbstate_t from_state {}; - mbstate_t to_state {}; + ztd_mbstate_t from_state {}; + ztd_mbstate_t to_state {}; char intermediate_data[MB_LEN_MAX * 2]; to_char_t* output = output_data.data(); to_char_t* output_last = output_data.data() + output_data.size(); @@ -149,8 +150,8 @@ static void utf32_to_utf16_well_formed_standard_c(benchmark::State& state) { std::vector output_data(c_span_char16_t_size(u16_data)); bool result = true; for (auto _ : state) { - mbstate_t from_state {}; - mbstate_t to_state {}; + ztd_mbstate_t from_state {}; + ztd_mbstate_t to_state {}; char intermediate_data[MB_LEN_MAX * 2]; to_char_t* output = output_data.data(); to_char_t* output_last = output_data.data() + output_data.size(); @@ -244,7 +245,7 @@ static void utf32_to_utf8_well_formed_standard_c(benchmark::State& state) { std::vector output_data(c_span_char8_t_size(u8_data)); bool result = true; for (auto _ : state) { - mbstate_t from_state {}; + ztd_mbstate_t from_state {}; to_char_t* output = output_data.data(); to_char_t* output_last = output_data.data() + output_data.size(); const from_char_t* input = input_data.data(); @@ -286,7 +287,7 @@ static void utf8_to_utf32_well_formed_standard_c(benchmark::State& state) { std::vector output_data(c_span_char32_t_size(u32_data)); bool result = true; for (auto _ : state) { - mbstate_t to_state {}; + ztd_mbstate_t to_state {}; const from_char_t* input = input_data.data(); const from_char_t* const input_last = input_data.data() + input_data.size(); to_char_t* output = output_data.data(); @@ -362,7 +363,7 @@ static void utf16_to_utf8_well_formed_standard_c(benchmark::State& state) { std::vector output_data(c_span_char8_t_size(u8_data)); bool result = true; for (auto _ : state) { - mbstate_t from_state {}; + ztd_mbstate_t from_state {}; to_char_t* output = output_data.data(); to_char_t* output_last = output_data.data() + output_data.size(); const from_char_t* 
input = input_data.data(); @@ -404,7 +405,7 @@ static void utf8_to_utf16_well_formed_standard_c(benchmark::State& state) { std::vector output_data(c_span_char16_t_size(u16_data)); bool result = true; for (auto _ : state) { - mbstate_t to_state {}; + ztd_mbstate_t to_state {}; const from_char_t* input = input_data.data(); const from_char_t* const input_last = input_data.data() + input_data.size(); to_char_t* output = output_data.data(); diff --git a/documentation/CMakeLists.txt b/documentation/CMakeLists.txt index 6978c50f..d2d7cbbc 100644 --- a/documentation/CMakeLists.txt +++ b/documentation/CMakeLists.txt @@ -30,7 +30,11 @@ # Dependencies find_package(Doxygen REQUIRED) +if (NOT ZTD_TEXT_DOCUMENTATION_NO_SPHINX) +find_package(Sphinx REQUIRED) +else() find_package(Sphinx QUIET) +endif() # For the install target and related directories include(GNUInstallDirs) diff --git a/documentation/source/conf.py b/documentation/source/conf.py index 8328f9de..08c96f93 100644 --- a/documentation/source/conf.py +++ b/documentation/source/conf.py @@ -68,7 +68,7 @@ rst_prolog = """ .. |ub| replace:: ☢️☢️Undefined Behavior☢️☢️ -.. |specializations_okay_different_types| replace:: User Specializations: ✔️ Okay! You can add other types to this classification by specializing the class template. Your specialization must have a type definition named ``type`` (as in, ``using type = ...;`` or `typedef ... type;``) inside of the class specialization that is ``public:``\ ly accessible. Note that specializing any type not explicitly marked with this notice is |ub|. +.. |specializations_okay_different_types| replace:: User Specializations: ✔️ Okay! You can add other types to this classification by specializing the class template. Your specialization must have a type definition named ``type`` (as in, ``using type = ...;`` or `typedef ... type;``) inside of the class specialization that is ``public:``-ly accessible. Note that specializing any type not explicitly marked with this notice is |ub|. .. 
|specializations_okay_true_false_type| replace:: User Specializations: ✔️ Okay! You can add other types to this classification by specializing the class template to a definition that derives from ``std::true_type``, or turn it off explicitly by having a definition that derives from ``std::false_type``. Note that specializing any type not explicitly marked with this notice is |ub|. diff --git a/examples/documentation/snippets/source/runtime_locale_encoding.cpp b/examples/documentation/snippets/source/runtime_locale_encoding.cpp index a59f741c..f32badd4 100644 --- a/examples/documentation/snippets/source/runtime_locale_encoding.cpp +++ b/examples/documentation/snippets/source/runtime_locale_encoding.cpp @@ -34,6 +34,8 @@ // This example doesn't work on Apple/libc++ because they don't have // standard C or C++ headers. +#include + #include #if defined(__has_include) && __has_include() #include @@ -50,7 +52,6 @@ extern "C" { #define WIN32_LEAN_AND_MEAN 1 #include #else - #endif class runtime_locale { @@ -63,23 +64,31 @@ class runtime_locale { inline static constexpr std::size_t max_code_units = MB_LEN_MAX; struct decode_state { - std::mbstate_t c_stdlib_state; + ztd_mbstate_t c_stdlib_state; decode_state() noexcept : c_stdlib_state() { // properly set for mbrtoc32 state code_point ghost_ouput[2] {}; UCHAR_ACCESS mbrtoc32(ghost_ouput, "\0", 1, &c_stdlib_state); } + + bool is_complete() const noexcept { + return UCHAR_ACCESS mbsinit(&c_stdlib_state) != 0; + } }; struct encode_state { - std::mbstate_t c_stdlib_state; + ztd_mbstate_t c_stdlib_state; encode_state() noexcept : c_stdlib_state() { // properly set for c32rtomb state code_unit ghost_ouput[MB_LEN_MAX] {}; UCHAR_ACCESS c32rtomb(ghost_ouput, U'\0', &c_stdlib_state); } + + bool is_complete() const noexcept { + return UCHAR_ACCESS mbsinit(&c_stdlib_state) != 0; + } }; bool contains_unicode_encoding() const noexcept { diff --git a/include/ztd/text/code_point.hpp b/include/ztd/text/code_point.hpp index 
f4ccb8f5..0ef70320 100644 --- a/include/ztd/text/code_point.hpp +++ b/include/ztd/text/code_point.hpp @@ -42,7 +42,19 @@ #include ////// -/// @file code point abstractions +/// @file code_point.hpp +/// +/// @brief code point abstractions + +////// +/// @namespace ztd +/// +/// @brief The core library namespace. + +////// +/// @namespace ztd::text +/// +/// @brief The text library namespace. namespace ztd { namespace text { ZTD_TEXT_INLINE_ABI_NAMESPACE_OPEN_I_ @@ -50,7 +62,6 @@ namespace ztd { namespace text { ////// /// @addtogroup ztd_text_properties Property and Trait Helpers /// @{ - ///// ////// /// @brief Retrieves the code point type for the given type. diff --git a/include/ztd/text/decode_view.hpp b/include/ztd/text/decode_view.hpp index a8746819..9465afc1 100644 --- a/include/ztd/text/decode_view.hpp +++ b/include/ztd/text/decode_view.hpp @@ -213,7 +213,7 @@ namespace ztd { namespace text { /// @brief The reconstruct extension point for rebuilding an encoding view from its iterator and sentinel /// type. 
template - constexpr decode_view<_Encoding, _Range, _ErrorHandler, _State> tag_invoke(ztd::tag_t, + constexpr decode_view<_Encoding, _Range, _ErrorHandler, _State> reconstruct( ::std::in_place_type_t>, typename decode_view<_Encoding, _Range, _ErrorHandler, _State>::iterator __it, typename decode_view<_Encoding, _Range, _ErrorHandler, _State>::sentinel) noexcept(::std:: diff --git a/include/ztd/text/detail/encoding_iterator_storage.hpp b/include/ztd/text/detail/encoding_iterator_storage.hpp index f9140fd3..860eaf33 100644 --- a/include/ztd/text/detail/encoding_iterator_storage.hpp +++ b/include/ztd/text/detail/encoding_iterator_storage.hpp @@ -106,16 +106,16 @@ namespace ztd { namespace text { : private ebco, 0>, private ebco, 1>, private __state_storage, remove_cvref_t<_State>, 2>, - private ebco<__txt_detail::__span_reconstruct_t<_Range, _Range>, 3> { + private ebco<__txt_detail::__span_reconstruct_t, _Range>, 3> { private: + using _URange = unwrap_remove_cvref_t<_Range>; + using _UEncoding = unwrap_remove_cvref_t<_Encoding>; + using _UErrorHandler = unwrap_remove_cvref_t<_ErrorHandler>; + using _UState = unwrap_remove_cvref_t<_State>; using __base_encoding_t = ebco, 0>; using __base_error_handler_t = ebco, 1>; using __base_state_t = __state_storage, remove_cvref_t<_State>, 2>; - using __base_range_t = ebco<__txt_detail::__span_reconstruct_t<_Range, _Range>, 3>; - using _URange = unwrap_remove_cvref_t<_Range>; - using _UEncoding = unwrap_remove_cvref_t<_Encoding>; - using _UErrorHandler = unwrap_remove_cvref_t<_ErrorHandler>; - using _UState = unwrap_remove_cvref_t<_State>; + using __base_range_t = ebco<__txt_detail::__span_reconstruct_t<_URange, _Range>, 3>; public: using range_type = _Range; @@ -154,7 +154,7 @@ namespace ztd { namespace text { noexcept(::std::is_nothrow_move_constructible_v // cf && ::std::is_nothrow_move_constructible_v // cf && ::std::is_nothrow_move_constructible_v // cf - && noexcept(__base_state_t(::std::declval()))) + && 
noexcept(__base_state_t(::std::declval()))) : __base_encoding_t(::std::move(__encoding)) , __base_error_handler_t(::std::move(__error_handler)) , __base_state_t(this->_M_get_encoding()) @@ -166,7 +166,7 @@ namespace ztd { namespace text { ::std::is_nothrow_move_constructible_v // cf && ::std::is_nothrow_move_constructible_v // cf && ::std::is_nothrow_move_constructible_v // cf - && noexcept(__base_state_t(::std::declval(), ::std::declval()))) + && noexcept(__base_state_t(::std::declval(), ::std::declval()))) : __base_encoding_t(::std::move(__encoding)) , __base_error_handler_t(::std::move(__error_handler)) , __base_state_t(this->_M_get_encoding(), ::std::move(__state)) diff --git a/include/ztd/text/detail/span_reconstruct.hpp b/include/ztd/text/detail/span_reconstruct.hpp index ab68a13a..684e7c69 100644 --- a/include/ztd/text/detail/span_reconstruct.hpp +++ b/include/ztd/text/detail/span_reconstruct.hpp @@ -93,7 +93,7 @@ namespace ztd { namespace text { } else { return ::ztd::ranges::reconstruct( - ::std::in_place_type<_UInput>, ::std::forward<_Input>(__input)); + ::std::in_place_type<_UInputTag>, ::std::forward<_Input>(__input)); } } } diff --git a/include/ztd/text/encode_view.hpp b/include/ztd/text/encode_view.hpp index 13061d06..9dcbb594 100644 --- a/include/ztd/text/encode_view.hpp +++ b/include/ztd/text/encode_view.hpp @@ -208,7 +208,7 @@ namespace ztd { namespace text { /// @brief The reconstruct extension point for rebuilding an encoding view from its iterator and sentinel /// type. 
template - constexpr encode_view<_Encoding, _Range, _ErrorHandler, _State> tag_invoke(ztd::tag_t, + constexpr encode_view<_Encoding, _Range, _ErrorHandler, _State> reconstruct( ::std::in_place_type_t>, typename encode_view<_Encoding, _Range, _ErrorHandler, _State>::iterator __it, typename encode_view<_Encoding, _Range, _ErrorHandler, _State>::sentinel) noexcept(::std:: diff --git a/include/ztd/text/impl/execution_cuchar.hpp b/include/ztd/text/impl/execution_cuchar.hpp index 8278d9cc..6a3bf961 100644 --- a/include/ztd/text/impl/execution_cuchar.hpp +++ b/include/ztd/text/impl/execution_cuchar.hpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -76,9 +77,15 @@ namespace ztd { namespace text { namespace __txt_detail { class __execution_decode_state { public: + ////// + /// @brief The narrow mbstate_t from the standard library for multibyte conversion sequences. ztd_mbstate_t __narrow_state; + ////// + /// @brief Whether or not there might be some accumulated data in the state. bool __output_pending; + ////// + /// @brief Zero-initializes to its initial state, which includes the initial conversion sequence. __execution_decode_state() noexcept : __narrow_state(), __output_pending(false) { ztd_char32_t __ghost_space[2]; ::std::size_t __init_result = ZTD_UCHAR_SCOPE_I_ mbrtoc32(__ghost_space, "\0", 1, &__narrow_state); @@ -90,9 +97,15 @@ namespace ztd { namespace text { class __execution_encode_state { public: + ////// + /// @brief The narrow mbstate_t from the standard library for multibyte conversion sequences. ztd_mbstate_t __narrow_state; + ////// + /// @brief Whether or not there might be some accumulated data in the state. bool __output_pending; + ////// + /// @brief Zero-initializes to its initial state, which includes the initial conversion sequence. 
__execution_encode_state() noexcept : __narrow_state(), __output_pending(false) { char __ghost_space[MB_LEN_MAX]; ::std::size_t __init_result = ZTD_UCHAR_SCOPE_I_ c32rtomb(__ghost_space, U'\0', &__narrow_state); @@ -208,7 +221,7 @@ namespace ztd { namespace text { /// reconstructed output range, error handler, and a reference to the passed-in state. /// /// @remarks Platform APIs and/or the C Standard Library may be used to properly decode one complete unit - /// of information (alongside std::mbstate_t usage). Whether or not the state is used is based on the + /// of information (alongside mbstate_t usage). Whether or not the state is used is based on the /// implementation and what it chooses. /// /// @remarks To the best ability of the implementation, the iterators will be @@ -386,7 +399,7 @@ namespace ztd { namespace text { /// reconstructed output range, error handler, and a reference to the passed-in state. /// /// @remarks Platform APIs and/or the C Standard Library may be used to properly decode one complete unit - /// of information (alongside std::mbstate_t usage). Whether or not the state is used is based on the + /// of information (alongside mbstate_t usage). Whether or not the state is used is based on the /// implementation and what it chooses. 
/// /// @remarks To the best ability of the implementation, the iterators will be @@ -524,7 +537,7 @@ namespace ztd { namespace text { ::std::size_t __state_offset = 0; ::std::size_t __state_count = 1; for (; __state_offset < max_code_units; (void)++__state_offset, (void)++__state_count) { - ::std::mbstate_t __preserved_state = __s.__narrow_state; + ztd_mbstate_t __preserved_state = __s.__narrow_state; __intermediary_input[__state_offset] = *__in_it; ::ztd::ranges::iter_advance(__in_it); ztd_char32_t __intermediary_output[1] {}; @@ -606,6 +619,33 @@ namespace ztd { namespace text { _SubOutput(::std::move(__out_it), ::std::move(__out_last)), __s, encoding_error::ok); } } + + ////// + /// @brief Finds out whether or not the state contains any unused data that needs to complete an + /// indivisible unit of work. + /// + /// @param[in] __state The decode state for the standard-API based execution encoding. + /// + /// @returns `true` if the narrow conversion state is in its initial state and no output is pending, + /// meaning a decode operation may stop here without losing accumulated data; otherwise, `false`. + bool state_is_complete(const __txt_detail::__execution_decode_state& __state) const noexcept { + return ::std::mbsinit(&__state.__narrow_state) != 0 + && !__state.__output_pending; + } + + + ////// + /// @brief Finds out whether or not the state contains any unused data that needs to complete an + /// indivisible unit of work. + /// + /// @param[in] __state The encode state for the standard-API based execution encoding. + /// + /// @returns `true` if the narrow conversion state is in its initial state and no output is pending, + /// meaning an encode operation may stop here without losing accumulated data; otherwise, `false`. 
+ bool state_is_complete(const __txt_detail::__execution_encode_state& __state) const noexcept { + return ::std::mbsinit(&__state.__narrow_state) != 0 + && !__state.__output_pending; + } }; } // namespace __txt_impl diff --git a/include/ztd/text/impl/wide_execution_cwchar.hpp b/include/ztd/text/impl/wide_execution_cwchar.hpp index 554b13bf..2acac6e9 100644 --- a/include/ztd/text/impl/wide_execution_cwchar.hpp +++ b/include/ztd/text/impl/wide_execution_cwchar.hpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -63,9 +64,15 @@ namespace ztd { namespace text { class __wide_execution_decode_state { public: + ////// + /// @brief The wide mbstate_t from the standard library for multibyte conversion sequences. ztd_mbstate_t __wide_state; + ////// + /// @brief The narrow state from the relied-upon standard execution multibyte encoding. decode_state_t __narrow_state; + ////// + /// @brief Zero-initializes to its initial state, which includes the initial conversion sequence. __wide_execution_decode_state() noexcept : __wide_state(), __narrow_state() { char __ghost_space[MB_LEN_MAX]; #if ZTD_IS_ON(ZTD_LIBVCXX) @@ -79,13 +86,29 @@ namespace ztd { namespace text { ZTD_TEXT_ASSERT(__init_result == 1 && __ghost_space[0] == '\0'); ZTD_TEXT_ASSERT(::std::mbsinit(&__wide_state) != 0); } + + ////// + /// @brief Finds out whether or not the state contains any unused data that needs to complete an + /// indivisible unit of work. + /// + /// @returns `true` if the wide conversion state is in its initial state and the narrow state reports + /// itself complete, meaning no data is accumulated for a continued decode operation; otherwise, `false`. + bool is_complete() const noexcept { + return ::std::mbsinit(&this->__wide_state) && ::ztd::text::is_state_complete(this->__narrow_state); + } }; class __wide_execution_encode_state { public: - ::std::mbstate_t __wide_state; + ////// + /// @brief The wide mbstate_t from the standard library for multibyte conversion sequences. 
+ ztd_mbstate_t __wide_state; + ////// + /// @brief The narrow state from the relied-upon standard execution multibyte encoding. encode_state_t __narrow_state; + ////// + /// @brief Zero-initializes to its initial state, which includes the initial conversion sequence. __wide_execution_encode_state() noexcept : __wide_state(), __narrow_state() { wchar_t __ghost_space[2]; ::std::size_t __init_result = ::std::mbrtowc(__ghost_space, "", 1, &__wide_state); @@ -93,6 +116,17 @@ namespace ztd { namespace text { ZTD_TEXT_ASSERT(__init_result == 0 && __ghost_space[0] == L'\0'); ZTD_TEXT_ASSERT(::std::mbsinit(&__wide_state) != 0); } + + + ////// + /// @brief Finds out whether or not the state contains any unused data that needs to complete an + /// indivisible unit of work. + /// + /// @returns Whether or not there are additional information stored in any part of the standard-based + /// streams have accumulated information for a continual encode operation. + bool is_complete() const noexcept { + return ::std::mbsinit(&this->__wide_state) && ::ztd::text::is_state_complete(this->__narrow_state); + } }; } // namespace __txt_detail diff --git a/include/ztd/text/state.hpp b/include/ztd/text/state.hpp index 219ecf1d..6aef889d 100644 --- a/include/ztd/text/state.hpp +++ b/include/ztd/text/state.hpp @@ -327,6 +327,25 @@ namespace ztd { namespace text { } } + ////// + /// @brief Returns whether or not a state has completed any associated operations and has no more manipulations on + /// the output to perform, even if the input source is empty. + /// + /// @param[in] __state The state to check for completion. + /// + /// @remarks If the state does not have a member function `is_complete`, then this will simply return `true`. + /// Otherwise, it invokes `__state.is_complete()`. 
+ template + constexpr bool is_state_complete(_State& __state) noexcept { + if constexpr (::ztd::is_detected_v<__txt_detail::__detect_state_is_complete, _State&>) { + return __state.is_complete(); + } + else { + (void)__state; + return true; + } + } + ////// /// @brief Returns whether or not a state has completed any associated operations and has no more manipulations on /// the output to perform, even if the input source is empty. @@ -341,11 +360,8 @@ namespace ztd { namespace text { if constexpr (::ztd::is_detected_v<__txt_detail::__detect_encoding_is_state_complete, _Encoding&, _State&>) { return __encoding.is_state_complete(__state); } - else if constexpr (::ztd::is_detected_v<__txt_detail::__detect_state_is_complete, _State&>) { - return __state.is_complete(); - } else { - return true; + return is_state_complete(__state); } } diff --git a/include/ztd/text/transcode_iterator.hpp b/include/ztd/text/transcode_iterator.hpp index 386009a4..586c7805 100644 --- a/include/ztd/text/transcode_iterator.hpp +++ b/include/ztd/text/transcode_iterator.hpp @@ -101,11 +101,10 @@ namespace ztd { namespace text { unwrap_remove_cvref_t<_FromErrorHandler>> // cf && encode_error_handler_always_returns_ok_v, unwrap_remove_cvref_t<_ToErrorHandler>>>, - private ebco<__txt_detail::__span_reconstruct_t, unwrap_remove_cvref_t<_Range>>, - 5> { + private ebco<__txt_detail::__span_reconstruct_t, _Range>, 5> { private: using _UNonRRange = unwrap_remove_cvref_t<_Range>; - using _URange = ranges::range_reconstruct_t<_UNonRRange>; + using _URange = __txt_detail::__span_reconstruct_t<_UNonRRange, _Range>; using _UFromEncoding = unwrap_remove_cvref_t<_FromEncoding>; using _UToEncoding = unwrap_remove_cvref_t<_ToEncoding>; using _UFromErrorHandler = unwrap_remove_cvref_t<_FromErrorHandler>; @@ -132,7 +131,7 @@ namespace ztd { namespace text { = __txt_detail::__state_storage, remove_cvref_t<_FromState>, 0>; using __base_to_state_t = __txt_detail::__state_storage, remove_cvref_t<_ToState>, 1>; - 
using __base_range_t = ebco<__txt_detail::__span_reconstruct_t<_URange, _URange>, 5>; + using __base_range_t = ebco<_URange, 5>; inline static constexpr bool _IsBackwards = is_detected_v<__txt_detail::__detect_object_encode_one_backwards, _UFromEncoding, _URange, _UFromErrorHandler, _UFromState>; @@ -249,7 +248,7 @@ namespace ztd { namespace text { constexpr transcode_iterator( range_type __range, from_encoding_type __from_encoding, to_encoding_type __to_encoding) : transcode_iterator(::std::move(__range), ::std::move(__from_encoding), ::std::move(__to_encoding), - from_error_handler_type {}, to_error_handler_type {}) { + from_error_handler_type {}, to_error_handler_type {}) { } ////// @@ -264,8 +263,8 @@ namespace ztd { namespace text { to_encoding_type __to_encoding, from_error_handler_type __from_error_handler, to_error_handler_type __to_error_handler) : transcode_iterator(::std::move(__range), ::std::move(__from_encoding), ::std::move(__to_encoding), - ::std::move(__from_error_handler), ::std::move(__to_error_handler), from_state_type {}, - to_state_type {}) { + ::std::move(__from_error_handler), ::std::move(__to_error_handler), from_state_type {}, + to_state_type {}) { } ////// diff --git a/single/CMakeLists.txt b/single/CMakeLists.txt index 9592b1d0..45dd3150 100644 --- a/single/CMakeLists.txt +++ b/single/CMakeLists.txt @@ -30,7 +30,7 @@ # # # # ztd.text, single # # # Required minimum version statement -cmake_minimum_required(VERSION 3.13.0) +cmake_minimum_required(VERSION 3.28.0) # # # ztd.text single generation header # generate the single header if we can find the python3 interpreter diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 979d3e0a..89813fc6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -34,9 +34,9 @@ FetchContent_Declare( catch2 GIT_REPOSITORY https://github.com/catchorg/Catch2.git GIT_SHALLOW ON - GIT_TAG devel) + GIT_TAG devel + EXCLUDE_FROM_ALL) FetchContent_MakeAvailable(catch2) -set_property(DIRECTORY 
"${catch2_SOURCE_DIR}" PROPERTY EXCLUDE_FROM_ALL YES) # # Test Subdirectories add_subdirectory(inclusion)