Skip to content

Commit

Permalink
✨ mbstate_t and UCHAR/WCHAR scope accessor fixes!
Browse files Browse the repository at this point in the history
  • Loading branch information
ThePhD committed Oct 28, 2023
1 parent 3ead068 commit f2048a7
Show file tree
Hide file tree
Showing 8 changed files with 194 additions and 192 deletions.
2 changes: 1 addition & 1 deletion benchmarks/conversion_speed/source/standard_c++.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

#include <ztd/text/version.hpp>

#if (ZTD_IS_ON(ZTD_UCHAR) || ZTD_IS_ON(ZTD_CUCHAR)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)
#if (ZTD_IS_ON(ZTD_UCHAR_H) || ZTD_IS_ON(ZTD_CUCHAR)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)

#include <benchmark/benchmark.h>

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/conversion_speed/source/standard_c++.init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

#include <ztd/text/version.hpp>

#if (ZTD_IS_ON(ZTD_UCHAR) || ZTD_IS_ON(ZTD_CUCHAR)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)
#if (ZTD_IS_ON(ZTD_UCHAR_H) || ZTD_IS_ON(ZTD_CUCHAR)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)

#include <benchmark/benchmark.h>

Expand Down
34 changes: 17 additions & 17 deletions benchmarks/conversion_speed/source/standard_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

#include <ztd/text/version.hpp>

#if (ZTD_IS_ON(ZTD_UCHAR) || ZTD_IS_ON(ZTD_CUCHAR)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)
#if (ZTD_IS_ON(ZTD_UCHAR_H) || ZTD_IS_ON(ZTD_CUCHAR)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)

#include <benchmark/benchmark.h>

Expand Down Expand Up @@ -65,7 +65,7 @@ static void utf16_to_utf32_well_formed_standard_c(benchmark::State& state) {
const from_char_t* const input_last = input_data.data() + input_data.size();
for (;;) {
if (input != input_last) {
const size_t from_result = ZTD_UCHAR_ACCESSOR_I_ c16rtomb(intermediate, *input, &from_state);
const size_t from_result = ZTD_UCHAR_SCOPE_I_ c16rtomb(intermediate, *input, &from_state);
switch (from_result) {
case static_cast<size_t>(-1):
// encoding error: everything has failed. bail.
Expand All @@ -80,7 +80,7 @@ static void utf16_to_utf32_well_formed_standard_c(benchmark::State& state) {
}
const size_t intermediate_size = from_result + (intermediate - intermediate_data);
const size_t to_result
= ZTD_UCHAR_ACCESSOR_I_ mbrtoc32(output, intermediate_data, intermediate_size, &to_state);
= ZTD_UCHAR_SCOPE_I_ mbrtoc32(output, intermediate_data, intermediate_size, &to_state);
switch (to_result) {
case static_cast<size_t>(-1):
// encoding error: everything has failed. bail.
Expand Down Expand Up @@ -110,10 +110,10 @@ static void utf16_to_utf32_well_formed_standard_c(benchmark::State& state) {
// finish processing text without having to check if you have cleaned out
// the state. You have to call the function more to make sure
// you have drained off all the surrogates...
while (ZTD_UCHAR_ACCESSOR_I_ mbsinit(&to_state) == 0) {
while (ZTD_UCHAR_SCOPE_I_ mbsinit(&to_state) == 0) {
// try to drain the (last?) input out...
to_char_t last_c;
const size_t last_to_result = ZTD_UCHAR_ACCESSOR_I_ mbrtoc32(&last_c, "", 1, &to_state);
const size_t last_to_result = ZTD_UCHAR_SCOPE_I_ mbrtoc32(&last_c, "", 1, &to_state);
if (last_to_result == static_cast<size_t>(-3)) {
// we had extra, vomit it out...
*output = last_c;
Expand Down Expand Up @@ -159,7 +159,7 @@ static void utf32_to_utf16_well_formed_standard_c(benchmark::State& state) {
const from_char_t* const input_last = input_data.data() + input_data.size();
for (;;) {
if (input != input_last) {
const size_t from_result = ZTD_UCHAR_ACCESSOR_I_ c32rtomb(intermediate, *input, &from_state);
const size_t from_result = ZTD_UCHAR_SCOPE_I_ c32rtomb(intermediate, *input, &from_state);
switch (from_result) {
case static_cast<size_t>(-1):
// encoding error: everything has failed. bail.
Expand All @@ -174,7 +174,7 @@ static void utf32_to_utf16_well_formed_standard_c(benchmark::State& state) {
}
const size_t intermediate_size = from_result + (intermediate - intermediate_data);
const size_t to_result
= ZTD_UCHAR_ACCESSOR_I_ mbrtoc16(output, intermediate_data, intermediate_size, &to_state);
= ZTD_UCHAR_SCOPE_I_ mbrtoc16(output, intermediate_data, intermediate_size, &to_state);
switch (to_result) {
case static_cast<size_t>(-1):
// encoding error: everything has failed. bail.
Expand Down Expand Up @@ -205,10 +205,10 @@ static void utf32_to_utf16_well_formed_standard_c(benchmark::State& state) {
// finish processing text without having to check if you have cleaned out
// the state. You have to call the function more to make sure
// you have drained off all the surrogates...
while (ZTD_UCHAR_ACCESSOR_I_ mbsinit(&to_state) == 0) {
while (ZTD_UCHAR_SCOPE_I_ mbsinit(&to_state) == 0) {
// try to drain the (last?) input out...
to_char_t last_c;
const size_t last_to_result = ZTD_UCHAR_ACCESSOR_I_ mbrtoc16(&last_c, "", 1, &to_state);
const size_t last_to_result = ZTD_UCHAR_SCOPE_I_ mbrtoc16(&last_c, "", 1, &to_state);
if (last_to_result == static_cast<size_t>(-3)) {
// we had extra, vomit it out...
*output = last_c;
Expand Down Expand Up @@ -250,7 +250,7 @@ static void utf32_to_utf8_well_formed_standard_c(benchmark::State& state) {
const from_char_t* input = input_data.data();
const from_char_t* const input_last = input_data.data() + input_data.size();
for (; input != input_last;) {
const size_t from_result = ZTD_UCHAR_ACCESSOR_I_ c32rtomb(output, *input, &from_state);
const size_t from_result = ZTD_UCHAR_SCOPE_I_ c32rtomb(output, *input, &from_state);
switch (from_result) {
case static_cast<size_t>(-1):
// encoding error: everything has failed. bail.
Expand Down Expand Up @@ -294,7 +294,7 @@ static void utf8_to_utf32_well_formed_standard_c(benchmark::State& state) {
for (;;) {
if (input != input_last) {
const std::size_t input_size = input_last - input;
const size_t to_result = ZTD_UCHAR_ACCESSOR_I_ mbrtoc32(output, input, input_size, &to_state);
const size_t to_result = ZTD_UCHAR_SCOPE_I_ mbrtoc32(output, input, input_size, &to_state);
switch (to_result) {
case static_cast<size_t>(-1):
// encoding error: everything has failed. bail.
Expand Down Expand Up @@ -323,10 +323,10 @@ static void utf8_to_utf32_well_formed_standard_c(benchmark::State& state) {
// finish processing text without having to check if you have cleaned out
// the state. You have to call the function more to make sure
// you have drained off all the surrogates...
while (ZTD_UCHAR_ACCESSOR_I_ mbsinit(&to_state) == 0) {
while (ZTD_UCHAR_SCOPE_I_ mbsinit(&to_state) == 0) {
// try to drain the (last?) input out...
to_char_t last_c;
const size_t last_to_result = ZTD_UCHAR_ACCESSOR_I_ mbrtoc32(&last_c, "", 1, &to_state);
const size_t last_to_result = ZTD_UCHAR_SCOPE_I_ mbrtoc32(&last_c, "", 1, &to_state);
if (last_to_result == static_cast<size_t>(-3)) {
// we had extra, vomit it out...
*output = last_c;
Expand Down Expand Up @@ -368,7 +368,7 @@ static void utf16_to_utf8_well_formed_standard_c(benchmark::State& state) {
const from_char_t* input = input_data.data();
const from_char_t* const input_last = input_data.data() + input_data.size();
for (; input != input_last;) {
const size_t from_result = ZTD_UCHAR_ACCESSOR_I_ c16rtomb(output, *input, &from_state);
const size_t from_result = ZTD_UCHAR_SCOPE_I_ c16rtomb(output, *input, &from_state);
switch (from_result) {
case static_cast<size_t>(-1):
// encoding error: everything has failed. bail.
Expand Down Expand Up @@ -412,7 +412,7 @@ static void utf8_to_utf16_well_formed_standard_c(benchmark::State& state) {
for (;;) {
if (input != input_last) {
const std::size_t input_size = input_last - input;
const size_t to_result = ZTD_UCHAR_ACCESSOR_I_ mbrtoc16(output, input, input_size, &to_state);
const size_t to_result = ZTD_UCHAR_SCOPE_I_ mbrtoc16(output, input, input_size, &to_state);
switch (to_result) {
case static_cast<size_t>(-1):
// encoding error: everything has failed. bail.
Expand Down Expand Up @@ -441,10 +441,10 @@ static void utf8_to_utf16_well_formed_standard_c(benchmark::State& state) {
// finish processing text without having to check if you have cleaned out
// the state. You have to call the function more to make sure
// you have drained off all the surrogates...
while (ZTD_UCHAR_ACCESSOR_I_ mbsinit(&to_state) == 0) {
while (ZTD_UCHAR_SCOPE_I_ mbsinit(&to_state) == 0) {
// try to drain the (last?) input out...
to_char_t last_c;
const size_t last_to_result = ZTD_UCHAR_ACCESSOR_I_ mbrtoc16(&last_c, "", 1, &to_state);
const size_t last_to_result = ZTD_UCHAR_SCOPE_I_ mbrtoc16(&last_c, "", 1, &to_state);
if (last_to_result == static_cast<size_t>(-3)) {
// we had extra, vomit it out...
*output = last_c;
Expand Down
2 changes: 1 addition & 1 deletion include/ztd/text/execution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ namespace ztd { namespace text {
/// Standard Library and bugs in glibc/musl libc's current locale encoding support. On Apple, this is currently
/// assumed to be UTF-8 since they do not support the @c \<cuchar\> or @c \<uchar.h\> headers.
class execution_t : public
#if (ZTD_IS_ON(ZTD_CUCHAR) || ZTD_IS_ON(ZTD_UCHAR)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)
#if (ZTD_IS_ON(ZTD_CUCHAR) || ZTD_IS_ON(ZTD_UCHAR_H)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)
__txt_impl::__execution_cuchar
#elif ZTD_IS_ON(ZTD_PLATFORM_MAC_OS)
__txt_impl::__execution_mac_os
Expand Down
25 changes: 12 additions & 13 deletions include/ztd/text/impl/execution_cuchar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@
#include <ztd/idk/span.hpp>
#include <ztd/idk/encoding_detection.hpp>
#include <ztd/idk/type_traits.hpp>
#include <ztd/idk/mbstate_t.hpp>
#include <ztd/idk/detail/windows.hpp>

#if (ZTD_IS_ON(ZTD_CUCHAR) || ZTD_IS_ON(ZTD_UCHAR)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)
#if (ZTD_IS_ON(ZTD_CUCHAR) || ZTD_IS_ON(ZTD_UCHAR_H)) && ZTD_IS_OFF(ZTD_PLATFORM_MAC_OS)

// clang-format off
#if ZTD_IS_ON(ZTD_CUCHAR)
Expand All @@ -75,13 +76,12 @@ namespace ztd { namespace text {
namespace __txt_detail {
class __execution_decode_state {
public:
::std::mbstate_t __narrow_state;
ztd_mbstate_t __narrow_state;
bool __output_pending;

__execution_decode_state() noexcept : __narrow_state(), __output_pending(false) {
ztd_char32_t __ghost_space[2];
::std::size_t __init_result
= ZTD_UCHAR_ACCESSOR_I_ mbrtoc32(__ghost_space, "\0", 1, &__narrow_state);
::std::size_t __init_result = ZTD_UCHAR_SCOPE_I_ mbrtoc32(__ghost_space, "\0", 1, &__narrow_state);
// make sure it is initialized
ZTD_TEXT_ASSERT_I_(__init_result == 0 && __ghost_space[0] == U'\0');
ZTD_TEXT_ASSERT_I_(::std::mbsinit(&__narrow_state) != 0);
Expand All @@ -90,12 +90,12 @@ namespace ztd { namespace text {

class __execution_encode_state {
public:
::std::mbstate_t __narrow_state;
ztd_mbstate_t __narrow_state;
bool __output_pending;

__execution_encode_state() noexcept : __narrow_state(), __output_pending(false) {
char __ghost_space[MB_LEN_MAX];
::std::size_t __init_result = ZTD_UCHAR_ACCESSOR_I_ c32rtomb(__ghost_space, U'\0', &__narrow_state);
::std::size_t __init_result = ZTD_UCHAR_SCOPE_I_ c32rtomb(__ghost_space, U'\0', &__narrow_state);
// make sure it is initialized
ZTD_TEXT_ASSERT_I_(__init_result == 1 && __ghost_space[0] == '\0');
ZTD_TEXT_ASSERT_I_(::std::mbsinit(&__narrow_state) != 0);
Expand Down Expand Up @@ -336,7 +336,7 @@ namespace ztd { namespace text {
code_point __codepoint = *__in_it;
::ztd::ranges::iter_advance(__in_it);
code_unit __intermediary_output[(MB_LEN_MAX)] {};
::std::size_t __res = ZTD_UCHAR_ACCESSOR_I_ c32rtomb(
::std::size_t __res = ZTD_UCHAR_SCOPE_I_ c32rtomb(
__intermediary_output, __codepoint, ::std::addressof(__s.__narrow_state));
if constexpr (__call_error_handler) {
if (__res == static_cast<::std::size_t>(-1)) {
Expand Down Expand Up @@ -502,9 +502,8 @@ namespace ztd { namespace text {
if (__s.__output_pending) {
// need to drain potential mbstate_t of any leftover code points?
ztd_char32_t __intermediary_output[max_code_points] {};
::std::size_t __res
= ZTD_UCHAR_ACCESSOR_I_ mbrtoc32(::std::addressof(__intermediary_output[0]), nullptr, 0,
::std::addressof(__s.__narrow_state));
::std::size_t __res = ZTD_UCHAR_SCOPE_I_ mbrtoc32(::std::addressof(__intermediary_output[0]),
nullptr, 0, ::std::addressof(__s.__narrow_state));
if constexpr (__call_error_handler) {
if (__res == static_cast<::std::size_t>(-1)) {
__execution_cuchar __self {};
Expand All @@ -529,9 +528,9 @@ namespace ztd { namespace text {
__intermediary_input[__state_offset] = *__in_it;
::ztd::ranges::iter_advance(__in_it);
ztd_char32_t __intermediary_output[1] {};
::std::size_t __res = ZTD_UCHAR_ACCESSOR_I_ mbrtoc32(
::std::addressof(__intermediary_output[0]), ::std::addressof(__intermediary_input[0]),
__state_count, ::std::addressof(__preserved_state));
::std::size_t __res = ZTD_UCHAR_SCOPE_I_ mbrtoc32(::std::addressof(__intermediary_output[0]),
::std::addressof(__intermediary_input[0]), __state_count,
::std::addressof(__preserved_state));

switch (__res) {
case static_cast<::std::size_t>(-2):
Expand Down
3 changes: 2 additions & 1 deletion include/ztd/text/impl/wide_execution_cwchar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include <ztd/ranges/range.hpp>
#include <ztd/idk/span.hpp>
#include <ztd/idk/encoding_detection.hpp>
#include <ztd/idk/mbstate_t.hpp>
#include <ztd/idk/type_traits.hpp>

#include <ztd/prologue.hpp>
Expand All @@ -62,7 +63,7 @@ namespace ztd { namespace text {

class __wide_execution_decode_state {
public:
::std::mbstate_t __wide_state;
ztd_mbstate_t __wide_state;
decode_state_t<execution_t> __narrow_state;

__wide_execution_decode_state() noexcept : __wide_state(), __narrow_state() {
Expand Down
Loading

0 comments on commit f2048a7

Please sign in to comment.