From 381aecc4c684e1e1e33b8a33ba062fb6a658ea77 Mon Sep 17 00:00:00 2001 From: Nathan Young Date: Tue, 19 Mar 2024 19:40:17 -0400 Subject: [PATCH] BUG/ENH: StringUtilities Split Function Updates (#892) --- .../utils/PythonPluginTemplateFile.hpp | 4 +- src/simplnx/Utilities/StringUtilities.hpp | 189 ++++++++++++------ test/CMakeLists.txt | 1 + test/StringUtilitiesTest.cpp | 115 +++++++++++ 4 files changed, 246 insertions(+), 63 deletions(-) create mode 100644 test/StringUtilitiesTest.cpp diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/utils/PythonPluginTemplateFile.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/utils/PythonPluginTemplateFile.hpp index f63900d429..b3630bf3a3 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/utils/PythonPluginTemplateFile.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/utils/PythonPluginTemplateFile.hpp @@ -73,7 +73,7 @@ inline Result<> InsertFilterNameInPluginFiles(const std::filesystem::path& plugi buffer << file.rdbuf(); std::string content = buffer.str(); file.close(); - std::vector lines = nx::core::StringUtilities::split_2(content, "\n", true); + std::vector lines = nx::core::StringUtilities::split(content, "\n", true); if(lines.back().empty()) { lines.pop_back(); @@ -117,7 +117,7 @@ inline Result<> InsertFilterNameInPluginFiles(const std::filesystem::path& plugi buffer << file.rdbuf(); std::string content = buffer.str(); file.close(); - std::vector lines = nx::core::StringUtilities::split_2(content, "\n", true); + std::vector lines = nx::core::StringUtilities::split(content, "\n", true); if(lines.back().empty()) { lines.pop_back(); diff --git a/src/simplnx/Utilities/StringUtilities.hpp b/src/simplnx/Utilities/StringUtilities.hpp index 3b4f923bf6..c685b0f3aa 100644 --- a/src/simplnx/Utilities/StringUtilities.hpp +++ b/src/simplnx/Utilities/StringUtilities.hpp @@ -48,19 +48,25 @@ * '\r'(0x0d)carriage return (CR) */ -namespace nx::core +namespace { -namespace StringUtilities -{ -inline constexpr StringLiteral k_Whitespaces = " \t\f\v\n\r"; - -template -void for_each_token(InputIt first, InputIt last, ForwardIt s_first, ForwardIt s_last, BinOp binary_op) +template +void tokenize(InputIt first, InputIt last, ForwardIt s_first, ForwardIt s_last, std::vector& tokens) { while(true) { const auto pos = std::find_first_of(first, last, s_first, s_last); - binary_op(first, pos); + if(first != pos) + { + tokens.emplace_back(std::string{first, pos}); + } + else + { + if constexpr(ProcessEmptyV) + { + tokens.emplace_back(""); + } + } if(pos == last) { break; @@ -69,6 +75,80 @@ void for_each_token(InputIt first, InputIt last, ForwardIt s_first, ForwardIt s_ } } +template +struct SplitTypeOptions +{ + static inline constexpr bool AllowConsecutiveAsEmpty = ConsecutiveAsEmptyV; + static inline constexpr bool AllowEmptyInital = EmptyInitialV; + static inline constexpr bool AllowEmptyFinal = EmptyFinalV; +}; + +using SplitIgnoreEmpty = SplitTypeOptions; +using SplitAllowAll = SplitTypeOptions; +using SplitNoStripIgnoreConsecutive = SplitTypeOptions; +using SplitOnlyConsecutive = SplitTypeOptions; +using SplitAllowEmptyLeftAnalyze = SplitTypeOptions; +using SplitAllowEmptyRightAnalyze = SplitTypeOptions; + +template +inline std::vector optimized_split(std::string_view str, nonstd::span delimiters) +{ + auto endPos = str.end(); + auto startPos = str.begin(); + + std::vector tokens; + tokens.reserve(str.size() / 2); + + if constexpr(SplitTypeOptionsV::AllowEmptyInital) + { + if(std::find(delimiters.cbegin(), delimiters.cend(), str[0]) != delimiters.cend()) + { + tokens.emplace_back(""); + startPos++; + } + } + + if constexpr(!SplitTypeOptionsV::AllowEmptyFinal) + { + if(std::find(delimiters.cbegin(), delimiters.cend(), str[str.size() - 1]) != delimiters.cend()) + { + endPos--; + } + } + + if constexpr(!SplitTypeOptionsV::AllowConsecutiveAsEmpty) + { + tokenize(startPos, endPos, delimiters.cbegin(), delimiters.cend(), tokens); + if constexpr(SplitTypeOptionsV::AllowEmptyFinal) + { + if(std::find(delimiters.cbegin(), delimiters.cend(), str[str.size() - 1]) != delimiters.cend()) + { + tokens.emplace_back(""); + } + } + } + else + { + if constexpr(!SplitTypeOptionsV::AllowEmptyInital) + { + if(std::find(delimiters.cbegin(), delimiters.cend(), str[0]) != delimiters.cend()) + { + startPos++; + } + } + tokenize(startPos, endPos, delimiters.cbegin(), delimiters.cend(), tokens); + } + + tokens.shrink_to_fit(); + + return tokens; +} +} // namespace + +namespace nx::core::StringUtilities +{ +inline constexpr StringLiteral k_Whitespaces = " \t\f\v\n\r"; + /** * @brief Replace characters in a string. If 'from' is empty, the origin string is returned. * @param str Input String @@ -173,66 +253,54 @@ inline bool ends_with(std::string_view value, std::string_view ending) return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); } -inline std::vector split(std::string_view str, nonstd::span delimiters, bool consecutiveDelimiters) +enum SplitType : uint8 { - std::vector tokens; - auto endPos = str.end(); - for_each_token(str.begin(), endPos, delimiters.cbegin(), delimiters.cend(), [&tokens, &consecutiveDelimiters](auto first, auto second) { - if(first != second) - { - std::string substr = {first, second}; - if(!substr.empty() || !consecutiveDelimiters) - { - tokens.push_back(substr); - } - } - }); - return tokens; -} - -inline std::vector split(std::string_view str, char delim) + IgnoreEmpty, + AllowAll, + NoStripIgnoreConsecutive, + OnlyConsecutive, + AllowEmptyLeftAnalyze, + AllowEmptyRightAnalyze +}; + +inline std::vector specific_split(std::string_view str, nonstd::span delimiters, SplitType splitType) { - std::array delims = {delim}; - return split(str, delims, false); + switch(splitType) + { + case IgnoreEmpty: + return optimized_split<::SplitIgnoreEmpty>(str, delimiters); + case AllowAll: + return optimized_split<::SplitAllowAll>(str, delimiters); + case NoStripIgnoreConsecutive: + return optimized_split<::SplitNoStripIgnoreConsecutive>(str, delimiters); + case OnlyConsecutive: + return optimized_split<::SplitOnlyConsecutive>(str, delimiters); + case AllowEmptyLeftAnalyze: + return optimized_split<::SplitAllowEmptyLeftAnalyze>(str, delimiters); + case AllowEmptyRightAnalyze: + return optimized_split<::SplitAllowEmptyRightAnalyze>(str, delimiters); + } + + return {}; } -/** - * - * @param input - * @param delimiter - * @param consecutiveDelimiters - * @return - */ -inline std::vector split_2(std::string_view input, nonstd::span delimiter, bool consecutiveDelimiters) +inline std::vector split(std::string_view str, nonstd::span delimiters, bool consecutiveDelimiters) { - std::vector result; - std::string current; - for(char ch : input) + if(consecutiveDelimiters) { - if(ch == delimiter[0]) - { - // If consecutive delimiters should lead to empty strings, or if the current string is not empty, - // we add the current string (which could be empty) to the result. - if(consecutiveDelimiters || !current.empty()) - { - result.push_back(current); - current.clear(); // Reset current for the next word. - } - // If consecutiveDelimiters is false, we simply skip this part, - // which avoids adding an empty string for consecutive delimiters. - } - else - { - current += ch; - } + // Split Allow All was selected to match QString's base split functionality + return optimized_split<::SplitAllowAll>(str, delimiters); } - // Add the last word to the result if it's not empty, or if the last character was a delimiter - // and consecutiveDelimiters is true. - if(!current.empty() || (consecutiveDelimiters && !input.empty() && input.back() == delimiter[0])) + else { - result.push_back(current); + return optimized_split<::SplitIgnoreEmpty>(str, delimiters); } - return result; +} + +inline std::vector split(std::string_view str, char delim) +{ + std::array delimiters = {delim}; + return optimized_split<::SplitIgnoreEmpty>(str, delimiters); } inline std::string join(nonstd::span vec, std::string_view delim) @@ -361,5 +429,4 @@ inline std::string toLower(std::string input) return input; } -} // namespace StringUtilities -} // namespace nx::core +} // namespace nx::core::StringUtilities diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f0a28406c4..832e88113b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -37,6 +37,7 @@ add_executable(simplnx_test ParametersTest.cpp PipelineSaveTest.cpp UuidTest.cpp + StringUtilitiesTest.cpp ) target_link_libraries(simplnx_test diff --git a/test/StringUtilitiesTest.cpp b/test/StringUtilitiesTest.cpp new file mode 100644 index 0000000000..6f2bde9764 --- /dev/null +++ b/test/StringUtilitiesTest.cpp @@ -0,0 +1,115 @@ +#include "simplnx/Utilities/StringUtilities.hpp" +#include "simplnx/unit_test/simplnx_test_dirs.hpp" + +#include + +#include + +using namespace nx::core; + +TEST_CASE("Utility Function Test: split(str, char)") +{ + // Case 1 + std::string inputStr = "This|Is|A|Baseline|Test"; + + std::vector result = StringUtilities::split(inputStr, '|'); + + REQUIRE(result == std::vector{"This", "Is", "A", "Baseline", "Test"}); + + // Case 2 + inputStr = "|This|Is|A|Baseline|Test|"; + + result = StringUtilities::split(inputStr, '|'); + + REQUIRE(result == std::vector{"This", "Is", "A", "Baseline", "Test"}); + + // Case 3 + inputStr = "||This|Is||A||Baseline|Test||"; + + result = StringUtilities::split(inputStr, '|'); + + REQUIRE(result == std::vector{"This", "Is", "A", "Baseline", "Test"}); +} + +TEST_CASE("Utility Function Test: split(str, char, bool)") +{ + std::array k_Delimiter = {'|'}; + // Case 1 + std::string inputStr = "This|Is|A|Baseline|Test"; + + std::vector result = StringUtilities::split(inputStr, k_Delimiter, false); + + REQUIRE(result == std::vector{"This", "Is", "A", "Baseline", "Test"}); + + // Case 2 + inputStr = "|This|Is|A|Baseline|Test|"; + + result = StringUtilities::split(inputStr, k_Delimiter, false); + + REQUIRE(result == std::vector{"This", "Is", "A", "Baseline", "Test"}); + + // Case 3 + inputStr = "||This|Is||A||Baseline|Test||"; + + result = StringUtilities::split(inputStr, k_Delimiter, false); + + REQUIRE(result == std::vector{"This", "Is", "A", "Baseline", "Test"}); + + // Case 4 + inputStr = "This|Is|A|Baseline|Test"; + + result = StringUtilities::split(inputStr, k_Delimiter, true); + + REQUIRE(result == std::vector{"This", "Is", "A", "Baseline", "Test"}); + + // Case 5 + inputStr = "|This|Is|A|Baseline|Test|"; + + result = StringUtilities::split(inputStr, k_Delimiter, true); + + REQUIRE(result == std::vector{"", "This", "Is", "A", "Baseline", "Test", ""}); + + // Case 6 + inputStr = "||This|Is||A||Baseline|Test||"; + + result = StringUtilities::split(inputStr, k_Delimiter, true); + + REQUIRE(result == std::vector{"", "", "This", "Is", "", "A", "", "Baseline", "Test", "", ""}); +} + +TEST_CASE("Utility Function Test: specific_split(str, char, StringUtilities::SplitType)") +{ + std::array k_Delimiter = {'|'}; + std::string inputStr = "||This|Is||A||Baseline|Test||"; + std::vector result; + + // Case 1 + result = StringUtilities::specific_split(inputStr, k_Delimiter, StringUtilities::SplitType::AllowAll); + + REQUIRE(result == std::vector{"", "", "This", "Is", "", "A", "", "Baseline", "Test", "", ""}); + + // Case 2 + result = StringUtilities::specific_split(inputStr, k_Delimiter, StringUtilities::SplitType::IgnoreEmpty); + + REQUIRE(result == std::vector{"This", "Is", "A", "Baseline", "Test"}); + + // Case 3 + result = StringUtilities::specific_split(inputStr, k_Delimiter, StringUtilities::SplitType::NoStripIgnoreConsecutive); + + REQUIRE(result == std::vector{"", "This", "Is", "A", "Baseline", "Test", ""}); + + // Case 4 + result = StringUtilities::specific_split(inputStr, k_Delimiter, StringUtilities::SplitType::OnlyConsecutive); + + REQUIRE(result == std::vector{"", "This", "Is", "", "A", "", "Baseline", "Test", ""}); + + // Case 5 + result = StringUtilities::specific_split(inputStr, k_Delimiter, StringUtilities::SplitType::AllowEmptyLeftAnalyze); + + REQUIRE(result == std::vector{"", "", "This", "Is", "", "A", "", "Baseline", "Test", ""}); + + // Case 5 + result = StringUtilities::specific_split(inputStr, k_Delimiter, StringUtilities::SplitType::AllowEmptyRightAnalyze); + + REQUIRE(result == std::vector{"", "This", "Is", "", "A", "", "Baseline", "Test", "", ""}); +}