From 6978ad8efd7572aa8dc5ee670d51fd6c22d2546d Mon Sep 17 00:00:00 2001 From: xendalm Date: Fri, 10 Nov 2023 15:10:24 +0300 Subject: [PATCH] (#271) regex lexer tests --- .clang-format | 3 +- apps/UnitTestsApp/src/UnitTests.cpp | 57 ++++++++++++++++---- libs/Objects/CMakeLists.txt | 10 ++++ libs/Objects/include/Objects/AlgExpression.h | 5 +- libs/Objects/src/AlgExpression.cpp | 21 ++++---- 5 files changed, 75 insertions(+), 21 deletions(-) diff --git a/.clang-format b/.clang-format index 1be1c078..9f9a7610 100644 --- a/.clang-format +++ b/.clang-format @@ -7,4 +7,5 @@ AllowShortIfStatementsOnASingleLine: Never SpaceBeforeParens: ControlStatements AllowShortFunctionsOnASingleLine: Empty AlignAfterOpenBracket: Align -ColumnLimit: 100 \ No newline at end of file +ColumnLimit: 100 +BinPackArguments: false \ No newline at end of file diff --git a/apps/UnitTestsApp/src/UnitTests.cpp b/apps/UnitTestsApp/src/UnitTests.cpp index dc1baa55..a28bdef2 100644 --- a/apps/UnitTestsApp/src/UnitTests.cpp +++ b/apps/UnitTestsApp/src/UnitTests.cpp @@ -10,12 +10,44 @@ #include "gtest/gtest.h" #include -TEST(TestCaseName, Test_regex_minus) { - string str = "^(c^(a^(b)d))e"; - Regex r1(str); - string r1_str = r1.to_txt(); - Regex r2(r1_str); - ASSERT_EQ(true, Regex::equivalent(r1, r2)); +TEST(ParseStringTest, Test_regex_lexer) { + using L = AlgExpression::Lexeme::Type; + + struct Test { + string regex_str; + bool want_err; + int lexemes_len = 0; + }; + + vector tests = { + {"[]", true}, + {"[]:", true}, + {"[a]", true}, + {"[a]:", true}, + {"[[a]:1", true}, + {"a]:1", true}, + {"[a]:1", false, 3}, + {"&", true}, + {"&1", false, 1}, + {"[b[a]:1&1]:2&2", false, 11}, + }; + + for (const auto& t : tests) { + stringstream message; + message << "Case: " << t.regex_str << ", WantErr: " << t.want_err; + SCOPED_TRACE(message.str()); + + vector l = AlgExpression::parse_string(t.regex_str); + ASSERT_FALSE(l.empty()); + + if (t.want_err) { + ASSERT_EQ(L::error, l[0].type); + } else { + 
ASSERT_NE(L::error, l[0].type); + ASSERT_EQ(t.lexemes_len, l.size()); + // TODO: add a check of the contents of l + } + } } TEST(TestCaseName, Test_random_regex_parsing) { @@ -246,14 +278,19 @@ TEST(TestCaseName, Test_ambiguity) { {16, "(a|b|c)*(a|b|c|d)(a|b|c)*|(ac*|ad*)*", glushkov, FiniteAutomaton::almost_unambigious}, {17, "(ab)*ab(ab)*|(ac)*(ac)*|(d|c)*", // (abab)*abab(abab)*|(aac)*(aac)*|(b|d|c)* - glushkov, FiniteAutomaton::almost_unambigious}, + glushkov, + FiniteAutomaton::almost_unambigious}, {18, "(abab)*abab(abab)*|(aac)*(aac)*", glushkov, FiniteAutomaton::polynomially_ambigious}, - {19, "(ab)*ab(ab)*", // (abab)*abab(abab)* - glushkov, FiniteAutomaton::polynomially_ambigious}, + {19, + "(ab)*ab(ab)*", // (abab)*abab(abab)* + glushkov, + FiniteAutomaton::polynomially_ambigious}, {20, "(ab)*ab(ab)*|(ac)*(ac)*", glushkov, FiniteAutomaton::polynomially_ambigious}, // {21, "(a|b)*(f*)*q", thompson, // FiniteAutomaton::exponentially_ambiguous}, - {22, "((bb*c|c)c*b|bb*b|b)(b|(c|bb*c)c*b|bb*b)*", glushkov, + {22, + "((bb*c|c)c*b|bb*b|b)(b|(c|bb*c)c*b|bb*b)*", + glushkov, FiniteAutomaton::exponentially_ambiguous}, }; diff --git a/libs/Objects/CMakeLists.txt b/libs/Objects/CMakeLists.txt index d78f0881..1d1ff0f5 100644 --- a/libs/Objects/CMakeLists.txt +++ b/libs/Objects/CMakeLists.txt @@ -1,6 +1,15 @@ # Set the project name project(Objects) +include(FetchContent) +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip +) + +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) + # Create a sources variable with a link to all cpp files to compile set(SOURCES src/TransformationMonoid.cpp @@ -26,4 +35,5 @@ target_include_directories(${PROJECT_NAME} target_link_libraries(${PROJECT_NAME} Fraction AutomatonToImage + gtest ) \ No newline at end of file diff --git a/libs/Objects/include/Objects/AlgExpression.h b/libs/Objects/include/Objects/AlgExpression.h 
index 0e5480e4..6ab887af 100644 --- a/libs/Objects/include/Objects/AlgExpression.h +++ b/libs/Objects/include/Objects/AlgExpression.h @@ -2,10 +2,13 @@ #include "BaseObject.h" #include "iLogTemplate.h" +#include "gtest/gtest.h" #include #include class AlgExpression : public BaseObject { + FRIEND_TEST(ParseStringTest, Test_regex_lexer); + protected: struct Lexeme { enum Type { @@ -15,7 +18,7 @@ class AlgExpression : public BaseObject { alt, // | conc, // . star, // * - minus, // ^ + minus, // ^ symb, // alphabet symbol eps, // Epsilon squareBrL, // [ diff --git a/libs/Objects/src/AlgExpression.cpp b/libs/Objects/src/AlgExpression.cpp index e10b0bb9..698a439f 100644 --- a/libs/Objects/src/AlgExpression.cpp +++ b/libs/Objects/src/AlgExpression.cpp @@ -402,7 +402,7 @@ bool AlgExpression::from_string(const string& str) { return true; } - vector l = parse_string(str); + vector l = parse_string(str); AlgExpression* root = expr(l, 0, l.size()); if (root == nullptr || root->type == eps) { @@ -440,7 +440,7 @@ AlgExpression* AlgExpression::expr(const vector& lexemes, if (!p) { p = scan_par(lexemes, index_start, index_end); } - + return p; } @@ -451,11 +451,10 @@ void AlgExpression::update_balance(const AlgExpression::Lexeme& l, int& balance) if (l.type == Lexeme::Type::parR || l.type == Lexeme::Type::squareBrR) { balance--; } - return; } AlgExpression* AlgExpression::scan_minus(const vector& lexemes, - int index_start, int index_end) { + int index_start, int index_end) { AlgExpression* p = nullptr; if (lexemes[index_start].type != Lexeme::Type::minus) { @@ -473,7 +472,7 @@ AlgExpression* AlgExpression::scan_minus(const vector& le p->type = minus; p->alphabet = l->alphabet; - return p; + return p; } AlgExpression* AlgExpression::scan_conc(const vector& lexemes, @@ -638,15 +637,19 @@ bool AlgExpression::contains_eps() const { } bool AlgExpression::equality_checker(const AlgExpression* expr1, const AlgExpression* expr2) { - if (expr1 == nullptr && expr2 == nullptr) return true; 
- if (expr1 == nullptr || expr2 == nullptr) return false; - if (expr1->value.type != expr2->value.type) return false; + if (expr1 == nullptr && expr2 == nullptr) + return true; + if (expr1 == nullptr || expr2 == nullptr) + return false; + if (expr1->value.type != expr2->value.type) + return false; if (expr1->value.type == Lexeme::Type::symb) { alphabet_symbol r1_symb, r2_symb; r1_symb = expr1->value.symbol; r2_symb = expr2->value.symbol; - if (r1_symb != r2_symb) return false; + if (r1_symb != r2_symb) + return false; } if (equality_checker(expr1->term_l, expr2->term_l) &&