From 22bfa759ef597179f31c5d9bdb4258afc9a6f410 Mon Sep 17 00:00:00 2001 From: Alexander Delman Date: Thu, 20 Jun 2024 02:27:41 +0300 Subject: [PATCH] (#330) fixed CGs paths calculation and fa construction by CGs --- .../src/MetamorphicTests.cpp | 17 +++- apps/UnitTestsApp/src/UnitTests.cpp | 1 + .../include/Objects/MemoryFiniteAutomaton.h | 3 +- libs/Objects/src/MemoryFiniteAutomaton.cpp | 90 ++++++++++++------- 4 files changed, 73 insertions(+), 38 deletions(-) diff --git a/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp b/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp index ad76036e..33aa3696 100644 --- a/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp +++ b/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp @@ -144,14 +144,23 @@ TEST(TestMFA, Fuzzing) { } // TEST(TestMFA, Fuzz) { -// string rgx_str = "(a[[b|]:1|]:2*[[c|]:1|]:2*&1&2)*"; -// MemoryFiniteAutomaton mfa1 = BackRefRegex(rgx_str).to_mfa(); -// MemoryFiniteAutomaton mfa2 = BackRefRegex(rgx_str).to_mfa_additional(); +// BackRefRegex rgx(""); +// MemoryFiniteAutomaton mfa1 = rgx.to_mfa(); +// MemoryFiniteAutomaton mfa2 = rgx.to_mfa_additional(); // // std::cout << mfa1.to_txt() << mfa2.to_txt(); // // MetamorphicTests::cmp_automatons(mfa1, mfa2); -//} +// } + +// TEST(TestMFA, Cmp) { +// MemoryFiniteAutomaton mfa1 = BackRefRegex().to_mfa_additional(); +// MemoryFiniteAutomaton mfa2 = BackRefRegex().to_mfa_additional(); +// +// std::cout << mfa1.to_txt() << mfa2.to_txt(); +// +// MetamorphicTests::cmp_automatons(mfa1, mfa2); +// } TEST(TestMFA, ToTxt) { RegexGenerator rg(5, 3, 3, 2); diff --git a/apps/UnitTestsApp/src/UnitTests.cpp b/apps/UnitTestsApp/src/UnitTests.cpp index 93f98c8b..9c4fb291 100644 --- a/apps/UnitTestsApp/src/UnitTests.cpp +++ b/apps/UnitTestsApp/src/UnitTests.cpp @@ -798,6 +798,7 @@ TEST(TestBisimilar, MFA_Bisimilar) { {"[a]:1*&1|[b]:1&1", "[b]:1&1|[a]:1*&1", true}, {"[a]:1&1(&1|[b]:1)*", "[a]:1&1(&1|[b]:1)*", true}, {"[a|b]:1*&1", "(|[a|b]:1(a|b)*)&1", true}, + {"[ab*]:1*&1", "[ab*b*]:1*&1", true}, // перекрестная бисимуляция {"[a*]:1a*&1", "a*[a*]:1&1", false}, {"b[a*]:1a*&1", "ba*[a*]:1&1", false}, diff --git a/libs/Objects/include/Objects/MemoryFiniteAutomaton.h b/libs/Objects/include/Objects/MemoryFiniteAutomaton.h index a354febc..c54221a2 100644 --- a/libs/Objects/include/Objects/MemoryFiniteAutomaton.h +++ b/libs/Objects/include/Objects/MemoryFiniteAutomaton.h @@ -168,7 +168,8 @@ class MemoryFiniteAutomaton : public AbstractMachine { std::vector get_reversed_transitions() const; std::pair>, std::vector>> find_cg_paths( - int state_index, std::unordered_set visited, int cell, int opening_state) const; + int state_index, std::unordered_set visited, int cell, int opening_state, + bool was_in_opening_state) const; std::vector find_capture_groups_backward( int ref_incoming_state, int cell, const std::vector& reversed_transitions, diff --git a/libs/Objects/src/MemoryFiniteAutomaton.cpp b/libs/Objects/src/MemoryFiniteAutomaton.cpp index 1a7877e3..1b9e99ad 100644 --- a/libs/Objects/src/MemoryFiniteAutomaton.cpp +++ b/libs/Objects/src/MemoryFiniteAutomaton.cpp @@ -774,9 +774,9 @@ pair MemoryFiniteAutomaton::_parse(const string& s, Matcher* matcher) } visited_states.insert(cur_state); + counter++; } current_states = following_states; - counter++; } return {counter, false}; @@ -1377,8 +1377,8 @@ void find_opening_states_dfs(int state_index, for (const auto& [symbol, symbol_transitions] : reversed_transitions[state_index]) for (const auto& tr : symbol_transitions) { optional action; - if (tr.memory_actions.count(cell)) - action = tr.memory_actions.at(cell); + if (auto it = tr.memory_actions.find(cell); it != tr.memory_actions.end()) + action = it->second; if (action && (action == MFATransition::open || action == MFATransition::reset)) opening_states.insert(tr.to); else if (!visited[tr.to]) @@ -1386,36 +1386,56 @@ void find_opening_states_dfs(int state_index, } } -bool is_valid_transition(bool is_opening_state, optional action) { +bool opening_action_only_from_opening_state(bool is_opening_state, + optional action) { bool is_open_action = action && action == MFATransition::open; return is_opening_state == is_open_action; } pair>, vector>> MemoryFiniteAutomaton::find_cg_paths( - int state_index, std::unordered_set visited, int cell, int opening_state) const { + int state_index, std::unordered_set visited, int cell, int opening_state, + bool was_in_opening_state) const { vector> paths; vector> reset_paths; visited.insert(state_index); + bool is_opening_state = state_index == opening_state; for (const auto& [symbol, symbol_transitions] : states[state_index].transitions) for (const auto& tr : symbol_transitions) { optional action; if (auto it = tr.memory_actions.find(cell); it != tr.memory_actions.end()) action = it->second; if (action && action == MFATransition::close) { - paths.push_back({state_index}); + if (!(is_opening_state && !was_in_opening_state)) + paths.push_back({state_index}); } else if (action && action == MFATransition::reset) { reset_paths.push_back({state_index}); } else { - bool is_opening_state = state_index == opening_state; - bool should_process = !visited.count(tr.to) || - // чтобы обработать случай, когда открывающее совпадает с - // закрывающим, e.g. [a*a]:1*&1 - (!is_opening_state && tr.to == opening_state); - if (should_process && is_valid_transition(is_opening_state, action)) { - auto [t, _] = find_cg_paths(tr.to, visited, cell, opening_state); + bool should_process = !visited.count(tr.to); + if (is_opening_state) { + // чтобы обработать случай, когда открывающее совпадает с + // закрывающим, e.g. [a*a]:1*&1 + should_process |= tr.to == opening_state && !was_in_opening_state; + should_process &= opening_action_only_from_opening_state(true, action) || + (was_in_opening_state && !action); + } else { + // чтобы обработать случай, когда открывающее совпадает с + // закрывающим, e.g. [a*a]:1*&1 + should_process |= tr.to == opening_state; + should_process &= opening_action_only_from_opening_state(false, action); + } + + if (should_process) { + auto [t, _] = find_cg_paths(tr.to, + visited, + cell, + opening_state, + is_opening_state || was_in_opening_state); for (auto& i : t) { - i.insert(i.begin(), state_index); + if (!(is_opening_state && !was_in_opening_state && + tr.to == opening_state)) { + i.insert(i.begin(), state_index); + } paths.emplace_back(i); } } @@ -1436,7 +1456,7 @@ vector MemoryFiniteAutomaton::find_capture_groups_backward( vector res; for (auto opening_st : opening_states) { - auto [paths, reset_paths] = find_cg_paths(opening_st, {}, cell, opening_st); + auto [paths, reset_paths] = find_cg_paths(opening_st, {}, cell, opening_st, false); for (const auto& reset_path : reset_paths) res.push_back(CaptureGroup(cell, {reset_path}, fa_classes, true)); if (!paths.empty()) @@ -1460,7 +1480,8 @@ bool MemoryFiniteAutomaton::find_decisions(int state_index, vector& visited it != tr.memory_actions.end()) action = it->second; - if (is_valid_transition(state_index == cg.get_opening_state_index(), action) && + if (opening_action_only_from_opening_state(state_index == cg.get_opening_state_index(), + action) && (states_to_check.count(tr.to) || following_states.count(tr.to)) && !(following_states.count(state_index) && !states_to_check.count(tr.to))) { if (visited[tr.to] == 0) { @@ -1510,9 +1531,9 @@ FiniteAutomaton MemoryFiniteAutomaton::get_cg_fa(const CaptureGroup& cg) const { bool additional_state = false; unordered_set terminal_states; for (const auto& path : cg.get_paths()) - if (path.size() != 1) + if (path[path.size() - 1] != cg_opening_state_index) terminal_states.insert(path[path.size() - 1]); - else + else // если у стартового состояния есть открывающий переход в самого себя additional_state = true; unordered_map indexes; @@ -1525,18 +1546,19 @@ FiniteAutomaton MemoryFiniteAutomaton::get_cg_fa(const CaptureGroup& cg) const { if (additional_state) sub_states.emplace_back(idx, "", true); - int initial_index = indexes.at(cg_opening_state_index); - for (const auto& st : cg.get_states()) + for (const auto& st : cg.get_states()) { + bool is_opening_state = st.index == cg_opening_state_index; for (const auto& [symbol, symbol_transitions] : states[st.index].transitions) for (const auto& tr : symbol_transitions) { // не просто false, чтобы обработать ниже переходы без открытия памяти из стартового - bool skip = st.index == cg_opening_state_index; + bool skip = is_opening_state && !additional_state; + bool is_opening_transition = false; for (const auto& [_, action] : tr.memory_actions) { - if (st.index == cg_opening_state_index & action == MFATransition::open) { + if (is_opening_state && action == MFATransition::open) { skip = false; - } else if ((action == MFATransition::open && - st.index != cg_opening_state_index) || - (action == MFATransition::close && !terminal_states.count(tr.to)) || + is_opening_transition = true; + } else if (action == MFATransition::open && !is_opening_state || + action == MFATransition::close && !terminal_states.count(tr.to) || action == MFATransition::reset) { skip = true; break; @@ -1544,18 +1566,20 @@ FiniteAutomaton MemoryFiniteAutomaton::get_cg_fa(const CaptureGroup& cg) const { } if (skip) continue; - if (st.index == cg_opening_state_index && tr.to == cg_opening_state_index) { - // используем финальное additional_state - alphabet.insert(symbol); - sub_states[indexes.at(st.index)].add_transition(sub_states.size() - 1, symbol); - } else if (indexes.count(tr.to)) { - alphabet.insert(symbol); - sub_states[indexes.at(st.index)].add_transition(indexes.at(tr.to), symbol); + alphabet.insert(symbol); + int target_index = + (tr.to == cg_opening_state_index) ? sub_states.size() - 1 : indexes.at(tr.to); + + if (is_opening_state && additional_state && !is_opening_transition) { + sub_states[sub_states.size() - 1].add_transition(target_index, symbol); + } else { + sub_states[indexes.at(st.index)].add_transition(target_index, symbol); } } + } // начальное состояние общее у всех cg.paths - return {initial_index, sub_states, alphabet}; + return {indexes.at(cg_opening_state_index), sub_states, alphabet}; } optional MemoryFiniteAutomaton::bisimilarity_checker(const MemoryFiniteAutomaton& mfa1,