diff --git a/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp b/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp index 4724a6a0..33aa3696 100644 --- a/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp +++ b/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp @@ -89,7 +89,7 @@ std::string MetamorphicTests::generate_bregex(RegexGenerator& rg, int cells_num) condition = false; } if (condition) - condition &= r.check_refs(); + condition &= r.check_refs_and_memory_writers_usefulness(); } while (!condition); return rgx_str; @@ -144,14 +144,23 @@ TEST(TestMFA, Fuzzing) { } // TEST(TestMFA, Fuzz) { -// string rgx_str = "(a[[b|]:1|]:2*[[c|]:1|]:2*&1&2)*"; -// MemoryFiniteAutomaton mfa1 = BackRefRegex(rgx_str).to_mfa(); -// MemoryFiniteAutomaton mfa2 = BackRefRegex(rgx_str).to_mfa_additional(); +// BackRefRegex rgx(""); +// MemoryFiniteAutomaton mfa1 = rgx.to_mfa(); +// MemoryFiniteAutomaton mfa2 = rgx.to_mfa_additional(); // // std::cout << mfa1.to_txt() << mfa2.to_txt(); // // MetamorphicTests::cmp_automatons(mfa1, mfa2); -//} +// } + +// TEST(TestMFA, Cmp) { +// MemoryFiniteAutomaton mfa1 = BackRefRegex().to_mfa_additional(); +// MemoryFiniteAutomaton mfa2 = BackRefRegex().to_mfa_additional(); +// +// std::cout << mfa1.to_txt() << mfa2.to_txt(); +// +// MetamorphicTests::cmp_automatons(mfa1, mfa2); +// } TEST(TestMFA, ToTxt) { RegexGenerator rg(5, 3, 3, 2); diff --git a/apps/UnitTestsApp/src/UnitTests.cpp b/apps/UnitTestsApp/src/UnitTests.cpp index 5264157f..9c4fb291 100644 --- a/apps/UnitTestsApp/src/UnitTests.cpp +++ b/apps/UnitTestsApp/src/UnitTests.cpp @@ -49,6 +49,10 @@ TEST(TestParseString, FromString) { {"a|(c^)", false, false, 8}, // a | ( c . ^ eps ) {"[b[a]:1&1]:2&2^a", true, true}, {"[b[a]:1&1]:2&2^a", true, false}, + {"[a]:1[]:1&1", false, true, 9}, + {"[a]:1[(|)]:1&1", true, true}, + {"O1aC1R1&1", false, false, 9}, + {"O1aC1X1&1", true, false}, }; for (const auto& t : tests) { @@ -755,6 +759,27 @@ TEST(TestParsing, MFA_equivalence) { }); } +TEST(TestBrgexChecker, CheckRefsAndMWs) { + using Test = std::tuple; + vector tests = { + {"[a]:1&1", true}, + {"&1[a]:1", false}, + {"(&1[a]:1)*", true}, + {"&1[a]:1&1", false}, + {"[a]:1&1[a]:2", false}, + {"&2[a]:1&1[a]:2", false}, + {"(&2[a]:1&1[a]:2)*", true}, + {"(&1[a]:1[a]:1&1[a]:2)*", false}, + {"(&2[a]:1&1[a]:2)*[a]:3*", false}, + {"(&2[a]:1&1[a]:2)*[a]:3*&3", true}, + }; + for_each(tests.begin(), tests.end(), [](const Test& test) { + auto [rgx, expected_res] = test; + SCOPED_TRACE(rgx); + ASSERT_EQ(BackRefRegex(rgx).check_refs_and_memory_writers_usefulness(), expected_res); + }); +} + TEST(TestReverse, BRegex_Reverse) { ASSERT_TRUE(BackRefRegex::equal(BackRefRegex("([a*b]:1&1|b&1)").reverse(), BackRefRegex("[ba*]:1&1|&1b"))); @@ -772,6 +797,8 @@ TEST(TestBisimilar, MFA_Bisimilar) { {"[a|a]:1*&1", "[a]:1*[a]:1*&1", true}, {"[a]:1*&1|[b]:1&1", "[b]:1&1|[a]:1*&1", true}, {"[a]:1&1(&1|[b]:1)*", "[a]:1&1(&1|[b]:1)*", true}, + {"[a|b]:1*&1", "(|[a|b]:1(a|b)*)&1", true}, + {"[ab*]:1*&1", "[ab*b*]:1*&1", true}, // перекрестная бисимуляция {"[a*]:1a*&1", "a*[a*]:1&1", false}, {"b[a*]:1a*&1", "ba*[a*]:1&1", false}, @@ -780,6 +807,8 @@ TEST(TestBisimilar, MFA_Bisimilar) { // несовпадение раскрасок {"[a]:1*&1", "[a*]:1*&1", false}, {"[a]:1*[a*]:1&1", "[a|]:1*&1", false}, + {"([a|]:1*&1)*", "([aa*|]:1&1)*", false}, + {"b*[a*]:1*&1", "b*[a*]:1&1", false}, // несовпадение по решающим действиям {"[a|b]:1c(a|b)&1", "(a|b)c[a|b]:1&1", false}, // несовпадение CG diff --git a/libs/AutomatonToImage/src/AutomatonToImage.cpp b/libs/AutomatonToImage/src/AutomatonToImage.cpp index 9e731e58..77e86094 100644 --- a/libs/AutomatonToImage/src/AutomatonToImage.cpp +++ b/libs/AutomatonToImage/src/AutomatonToImage.cpp @@ -1,17 +1,42 @@ #include #include +#include #include +#include #include "AutomatonToImage/AutomatonToImage.h" +using std::cout; using std::ifstream; +using std::ofstream; using std::string; using std::stringstream; +using std::vector; AutomatonToImage::AutomatonToImage() {} AutomatonToImage::~AutomatonToImage() {} +string replace_before_dot2tex(const string& s) { + vector> substrs_to_replace = {{"\\^", "#^ "}, {"&", "#&"}}; + + string result = s; + for (const auto& [old_substr, new_substr] : substrs_to_replace) { + std::regex re(old_substr); + result = std::regex_replace(result, re, new_substr); + } + + return result; +} + +void write_to_file(const string& file_name, const string& content) { + ofstream file; + file.open(file_name, ofstream::trunc); + if (file.is_open()) + file << content; + file.close(); +} + void remove_file(string dir, string file, bool guarded = false) { stringstream command; command << "cd " << dir; @@ -32,13 +57,13 @@ void remove_file(string dir, string file, bool guarded = false) { string AutomatonToImage::to_image(string automaton) { remove_file("refal", "Meta_log.raux", true); remove_file("refal", "Aux_input.raux", true); - FILE* fo; - fo = fopen("./refal/input.dot", "wt"); - fprintf(fo, "%s", automaton.c_str()); - fclose(fo); + ofstream fo; + write_to_file("./refal/input.dot", replace_before_dot2tex(automaton)); system("cd refal && refgo Preprocess+MathMode+FrameFormatter input.dot > " "error_Preprocess.raux"); + system("cd refal && dot2tex -ftikz -tmath \"Mod_input.dot\" > input.tex"); + system("cd refal && refgo Postprocess+MathMode+FrameFormatter input.tex > " "error_Postprocess.raux " "2>&1"); @@ -67,16 +92,10 @@ string AutomatonToImage::to_image(string automaton) { string AutomatonToImage::colorize(string automaton, string metadata) { - FILE* fo; - FILE* md; ifstream infile_for_Final; - fo = fopen("./refal/Col_input.tex", "wt"); - fprintf(fo, "%s", automaton.c_str()); - fclose(fo); + write_to_file("./refal/Col_input.tex", automaton); if (metadata != "") { - md = fopen("./refal/Meta_input.raux", "wt"); - fprintf(md, "%s", metadata.c_str()); - fclose(md); + write_to_file("./refal/Meta_input.raux", metadata); system("cd refal && refgo Colorize+MathMode Col_input.tex > " "error_Colorize.raux"); infile_for_Final.open("./refal/Final_input.tex"); diff --git a/libs/Interpreter/src/Interpreter.cpp b/libs/Interpreter/src/Interpreter.cpp index 71808cd5..b3a79eca 100644 --- a/libs/Interpreter/src/Interpreter.cpp +++ b/libs/Interpreter/src/Interpreter.cpp @@ -1087,7 +1087,8 @@ optional Interpreter::scan_expression(const vector pos && lexems[pos].type == Lexem::regex) { string str = lexems[pos].value; // выбор между backref и regex - if (str.find("&") != string::npos || str.find(":") != string::npos) { + // TODO: костыль + if (str.find('&') != string::npos || str.find("]:") != string::npos) { expr.type = ObjectType::BRefRegex; expr.value = BackRefRegex(str); } else { diff --git a/libs/Logger/src/LogTemplate.cpp b/libs/Logger/src/LogTemplate.cpp index 720d12f6..34a6c93b 100644 --- a/libs/Logger/src/LogTemplate.cpp +++ b/libs/Logger/src/LogTemplate.cpp @@ -145,7 +145,6 @@ string LogTemplate::render() const { automaton = std::get(param.value).to_txt(); else automaton = std::get(param.value).to_txt(); - automaton = replace_for_rendering(automaton); size_t hash = hasher(automaton); if (cache_automatons.count(hash) != 0) { c_graph = cache_automatons[hash]; @@ -245,16 +244,11 @@ string LogTemplate::log_table(Table t) { table += "$\\begin{array}{" + format + "}\\rowcolor{HeaderColor}\n"; table += cols + "\\hline\n"; for (int i = 0; i < t.rows.size(); i++) { - string r = t.rows[i] == " " ? "eps" : t.rows[i]; - row = r + " & "; + row = t.rows[i] == " " ? "eps" : t.rows[i]; for (int j = 0; j < t.columns.size(); j++) { - if (j != t.columns.size() - 1) { - row = row + t.data[i * t.columns.size() + j] + " &"; - } else { - row = row + t.data[i * t.columns.size() + j] + "\\\\"; - } + row += " & " + replace_for_rendering(t.data[i * t.columns.size() + j]); } - table += row + "\n"; + table += row + "\\\\\n"; } table += "\\end{array}$\n"; return table; diff --git a/libs/Objects/include/Objects/BackRefRegex.h b/libs/Objects/include/Objects/BackRefRegex.h index b282b08a..1b102268 100644 --- a/libs/Objects/include/Objects/BackRefRegex.h +++ b/libs/Objects/include/Objects/BackRefRegex.h @@ -51,6 +51,7 @@ class BackRefRegex : public AlgExpression { // возвращает вектор листьев дерева // устанавливает для них in_lin_cells, first_in_cells и last_in_cells + // линеаризует memoryWriters void preorder_traversal( std::vector& terms, // NOLINT(runtime/references) int& lin_counter, // NOLINT(runtime/references) @@ -79,7 +80,7 @@ class BackRefRegex : public AlgExpression { CellSet>>>& // NOLINT(runtime/references) ) const; - // преобразует star в conc (раскрывает каждую итерацию один раз) и линеаризует memoryWriter + // преобразует star в conc (раскрывает каждую итерацию один раз) и линеаризует memoryWriters void unfold_iterations(int& number); // NOLINT(runtime/references) // рекурсивно проверяет, является ли регулярное выражение ацикличным bool _is_acreg( @@ -87,7 +88,9 @@ class BackRefRegex : public AlgExpression { std::unordered_map>&) const; // NOLINT(runtime/references) void linearize_refs(int& number); // NOLINT(runtime/references) - void _check_refs(std::unordered_set&, std::unordered_set&) const; + void _check_memory_writers(std::unordered_map>&, + std::unordered_set&, // NOLINT(runtime/references) + std::unordered_set&) const; // NOLINT(runtime/references) // меняет порядок конкатенаций в дереве (swap term_l и term_r) void _reverse(std::unordered_map&); // NOLINT(runtime/references) @@ -123,6 +126,8 @@ class BackRefRegex : public AlgExpression { // обращение выражения (для СНФ) BackRefRegex reverse(iLogTemplate* log = nullptr) const; // проверяет, что каждая ссылка может следовать за записью в память (соответствующую ячейку) - bool check_refs() const; + // и что каждый memoryWriter не будет однозначно переинициализирован без возможности + // сослаться на него (существует хотя бы один путь, в котором присутствует ссылка на него) + bool check_refs_and_memory_writers_usefulness() const; BackRefRegex rewrite_aci() const; }; \ No newline at end of file diff --git a/libs/Objects/include/Objects/FiniteAutomaton.h b/libs/Objects/include/Objects/FiniteAutomaton.h index e3fa667e..90910d61 100644 --- a/libs/Objects/include/Objects/FiniteAutomaton.h +++ b/libs/Objects/include/Objects/FiniteAutomaton.h @@ -73,10 +73,10 @@ class FiniteAutomaton : public AbstractMachine { // eps-переходам (если флаг установлен в 0 - по всем переходам) std::set closure(const std::set&, bool) const; - std::vector get_bisimilar_classes() const; + std::vector get_bisimulation_classes() const; // объединение эквивалентных классов (принимает на вход вектор размера states.size()) // на i-й позиции номер класса i-го состояния - std::tuple> merge_equivalent_classes( + std::tuple> merge_classes( const std::vector&) const; static bool equality_checker(const FiniteAutomaton& fa1, const FiniteAutomaton& fa2); // дополнительно возвращает в векторах номера классов состояний каждого автомата @@ -118,8 +118,6 @@ class FiniteAutomaton : public AbstractMachine { std::unordered_map& states_mapping, // NOLINT(runtime/references) MFATransition::MemoryActions memory_actions, int from_mfa_state) const; - FiniteAutomaton get_subautomaton(const CaptureGroup&); - public: FiniteAutomaton(); FiniteAutomaton(int initial_state, std::vector states, diff --git a/libs/Objects/include/Objects/MemoryCommon.h b/libs/Objects/include/Objects/MemoryCommon.h index e93cdcdb..8fa8bdbe 100644 --- a/libs/Objects/include/Objects/MemoryCommon.h +++ b/libs/Objects/include/Objects/MemoryCommon.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -66,7 +67,7 @@ struct MFATransition { }; }; -struct CaptureGroup { +class CaptureGroup { struct State { int index; int class_num; @@ -82,14 +83,25 @@ struct CaptureGroup { std::unordered_set, VectorHasher> paths; std::unordered_set states; std::unordered_set state_classes; + bool is_reset; + public: CaptureGroup() = default; CaptureGroup(int, const std::vector>&, const std::vector&, - bool reset = false); + bool is_reset = false); bool operator==(const CaptureGroup& other) const; - std::unordered_set get_states_diff( - const std::unordered_set& other_state_classes) const; + bool get_is_reset() const; + bool get_cell_number() const; + int get_opening_state_index() const; + + const std::unordered_set, VectorHasher>& get_paths() const; + const std::unordered_set& get_states() const; + + std::tuple, std::unordered_set> get_states_diff( + const CaptureGroup& other) const; + + friend std::ostream& operator<<(std::ostream& os, const CaptureGroup& cg); }; std::ostream& operator<<(std::ostream& os, const CaptureGroup& cg); diff --git a/libs/Objects/include/Objects/MemoryFiniteAutomaton.h b/libs/Objects/include/Objects/MemoryFiniteAutomaton.h index 34367179..c54221a2 100644 --- a/libs/Objects/include/Objects/MemoryFiniteAutomaton.h +++ b/libs/Objects/include/Objects/MemoryFiniteAutomaton.h @@ -147,7 +147,7 @@ class MemoryFiniteAutomaton : public AbstractMachine { std::pair _parse_slow(const std::string&, Matcher*) const; std::pair _parse(const std::string&, Matcher*) const; - // поиск множества состояний НКА, + // поиск множества состояний MFA, // достижимых из множества состояний по eps-переходам std::tuple, std::unordered_set, MFATransition::MemoryActions> get_eps_closure(const std::set& indices) const; @@ -158,7 +158,7 @@ class MemoryFiniteAutomaton : public AbstractMachine { int state_index, std::vector& visited, // NOLINT(runtime/references) const MemoryConfiguration& opened_cells, - std::unordered_map>& colors, // NOLINT(runtime/references) + std::vector>& state_colors, // NOLINT(runtime/references) const std::vector& ab_classes, std::unordered_map& ab_class_to_first_state // NOLINT(runtime/references) ) const; @@ -168,21 +168,30 @@ class MemoryFiniteAutomaton : public AbstractMachine { std::vector get_reversed_transitions() const; std::pair>, std::vector>> find_cg_paths( - int state_index, std::unordered_set visited, int cell, int opening_state) const; + int state_index, std::unordered_set visited, int cell, int opening_state, + bool was_in_opening_state) const; std::vector find_capture_groups_backward( - int ref_incoming_state, int cell, const std::vector& fa_classes) const; + int ref_incoming_state, int cell, + const std::vector& reversed_transitions, + const std::vector& fa_classes) const; bool find_decisions(int state_index, std::vector& visited, // NOLINT(runtime/references) - const std::unordered_set& states_to_check) const; - bool states_have_decisions(const std::unordered_set& states_to_check) const; + const std::unordered_set& states_to_check, + const std::unordered_set& following_states, + const CaptureGroup& cg) const; + bool states_have_decisions(const std::unordered_set& states_to_check, + const std::unordered_set& following_states, + const CaptureGroup& cg) const; + + FiniteAutomaton get_cg_fa(const CaptureGroup& cg) const; static std::optional bisimilarity_checker(const MemoryFiniteAutomaton&, const MemoryFiniteAutomaton&); // объединение эквивалентных классов (принимает на вход вектор размера states.size()) // на i-й позиции номер класса i-го состояния - std::tuple> merge_equivalent_classes( + std::tuple> merge_classes( const std::vector&) const; public: diff --git a/libs/Objects/include/Objects/Symbol.h b/libs/Objects/include/Objects/Symbol.h index bdfa1a61..3a798ae4 100644 --- a/libs/Objects/include/Objects/Symbol.h +++ b/libs/Objects/include/Objects/Symbol.h @@ -6,7 +6,7 @@ #include // Символ, по которому осуществляются переходы в автомате. -// Может быть символом-буквой (и входить в алфавит) или ссылкой (&i) +// Может быть символом-буквой (и входить ТОЛЬКО в алфавит FA) или ссылкой (&i) class Symbol { private: std::vector annote_numbers; @@ -90,4 +90,6 @@ class MemorySymbols { static bool is_open(const Symbol& s); static int get_cell_number(const Symbol& s); -}; \ No newline at end of file +}; + +bool is_special_symbol(const Symbol& s); diff --git a/libs/Objects/include/Objects/Tools.h b/libs/Objects/include/Objects/Tools.h index 6ed57f39..73c6bd96 100644 --- a/libs/Objects/include/Objects/Tools.h +++ b/libs/Objects/include/Objects/Tools.h @@ -33,7 +33,7 @@ struct TupleHasher { size_t operator()(const std::tuple& p) const; }; -using IntPairSet = std::unordered_set, IntPairHasher>; +using IntPairsSet = std::unordered_set, IntPairHasher>; std::ostream& operator<<(std::ostream& os, const std::vector& vec); diff --git a/libs/Objects/src/AlgExpression.cpp b/libs/Objects/src/AlgExpression.cpp index 5277b3ee..c6f45a34 100644 --- a/libs/Objects/src/AlgExpression.cpp +++ b/libs/Objects/src/AlgExpression.cpp @@ -338,6 +338,7 @@ vector AlgExpression::parse_string(string str, bool allow return {Lexeme::Type::error}; if (allow_ref) + // не будет входить в алфавит, только для обозначения перехода в MFA lexeme.type = Lexeme::Type::ref; else lexeme.type = Lexeme::Type::symb; diff --git a/libs/Objects/src/BackRefRegex.cpp b/libs/Objects/src/BackRefRegex.cpp index 1c2e3faf..4d43df99 100644 --- a/libs/Objects/src/BackRefRegex.cpp +++ b/libs/Objects/src/BackRefRegex.cpp @@ -438,8 +438,6 @@ void BackRefRegex::preorder_traversal(vector& terms, int& lin_cou vector& last_in_cells, unordered_set cur_in_lin_cells, CellSet cur_first_in_cells, CellSet cur_last_in_cells) { - bool l_contains_eps, r_contains_eps; - switch (type) { case alt: cast(term_l)->preorder_traversal(terms, @@ -459,9 +457,9 @@ void BackRefRegex::preorder_traversal(vector& terms, int& lin_cou cur_first_in_cells, cur_last_in_cells); return; - case conc: - l_contains_eps = cast(term_l)->contains_eps(); - r_contains_eps = cast(term_r)->contains_eps(); + case conc: { + bool l_contains_eps = cast(term_l)->contains_eps(); + bool r_contains_eps = cast(term_r)->contains_eps(); cast(term_l)->preorder_traversal(terms, lin_counter, in_lin_cells, @@ -479,6 +477,7 @@ void BackRefRegex::preorder_traversal(vector& terms, int& lin_cou l_contains_eps ? cur_first_in_cells : CellSet(), cur_last_in_cells); return; + } case star: cast(term_l)->preorder_traversal(terms, lin_counter, @@ -517,17 +516,16 @@ void BackRefRegex::preorder_traversal(vector& terms, int& lin_cou } void BackRefRegex::calculate_may_be_eps(unordered_map>& memory_writers) { - unordered_map> memory_writers_copy; - unordered_map>::iterator it_ref_to; switch (type) { - case alt: - memory_writers_copy = memory_writers; + case alt: { + auto memory_writers_copy = memory_writers; cast(term_l)->calculate_may_be_eps(memory_writers); cast(term_r)->calculate_may_be_eps(memory_writers_copy); for (const auto& [num, refs_to] : memory_writers_copy) for (const auto& memory_writer : refs_to) memory_writers[num].push_back(memory_writer); return; + } case conc: cast(term_l)->calculate_may_be_eps(memory_writers); cast(term_r)->calculate_may_be_eps(memory_writers); @@ -540,14 +538,14 @@ void BackRefRegex::calculate_may_be_eps(unordered_map memory_writers[cell_number] = {this}; cast(term_l)->calculate_may_be_eps(memory_writers); return; - case ref: - it_ref_to = memory_writers.find(cell_number); - if (it_ref_to != memory_writers.end()) + case ref: { + if (auto it_ref_to = memory_writers.find(cell_number); it_ref_to != memory_writers.end()) for (const auto& memory_writer : it_ref_to->second) may_be_eps |= memory_writer->contains_eps(); else may_be_eps = true; return; + } default: return; } @@ -802,9 +800,6 @@ void BackRefRegex::get_cells_under_iteration(unordered_set& iteration_over_ void BackRefRegex::get_follow( vector, CellSet>>>& following_states) const { vector> first, last; - unordered_set iteration_over_cells; - CellSet iteration_over_empty_cells; - pair is_eps; switch (type) { case Type::alt: cast(term_l)->get_follow(following_states); @@ -824,11 +819,12 @@ void BackRefRegex::get_follow( } } return; - case Type::star: + case Type::star: { cast(term_l)->get_follow(following_states); - is_eps = contains_eps_tracking_resets(); + pair is_eps = contains_eps_tracking_resets(); last = cast(term_l)->get_last_nodes_tracking_resets(); first = cast(term_l)->get_first_nodes_tracking_resets(); + unordered_set iteration_over_cells; get_cells_under_iteration(iteration_over_cells); for (auto& [i, last_to_reset] : last) { for (auto& [j, first_to_reset] : first) { @@ -843,6 +839,7 @@ void BackRefRegex::get_follow( } } return; + } case Type::memoryWriter: return cast(term_l)->get_follow(following_states); default: @@ -979,11 +976,9 @@ void BackRefRegex::unfold_iterations(int& number) { bool BackRefRegex::_is_acreg(unordered_set in_cells, unordered_set in_lin_cells, unordered_map>& refs_in_cells) const { - unordered_map>::iterator refs_in_cell; - unordered_map> refs_in_cells_copy; switch (type) { - case alt: - refs_in_cells_copy = refs_in_cells; + case alt: { + auto refs_in_cells_copy = refs_in_cells; if (!cast(term_l)->_is_acreg(in_cells, in_lin_cells, refs_in_cells)) return false; if (!cast(term_r)->_is_acreg(in_cells, in_lin_cells, refs_in_cells_copy)) @@ -991,6 +986,7 @@ bool BackRefRegex::_is_acreg(unordered_set in_cells, unordered_set in_ for (const auto& [num, refs] : refs_in_cells_copy) refs_in_cells[num].insert(refs.begin(), refs.end()); return true; + } case conc: if (!cast(term_l)->_is_acreg(in_cells, in_lin_cells, refs_in_cells)) return false; @@ -1001,8 +997,8 @@ bool BackRefRegex::_is_acreg(unordered_set in_cells, unordered_set in_ refs_in_cells[cell_number] = {lin_number}; return cast(term_l)->_is_acreg(in_cells, in_lin_cells, refs_in_cells); case ref: - refs_in_cell = refs_in_cells.find(cell_number); - if (refs_in_cell != refs_in_cells.end()) { + if (auto refs_in_cell = refs_in_cells.find(cell_number); + refs_in_cell != refs_in_cells.end()) { for (auto cell_lin_num : in_lin_cells) // если ссылается на те же линеаризованные memoryWriter, в которых находится сама if (refs_in_cell->second.count(cell_lin_num)) @@ -1051,54 +1047,67 @@ void BackRefRegex::linearize_refs(int& number) { cast(term_l)->linearize_refs(number); break; case ref: - symbol.linearize(number); - number++; + symbol.linearize(number++); break; default: break; } } -void BackRefRegex::_check_refs(unordered_set& found, unordered_set& found_for_lin) const { - unordered_set found_copy; +void BackRefRegex::_check_memory_writers( + unordered_map>& found_memory_writers, + unordered_set& refs_check_set, unordered_set& memory_writers_check_set) const { switch (type) { - case alt: - found_copy = found; - cast(term_l)->_check_refs(found, found_for_lin); - cast(term_r)->_check_refs(found_copy, found_for_lin); - found.insert(found_copy.begin(), found_copy.end()); + case alt: { + auto found_copy = found_memory_writers; + cast(term_l)->_check_memory_writers( + found_memory_writers, refs_check_set, memory_writers_check_set); + cast(term_r)->_check_memory_writers(found_copy, refs_check_set, memory_writers_check_set); + for (const auto& [memory_writer_cell_number, memory_writer_lin_numbers] : found_copy) { + found_memory_writers[memory_writer_cell_number].insert( + memory_writer_lin_numbers.begin(), memory_writer_lin_numbers.end()); + } break; + } case conc: - cast(term_l)->_check_refs(found, found_for_lin); - cast(term_r)->_check_refs(found, found_for_lin); + cast(term_l)->_check_memory_writers( + found_memory_writers, refs_check_set, memory_writers_check_set); + cast(term_r)->_check_memory_writers( + found_memory_writers, refs_check_set, memory_writers_check_set); break; case memoryWriter: - found.insert(cell_number); - cast(term_l)->_check_refs(found, found_for_lin); + found_memory_writers[cell_number] = {lin_number}; + cast(term_l)->_check_memory_writers( + found_memory_writers, refs_check_set, memory_writers_check_set); break; case ref: - if (found.count(cell_number)) - found_for_lin.insert(symbol.last_linearization_number()); + if (auto it = found_memory_writers.find(cell_number); it != found_memory_writers.end()) { + refs_check_set.insert(symbol.last_linearization_number()); + for (const auto& memory_writer_lin_num : it->second) + memory_writers_check_set.insert(memory_writer_lin_num); + } break; default: break; } } -bool BackRefRegex::check_refs() const { +bool BackRefRegex::check_refs_and_memory_writers_usefulness() const { BackRefRegex temp(*this); - int lin_counter = 0; - temp.linearize_refs(lin_counter); + int refs_lin_counter = 0; + temp.linearize_refs(refs_lin_counter); - int n = 0; - temp.unfold_iterations(n); + int memory_writers_lin_counter = 0; + temp.unfold_iterations(memory_writers_lin_counter); - unordered_set found; - unordered_set found_for_lin; - temp._check_refs(found, found_for_lin); + unordered_map> found_memory_writers; + unordered_set refs_check_set; + unordered_set memory_writers_check_set; + temp._check_memory_writers(found_memory_writers, refs_check_set, memory_writers_check_set); - return found_for_lin.size() == lin_counter; + return refs_check_set.size() == refs_lin_counter && + memory_writers_check_set.size() == memory_writers_lin_counter; } void BackRefRegex::_reverse(unordered_map& memory_writers) { diff --git a/libs/Objects/src/FiniteAutomaton.cpp b/libs/Objects/src/FiniteAutomaton.cpp index 241481e8..3f7df7f0 100644 --- a/libs/Objects/src/FiniteAutomaton.cpp +++ b/libs/Objects/src/FiniteAutomaton.cpp @@ -383,7 +383,7 @@ FiniteAutomaton FiniteAutomaton::minimize(bool is_trim, iLogTemplate* log) const classes[groups[i][j]] = i; } } - auto [minimized_dfa, class_to_index] = dfa.merge_equivalent_classes(classes); + auto [minimized_dfa, class_to_index] = dfa.merge_classes(classes); // кэширование language->set_min_dfa(minimized_dfa); @@ -1361,7 +1361,7 @@ bool FiniteAutomaton::is_one_unambiguous(iLogTemplate* log) const { return true; } -tuple> FiniteAutomaton::merge_equivalent_classes( +tuple> FiniteAutomaton::merge_classes( const vector& classes) const { map> class_to_indexes; for (int i = 0; i < classes.size(); i++) @@ -1398,7 +1398,7 @@ tuple> FiniteAutomaton::merge_equivalen return {{class_to_index.at(classes[initial_state]), new_states, language}, class_to_index}; } -vector FiniteAutomaton::get_bisimilar_classes() const { +vector FiniteAutomaton::get_bisimulation_classes() const { vector fa_items; vector nonterminals; vector terminals; @@ -1418,8 +1418,8 @@ vector FiniteAutomaton::get_bisimilar_classes() const { FiniteAutomaton FiniteAutomaton::merge_bisimilar(iLogTemplate* log) const { MetaInfo old_meta, new_meta; - vector classes = get_bisimilar_classes(); - auto [result, class_to_index] = merge_equivalent_classes(classes); + vector classes = get_bisimulation_classes(); + auto [result, class_to_index] = merge_classes(classes); for (int i = 0; i < classes.size(); i++) { for (int j = 0; j < classes.size(); j++) @@ -1602,9 +1602,9 @@ bool FiniteAutomaton::equality_checker(const FiniteAutomaton& fa1, const FiniteA if (t != 0) return false; - vector bisimilar_classes(nonterminals.size()); + vector bisimulation_classes(nonterminals.size()); for (int i = 0; i < nonterminals.size(); i++) - bisimilar_classes[i] = nonterminals[i]->class_number; + bisimulation_classes[i] = nonterminals[i]->class_number; // биективная бисимуляция обратных грамматик vector>> fa1_reverse_rules = RLGrammar::get_reverse_grammar( @@ -1623,22 +1623,22 @@ bool FiniteAutomaton::equality_checker(const FiniteAutomaton& fa1, const FiniteA RLGrammar::get_bisimilar_grammar( reverse_rules, nonterminals, reverse_bisimilar_nonterminals, class_to_nonterminals); // сопоставление состояний 1 к 1 - vector reverse_bisimilar_classes; + vector reverse_bisimulation_classes; for (RLGrammar::Item* nont : nonterminals) { - reverse_bisimilar_classes.push_back(nont->class_number); + reverse_bisimulation_classes.push_back(nont->class_number); nont->class_number = -1; } // устанавливаем классы нетерминалов-состояний (1 к 1), чтобы после сопоставить переходы int new_class = 0; - for (int i = 0; i < bisimilar_classes.size(); i++) { + for (int i = 0; i < bisimulation_classes.size(); i++) { if (nonterminals[i]->class_number != -1) continue; nonterminals[i]->class_number = new_class; // поиск нетерминалов с классом, как у i-го - for (int j = i + 1; j < bisimilar_classes.size(); j++) { - if (bisimilar_classes[j] == bisimilar_classes[i]) - if (reverse_bisimilar_classes[j] == reverse_bisimilar_classes[i]) + for (int j = i + 1; j < bisimulation_classes.size(); j++) { + if (bisimulation_classes[j] == bisimulation_classes[i]) + if (reverse_bisimulation_classes[j] == reverse_bisimulation_classes[i]) nonterminals[j]->class_number = new_class; } new_class++; @@ -2702,11 +2702,15 @@ void FiniteAutomaton::to_mfa_dfs(int state_index, vector& visited, MemoryFiniteAutomaton FiniteAutomaton::to_mfa() const { vector mfa_states; + Alphabet alphabet; mfa_states.emplace_back(0, states[initial_state].identifier, states[initial_state].is_terminal); vector visited(size(), false); unordered_map states_mapping; to_mfa_dfs(initial_state, visited, mfa_states, states_mapping, {}, 0); - return {initial_state, mfa_states, language->get_alphabet()}; + for (const auto& symbol : language->get_alphabet()) + if (!is_special_symbol(symbol)) + alphabet.insert(symbol); + return {initial_state, mfa_states, alphabet}; } void FiniteAutomaton::fill_order(int state_index, vector& visited, stack& order) { @@ -2768,34 +2772,4 @@ std::vector> FiniteAutomaton::get_SCCs() { } return SCCs; -} - -FiniteAutomaton FiniteAutomaton::get_subautomaton(const CaptureGroup& cg) { - int n = cg.states.size(); - vector sub_states; - sub_states.reserve(cg.states.size()); - Alphabet alphabet; - - unordered_set terminal_states; - for (const auto& path : cg.paths) - terminal_states.insert(path[path.size() - 1]); - - unordered_map indexes; - int idx = 0; - for (auto st : cg.states) { - indexes[st.index] = idx; - sub_states.emplace_back(idx, states[st.index].identifier, terminal_states.count(st.index)); - idx++; - } - - for (const auto& st : cg.states) - for (const auto& [symbol, symbol_transitions] : states[st.index].transitions) - for (const auto& to : symbol_transitions) - if (indexes.count(to)) { - alphabet.insert(symbol); - sub_states[indexes.at(st.index)].add_transition(indexes.at(to), symbol); - } - - // начальное состояние общее у всех cg.paths - return {indexes.at((*cg.paths.begin())[0]), sub_states, alphabet}; -} +} \ No newline at end of file diff --git a/libs/Objects/src/MemoryCommon.cpp b/libs/Objects/src/MemoryCommon.cpp index b80d7fcc..f80e29b5 100644 --- a/libs/Objects/src/MemoryCommon.cpp +++ b/libs/Objects/src/MemoryCommon.cpp @@ -1,5 +1,6 @@ #include "Objects/MemoryCommon.h" +using std::tuple; using std::unordered_set; using std::vector; @@ -40,12 +41,12 @@ bool CaptureGroup::State::operator==(const State& other) const { } CaptureGroup::CaptureGroup(int cell, const vector>& _paths, - const vector& _state_classes, bool reset) - : cell(cell) { + const vector& _state_classes, bool is_reset) + : cell(cell), is_reset(is_reset) { for (const auto& path : _paths) { paths.insert(path); for (auto st : path) { - int class_num = (reset) ? State::reset_class : _state_classes[st]; + int class_num = (is_reset) ? State::reset_class : _state_classes[st]; states.insert({st, class_num}); state_classes.insert(class_num); } @@ -56,19 +57,40 @@ bool CaptureGroup::operator==(const CaptureGroup& other) const { return cell == other.cell && states == other.states; } -unordered_set CaptureGroup::get_states_diff( - const unordered_set& other_state_classes) const { +bool CaptureGroup::get_is_reset() const { + return is_reset; +} + +bool CaptureGroup::get_cell_number() const { + return cell; +} + +int CaptureGroup::get_opening_state_index() const { + return (*paths.begin())[0]; +} + +const std::unordered_set, VectorHasher>& CaptureGroup::get_paths() const { + return paths; +} + +const unordered_set& CaptureGroup::get_states() + const { + return states; +} + +tuple, unordered_set> CaptureGroup::get_states_diff( + const CaptureGroup& other) const { unordered_set diff; for (auto st : states) - if (st.class_num != State::reset_class && !other_state_classes.count(st.class_num)) + if (st.class_num != State::reset_class && !other.state_classes.count(st.class_num)) diff.insert(st.index); - unordered_set res(diff); + unordered_set following(diff); for (const auto& path : paths) for (size_t i = path.size() - 1; i > 0; i--) if (diff.count(path[i - 1])) - res.insert(path[i]); - return res; + following.insert(path[i]); + return {diff, following}; } std::ostream& operator<<(std::ostream& os, const CaptureGroup& cg) { @@ -79,4 +101,4 @@ std::ostream& operator<<(std::ostream& os, const CaptureGroup& cg) { for (const auto& i : cg.states) os << "{" << i.index << ": " << i.class_num << "} "; return os << "]\n"; -} +} \ No newline at end of file diff --git a/libs/Objects/src/MemoryFiniteAutomaton.cpp b/libs/Objects/src/MemoryFiniteAutomaton.cpp index a81478aa..1b9e99ad 100644 --- a/libs/Objects/src/MemoryFiniteAutomaton.cpp +++ b/libs/Objects/src/MemoryFiniteAutomaton.cpp @@ -198,6 +198,9 @@ MemoryFiniteAutomaton::MemoryFiniteAutomaton(int initial_state, std::vectorget_alphabet()) + if (is_special_symbol(symbol)) + throw std::logic_error("alphabet of MFA mustn't contain special symbols"); } template @@ -771,9 +774,9 @@ pair MemoryFiniteAutomaton::_parse(const string& s, Matcher* matcher) } visited_states.insert(cur_state); + counter++; } current_states = following_states; - counter++; } return {counter, false}; @@ -1062,7 +1065,7 @@ size_t TraversalState::Hasher::operator()(const TraversalState& s) const { pair, unordered_set> MemoryFiniteAutomaton::generate_test_set( int max_len) const { unordered_set words_in_language; - unordered_map words_to_mutate; + unordered_map words_to_mutate; unordered_set current_states; current_states.insert(TraversalState(&states[initial_state])); @@ -1070,11 +1073,10 @@ pair, unordered_set> MemoryFiniteAutomaton::genera unordered_set visited_states; while (!current_states.empty()) { unordered_set following_states; - for (const auto& state_to_process : current_states) { - if (visited_states.count(state_to_process)) + for (auto cur_state : current_states) { + if (visited_states.count(cur_state)) continue; - auto cur_state = state_to_process; cur_state.process_mutations(); const MFAState* state = cur_state.state; if (state->is_terminal) { @@ -1216,16 +1218,16 @@ FiniteAutomaton MemoryFiniteAutomaton::to_symbolic_fa(iLogTemplate* log) const { set opens; set closes; set resets; - for (const auto& [num, action] : tr.memory_actions) { + for (const auto& [cell, action] : tr.memory_actions) { switch (action) { case MFATransition::open: - opens.insert(num); + opens.insert(cell); break; case MFATransition::close: - closes.insert(num); + closes.insert(cell); break; case MFATransition::reset: - resets.insert(num); + resets.insert(cell); break; } } @@ -1305,11 +1307,11 @@ MemoryConfiguration update_memory_configuration(const MFATransition::MemoryActio void MemoryFiniteAutomaton::color_mem_dfs(int state_index, vector& visited, const MemoryConfiguration& opened_cells, - unordered_map>& colors, + vector>& state_colors, const vector& ab_classes, unordered_map& ab_class_to_first_state) const { visited[state_index] = true; - colors[state_index] = opened_cells; + state_colors[state_index] = opened_cells; if (!ab_class_to_first_state.count(ab_classes[state_index])) ab_class_to_first_state[ab_classes[state_index]] = state_index; for (const auto& [symbol, symbol_transitions] : states[state_index].transitions) { @@ -1318,7 +1320,7 @@ void MemoryFiniteAutomaton::color_mem_dfs(int state_index, vector& visited color_mem_dfs(tr.to, visited, update_memory_configuration(tr.memory_actions, opened_cells), - colors, + state_colors, ab_classes, ab_class_to_first_state); } @@ -1345,7 +1347,8 @@ MemoryFiniteAutomaton MemoryFiniteAutomaton::get_subautomaton(const vector& for (const auto& [symbol, symbol_transitions] : states[state_index].transitions) for (const auto& tr : symbol_transitions) if (indexes.count(tr.to)) { - alphabet.insert(symbol); + if (!is_special_symbol(symbol)) + alphabet.insert(symbol); sub_states[indexes.at(state_index)].add_transition( MFATransition(indexes.at(tr.to), tr.memory_actions), symbol); } @@ -1374,8 +1377,8 @@ void find_opening_states_dfs(int state_index, for (const auto& [symbol, symbol_transitions] : reversed_transitions[state_index]) for (const auto& tr : symbol_transitions) { optional action; - if (tr.memory_actions.count(cell)) - action = tr.memory_actions.at(cell); + if (auto it = tr.memory_actions.find(cell); it != tr.memory_actions.end()) + action = it->second; if (action && (action == MFATransition::open || action == MFATransition::reset)) opening_states.insert(tr.to); else if (!visited[tr.to]) @@ -1383,27 +1386,58 @@ void find_opening_states_dfs(int state_index, } } +bool opening_action_only_from_opening_state(bool is_opening_state, + optional action) { + bool is_open_action = action && action == MFATransition::open; + return is_opening_state == is_open_action; +} + pair>, vector>> MemoryFiniteAutomaton::find_cg_paths( - int state_index, std::unordered_set visited, int cell, int opening_state) const { + int state_index, std::unordered_set visited, int cell, int opening_state, + bool was_in_opening_state) const { vector> paths; vector> reset_paths; visited.insert(state_index); + bool is_opening_state = state_index == opening_state; for (const auto& [symbol, symbol_transitions] : states[state_index].transitions) for (const auto& tr : symbol_transitions) { optional action; - if (tr.memory_actions.count(cell)) - action = tr.memory_actions.at(cell); + if (auto it = tr.memory_actions.find(cell); it != tr.memory_actions.end()) + action = it->second; if (action && action == MFATransition::close) { - paths.push_back({state_index}); + if (!(is_opening_state && !was_in_opening_state)) + paths.push_back({state_index}); } else if (action && action == MFATransition::reset) { reset_paths.push_back({state_index}); - } else if (!visited.count(tr.to) && !(state_index == opening_state && - (!action || action != MFATransition::open))) { - auto [t, _] = find_cg_paths(tr.to, visited, cell, opening_state); - for (auto i : t) { - i.insert(i.begin(), state_index); - paths.emplace_back(i); + } else { + bool should_process = !visited.count(tr.to); + if (is_opening_state) { + // чтобы обработать случай, когда открывающее совпадает с + // закрывающим, e.g. [a*a]:1*&1 + should_process |= tr.to == opening_state && !was_in_opening_state; + should_process &= opening_action_only_from_opening_state(true, action) || + (was_in_opening_state && !action); + } else { + // чтобы обработать случай, когда открывающее совпадает с + // закрывающим, e.g. [a*a]:1*&1 + should_process |= tr.to == opening_state; + should_process &= opening_action_only_from_opening_state(false, action); + } + + if (should_process) { + auto [t, _] = find_cg_paths(tr.to, + visited, + cell, + opening_state, + is_opening_state || was_in_opening_state); + for (auto& i : t) { + if (!(is_opening_state && !was_in_opening_state && + tr.to == opening_state)) { + i.insert(i.begin(), state_index); + } + paths.emplace_back(i); + } } } } @@ -1412,8 +1446,8 @@ pair>, vector>> MemoryFiniteAutomaton::find_cg_pa } vector MemoryFiniteAutomaton::find_capture_groups_backward( - int ref_incoming_state, int cell, const std::vector& fa_classes) const { - vector reversed_transitions = get_reversed_transitions(); + int ref_incoming_state, int cell, const vector& reversed_transitions, + const std::vector& fa_classes) const { unordered_set opening_states; vector visited(size(), false); find_opening_states_dfs( @@ -1422,7 +1456,7 @@ vector MemoryFiniteAutomaton::find_capture_groups_backward( vector res; for (auto opening_st : opening_states) { - auto [paths, reset_paths] = find_cg_paths(opening_st, {}, cell, opening_st); + auto [paths, reset_paths] = find_cg_paths(opening_st, {}, cell, opening_st, false); for (const auto& reset_path : reset_paths) res.push_back(CaptureGroup(cell, {reset_path}, fa_classes, true)); if (!paths.empty()) @@ -1431,15 +1465,25 @@ vector MemoryFiniteAutomaton::find_capture_groups_backward( return res; } -bool MemoryFiniteAutomaton::find_decisions(int state_index, std::vector& visited, - const std::unordered_set& states_to_check) const { +bool MemoryFiniteAutomaton::find_decisions(int state_index, vector& visited, + const unordered_set& states_to_check, + const unordered_set& following_states, + const CaptureGroup& cg) const { visited[state_index] = 1; optional single_tr; int count = 0; for (const auto& [symbol, symbol_transitions] : states[state_index].transitions) - for (const auto& tr : symbol_transitions) - if (states_to_check.count(tr.to)) { + for (const auto& tr : symbol_transitions) { + optional action; + if (auto it = tr.memory_actions.find(cg.get_cell_number()); + it != tr.memory_actions.end()) + action = it->second; + + if (opening_action_only_from_opening_state(state_index == cg.get_opening_state_index(), + action) && + (states_to_check.count(tr.to) || following_states.count(tr.to)) && + !(following_states.count(state_index) && !states_to_check.count(tr.to))) { if (visited[tr.to] == 0) { if (++count > 1) return true; @@ -1448,40 +1492,114 @@ bool MemoryFiniteAutomaton::find_decisions(int state_index, std::vector& vi return true; } } + } bool found = false; if (single_tr) - found = find_decisions(single_tr->to, visited, states_to_check); + found = find_decisions(single_tr->to, visited, states_to_check, following_states, cg); visited[state_index] = 2; return found; } -bool MemoryFiniteAutomaton::states_have_decisions( - const std::unordered_set& states_to_check) const { +bool MemoryFiniteAutomaton::states_have_decisions(const unordered_set& states_to_check, + const unordered_set& following_states, + const CaptureGroup& cg) const { vector visited(size(), 0); for (auto start : states_to_check) { if (visited[start] != 0) continue; - if (find_decisions(start, visited, states_to_check)) + if (find_decisions(start, visited, states_to_check, following_states, cg)) return true; } return false; } +FiniteAutomaton MemoryFiniteAutomaton::get_cg_fa(const CaptureGroup& cg) const { + int n = cg.get_states().size(); + vector sub_states; + sub_states.reserve(cg.get_states().size()); + Alphabet alphabet; + + int cg_opening_state_index = cg.get_opening_state_index(); + + if (cg.get_is_reset()) { + sub_states.emplace_back(0, states[cg_opening_state_index].identifier, true); + return {0, sub_states, alphabet}; + } + + bool additional_state = false; + unordered_set terminal_states; + for (const auto& path : cg.get_paths()) + if (path[path.size() - 1] != cg_opening_state_index) + terminal_states.insert(path[path.size() - 1]); + else // если у стартового состояния есть открывающий переход в самого себя + additional_state = true; + + unordered_map indexes; + int idx = 0; + for (auto st : cg.get_states()) { + indexes[st.index] = idx; + sub_states.emplace_back(idx, states[st.index].identifier, terminal_states.count(st.index)); + idx++; + } + if (additional_state) + sub_states.emplace_back(idx, "", true); + + for (const auto& st : cg.get_states()) { + bool is_opening_state = st.index == cg_opening_state_index; + for (const auto& [symbol, symbol_transitions] : states[st.index].transitions) + for (const auto& tr : symbol_transitions) { + // не просто false, чтобы обработать ниже переходы без открытия памяти из стартового + bool skip = is_opening_state && !additional_state; + bool is_opening_transition = false; + for (const auto& [_, action] : tr.memory_actions) { + if (is_opening_state && action == MFATransition::open) { + skip = false; + is_opening_transition = true; + } else if (action == MFATransition::open && !is_opening_state || + action == MFATransition::close && !terminal_states.count(tr.to) || + action == MFATransition::reset) { + skip = true; + break; + } + } + if (skip) + continue; + alphabet.insert(symbol); + int target_index = + (tr.to == cg_opening_state_index) ? sub_states.size() - 1 : indexes.at(tr.to); + + if (is_opening_state && additional_state && !is_opening_transition) { + sub_states[sub_states.size() - 1].add_transition(target_index, symbol); + } else { + sub_states[indexes.at(st.index)].add_transition(target_index, symbol); + } + } + } + + // начальное состояние общее у всех cg.paths + return {indexes.at(cg_opening_state_index), sub_states, alphabet}; +} + optional MemoryFiniteAutomaton::bisimilarity_checker(const MemoryFiniteAutomaton& mfa1, const MemoryFiniteAutomaton& mfa2) { // #define DEBUG const int N = 2; vector mfas({&mfa1, &mfa2}); +#ifdef DEBUG + using std::cout; + cout << mfas[0]->to_txt() << mfas[1]->to_txt(); +#endif // проверяем action bisimilarity vector fas({mfas[0]->to_action_fa(), mfas[1]->to_action_fa()}); auto [ab_res, _, ab_classes] = FiniteAutomaton::bisimilarity_checker(fas[0], fas[1]); if (!ab_res) return false; - vector> ab_class_to_first_state(N); + vector> first_state_in_ab_class(N); // раскрашиваем состояния - vector>> mfa_colors(N); + vector>> mfa_colors = {vector>(mfas[0]->size()), + vector>(mfas[1]->size())}; for (int i = 0; i < N; i++) { vector visited(mfas[i]->size(), false); mfas[i]->color_mem_dfs(mfas[i]->get_initial(), @@ -1489,15 +1607,11 @@ optional MemoryFiniteAutomaton::bisimilarity_checker(const MemoryFiniteAut {}, mfa_colors[i], ab_classes[i], - ab_class_to_first_state[i]); + first_state_in_ab_class[i]); } -#ifdef DEBUG - using std::cout; - cout << mfas[0]->to_txt() << mfas[1]->to_txt(); -#endif for (const auto& mfa_colors_i : mfa_colors) - for (const auto& j : mfa_colors_i) { - if (j.second.size() > 1) + for (const auto& colors_of_state : mfa_colors_i) { + if (colors_of_state.size() > 1) return std::nullopt; } // проверяем совпадение раскраски эквивалентных состояний в КСС @@ -1505,39 +1619,49 @@ optional MemoryFiniteAutomaton::bisimilarity_checker(const MemoryFiniteAut vector>>>> colored_SCCs(N); for (int i = 0; i < N; i++) { for (const auto& SCC : SCCs[i]) { - unordered_set colors_to_ignore; + unordered_set colors_to_ignore_mandatory; + unordered_set colors_to_ignore_optional; for (auto state : SCC) { - unordered_set state_colors_to_ignore; - bool has_transitions_without_actions = false; + unordered_set cur_colors_to_ignore; + unordered_set colors_of_internal_transitions; for (const auto& [symbol, symbol_transitions] : mfas[i]->states[state].transitions) { for (const auto& tr : symbol_transitions) { if (SCC.count(tr.to)) { if (tr.memory_actions.empty()) { - state_colors_to_ignore.clear(); - has_transitions_without_actions = true; - break; + for (auto color : mfa_colors[i][state]) { + if (mfa_colors[i][tr.to].find(color) != + mfa_colors[i][tr.to].end()) { + colors_of_internal_transitions.insert(color); + } + } } for (const auto& [cell, action] : tr.memory_actions) - state_colors_to_ignore.insert(cell); + if (mfa_colors[i][state].count(cell)) + cur_colors_to_ignore.insert(cell); } } - if (has_transitions_without_actions) - break; } - colors_to_ignore.insert(state_colors_to_ignore.begin(), - state_colors_to_ignore.end()); + colors_to_ignore_optional.insert(cur_colors_to_ignore.begin(), + cur_colors_to_ignore.end()); + for (const auto& color : colors_of_internal_transitions) + cur_colors_to_ignore.erase(color); + colors_to_ignore_mandatory.insert(cur_colors_to_ignore.begin(), + cur_colors_to_ignore.end()); } set>> colored_SCC; for (auto j : SCC) { - unordered_set j_colors; - for (auto color : mfa_colors[i].at(j)) - if (!colors_to_ignore.count(color)) - j_colors.insert(color); - if (!j_colors.empty()) - colored_SCC.insert( - {ab_classes[i][j], set(j_colors.begin(), j_colors.end())}); + vector> colors_to_ignore = {colors_to_ignore_mandatory}; + if (!colors_to_ignore_optional.empty()) + colors_to_ignore.emplace_back(colors_to_ignore_optional); + for (const auto& ignore : colors_to_ignore) { + unordered_set j_colors; + for (auto color : mfa_colors[i][j]) + if (!ignore.count(color)) + j_colors.insert(color); + colored_SCC.insert({ab_classes[i][j], {j_colors.begin(), j_colors.end()}}); + } } if (!colored_SCC.empty()) colored_SCCs[i].insert(colored_SCC); @@ -1570,8 +1694,8 @@ optional MemoryFiniteAutomaton::bisimilarity_checker(const MemoryFiniteAut in_SCCs[i].insert(j); vector symbolic_fas({mfas[0]->to_symbolic_fa(), mfas[1]->to_symbolic_fa()}); - vector> symbolic_classes = {symbolic_fas[0].get_bisimilar_classes(), - symbolic_fas[1].get_bisimilar_classes()}; + vector> symbolic_classes = {symbolic_fas[0].get_bisimulation_classes(), + symbolic_fas[1].get_bisimulation_classes()}; #ifdef DEBUG cout << ab_classes[0] << ab_classes[1]; cout << FiniteAutomaton::bisimilar(symbolic_fas[0], symbolic_fas[1]) << "\n"; @@ -1596,15 +1720,15 @@ optional MemoryFiniteAutomaton::bisimilarity_checker(const MemoryFiniteAut << "\n"; #endif if (!symbolic_bisimilar( - mfas[0]->get_subautomaton(ab_states_0, ab_class_to_first_state[0][ab_class]), - mfas[1]->get_subautomaton(ab_states_1, ab_class_to_first_state[0][ab_class]))) + mfas[0]->get_subautomaton(ab_states_0, first_state_in_ab_class[0][ab_class]), + mfas[1]->get_subautomaton(ab_states_1, first_state_in_ab_class[0][ab_class]))) return false; } } // ищем пары состояний, от которых будем делать обратный расчет - vector> reversed_transitions( - {fas[0].get_reversed_transitions(), fas[1].get_reversed_transitions()}); + vector> reversed_transitions( + {mfas[0]->get_reversed_transitions(), mfas[1]->get_reversed_transitions()}); // {класс action-бисимилярности -> {номер ячейки -> {индексы состояний}}} // для каждого класса и номера ячейки ищем состояния, в которые входят переходы по ссылкам vector>>> states_with_incoming_refs(N); @@ -1624,11 +1748,11 @@ optional MemoryFiniteAutomaton::bisimilarity_checker(const MemoryFiniteAut for (const auto& [cell, ab_states_0] : incoming_refs) { pair, vector> refs; for (auto st : ab_states_0) - refs.first.emplace_back( - Ref{mfas[0]->find_capture_groups_backward(st, cell, ab_classes[0])}); + refs.first.emplace_back(Ref{mfas[0]->find_capture_groups_backward( + st, cell, reversed_transitions[0], ab_classes[0])}); for (auto st : states_with_incoming_refs[1][ab_class][cell]) - refs.second.emplace_back( - Ref{mfas[1]->find_capture_groups_backward(st, cell, ab_classes[1])}); + refs.second.emplace_back(Ref{mfas[1]->find_capture_groups_backward( + st, cell, reversed_transitions[1], ab_classes[1])}); refs_to_compare.emplace_back(refs); } } @@ -1667,13 +1791,11 @@ optional MemoryFiniteAutomaton::bisimilarity_checker(const MemoryFiniteAut for (int i = 0; i < CGs_0.size(); i++) for (int j = 0; j < CGs_1.size(); j++) { const auto &cg0 = CGs_0[i], cg1 = CGs_1[j]; - unordered_set states_to_check_0 = - cg0.get_states_diff(cg1.state_classes), - states_to_check_1 = - cg1.get_states_diff(cg0.state_classes); + auto [diff0, following0] = cg0.get_states_diff(cg1); + auto [diff1, following1] = cg1.get_states_diff(cg0); - if (!mfa1.states_have_decisions(states_to_check_0) && - !mfa2.states_have_decisions(states_to_check_1)) { + if (!mfa1.states_have_decisions(diff0, following0, cg0) && + !mfa2.states_have_decisions(diff1, following1, cg1)) { check_set_0.insert(i); check_set_1.insert(j); } @@ -1682,12 +1804,12 @@ optional MemoryFiniteAutomaton::bisimilarity_checker(const MemoryFiniteAut if (check_set_0.size() != CGs_0.size() || check_set_1.size() != CGs_1.size()) continue; - FiniteAutomaton CGs_0_fa(fas[0].get_subautomaton(CGs_0[0])), - CGs_1_fa(fas[1].get_subautomaton(CGs_1[0])); + FiniteAutomaton CGs_0_fa(mfas[0]->get_cg_fa(CGs_0[0])), + CGs_1_fa(mfas[1]->get_cg_fa(CGs_1[0])); for (int i = 1; i < CGs_0.size(); i++) - CGs_0_fa = FiniteAutomaton::uunion(CGs_0_fa, fas[0].get_subautomaton(CGs_0[i])); + CGs_0_fa = FiniteAutomaton::uunion(CGs_0_fa, mfas[0]->get_cg_fa(CGs_0[i])); for (int i = 1; i < CGs_1.size(); i++) - CGs_1_fa = FiniteAutomaton::uunion(CGs_1_fa, fas[1].get_subautomaton(CGs_1[i])); + CGs_1_fa = FiniteAutomaton::uunion(CGs_1_fa, mfas[1]->get_cg_fa(CGs_1[i])); if (FiniteAutomaton::equivalent(CGs_0_fa, CGs_1_fa)) { refs_0_check_set.insert(ref_0_index); refs_1_check_set.insert(ref_1_index); @@ -1718,8 +1840,8 @@ optional MemoryFiniteAutomaton::bisimilar(const MemoryFiniteAutomaton& mfa return result; } -tuple> MemoryFiniteAutomaton:: - merge_equivalent_classes(const vector& classes) const { +tuple> MemoryFiniteAutomaton::merge_classes( + const vector& classes) const { map> class_to_indexes; for (int i = 0; i < classes.size(); i++) class_to_indexes[classes[i]].push_back(i); @@ -1757,9 +1879,9 @@ tuple> MemoryFiniteAutomaton:: MemoryFiniteAutomaton MemoryFiniteAutomaton::merge_bisimilar(iLogTemplate* log) const { MetaInfo old_meta, new_meta; - vector classes = to_symbolic_fa().get_bisimilar_classes(); + vector classes = to_symbolic_fa().get_bisimulation_classes(); classes.resize(size()); // в symbolic_fa первые size() состояний - состояния исходного mfa - auto [result, class_to_index] = merge_equivalent_classes(classes); + auto [result, class_to_index] = merge_classes(classes); for (int i = 0; i < classes.size(); i++) { for (int j = 0; j < classes.size(); j++) diff --git a/libs/Objects/src/Regex.cpp b/libs/Objects/src/Regex.cpp index 97b81d43..6d4cf255 100644 --- a/libs/Objects/src/Regex.cpp +++ b/libs/Objects/src/Regex.cpp @@ -145,9 +145,6 @@ vector Regex::_to_thompson(const Alphabet& root_alphabet) const { vector fa_left; // список состояний и макс индекс состояния для правого автомата относительно операции vector fa_right; - // автомат для отрицания, строится обычный томпсон и берется дополнение - FiniteAutomaton fa_negative; - vector fa_negative_states; switch (type) { case Type::eps: @@ -262,11 +259,11 @@ vector Regex::_to_thompson(const Alphabet& root_alphabet) const { fa_states.emplace_back(int(fa_left.size()) + 1, true); return fa_states; - case Type::negative: + case Type::negative: { // строим автомат для отрицания - fa_negative_states = Regex::cast(term_l)->_to_thompson(root_alphabet); - - fa_negative = FiniteAutomaton(0, fa_negative_states, root_alphabet); + vector fa_negative_states = Regex::cast(term_l)->_to_thompson(root_alphabet); + // автомат для отрицания, строится обычный томпсон и берется дополнение + FiniteAutomaton fa_negative = FiniteAutomaton(0, fa_negative_states, root_alphabet); fa_negative = fa_negative.minimize(); // берем дополнение автомата fa_negative = fa_negative.complement(); @@ -287,6 +284,7 @@ vector Regex::_to_thompson(const Alphabet& root_alphabet) const { // возвращаем состояния и макс индекс return fa_negative.states; + } default: break; } @@ -660,7 +658,7 @@ void Regex::get_prefix(int len, set& prefs) const { bool Regex::derivative_with_respect_to_sym(Regex* respected_sym, const Regex* reg_e, Regex& result) const { if (respected_sym->type != Type::eps && respected_sym->type != Type::symb) { - cout << "Invalid input: unexpected regex instead of symbol\n"; + cerr << "Invalid input: unexpected regex instead of symbol\n"; return false; } if (respected_sym->type == Type::eps) { @@ -750,7 +748,7 @@ bool Regex::derivative_with_respect_to_sym(Regex* respected_sym, const Regex* re result.type = Type::conc; if (result.term_l == nullptr) result.term_l = new Regex(); - bool answer = derivative_with_respect_to_sym( + answer = derivative_with_respect_to_sym( respected_sym, Regex::cast(reg_e->term_l), *Regex::cast(result.term_l)); result.term_r = reg_e->make_copy(); return answer; @@ -761,7 +759,7 @@ bool Regex::partial_derivative_with_respect_to_sym(Regex* respected_sym, const R vector& result) const { Regex cur_result; if (respected_sym->type != Type::eps && respected_sym->type != Type::symb) { - cout << "Invalid input: unexpected regex instead of symbol\n"; + cerr << "Invalid input: unexpected regex instead of symbol\n"; return false; } if (respected_sym->type == Type::eps) { diff --git a/libs/Objects/src/Symbol.cpp b/libs/Objects/src/Symbol.cpp index 4a433b57..04c7e291 100644 --- a/libs/Objects/src/Symbol.cpp +++ b/libs/Objects/src/Symbol.cpp @@ -183,3 +183,7 @@ int MemorySymbols::get_cell_number(const Symbol& s) { int number = stoi(number_str); return number; } + +bool is_special_symbol(const Symbol& s) { + return s.is_ref() || MemorySymbols::is_memory_symbol(s); +} \ No newline at end of file diff --git a/refal/FrameFormatter.ref b/refal/FrameFormatter.ref index 14f6a60c..bd7433a4 100644 --- a/refal/FrameFormatter.ref +++ b/refal/FrameFormatter.ref @@ -128,7 +128,14 @@ FindSeparator { TakePrefix { 0 e.Rest = (e.Rest); s.Size = (); - s.Length s.Sym e.Rest = s.Sym > e.Rest>; + s.Length s.Sym e.Rest + , : + { e.A1 s.Sym e.A2 = s.Sym > e.Rest>; + e.A + , e.Rest : s.S1 e.Rest1 + = s.Sym s.S1 >> e.Rest1>; + e.A = s.Sym (); + }; } Weight { @@ -772,4 +779,4 @@ ChooseContent { = ; -} +} \ No newline at end of file diff --git a/refal/MathMode.ref b/refal/MathMode.ref index 739941ac..d558c513 100644 --- a/refal/MathMode.ref +++ b/refal/MathMode.ref @@ -14,8 +14,11 @@ $ENTRY Trim { /* Hacked derivatives. TODO: parameterize by indices activation */ SpecialSymbolList { = (NOREGEX ('eps')('\\empt'))(NOREGEX ('DERIV')('\\delta_'))(OK ('FIRST')('\\First'))(OK ('FOLLOW')('\\Follow'))(OK ('LAST')('\\Last')) - (OK ('UNION')('\\cup'))(OK ('\\{')('\\{'))(OK ('\\}')('\\}'))(OK ('{')('\\{'))(OK ('}')('\\}'))(OK ('[')('['))(OK (']')(']')) - (NOREGEX ('LANG')('\\Lang'))(OK ('\\&')('\\memref'))(NOREGEX ('\\textasciicircum')('\\mathbf{\\textasciicircum}\\hspace{-0.2ex}')) + (OK ('UNION')('\\cup'))(OK ('\\{')('\\{'))(OK ('\\}')('\\}'))(OK ('{')('\\{'))(OK ('}')('\\}'))(OK ('[[')('{'))(OK (']]')('}')) + (NOREGEX ('LANG')('\\Lang')) + (OK ('#&')('\\memref'))(OK ('\\&')('\\memref')) + (NOREGEX ('#^')('\\mathbf{\\textasciicircum}\\hspace{-0.2ex}')) + (NOREGEX ('\\textasciicircum')('\\mathbf{\\textasciicircum}\\hspace{-0.2ex}')) (OK ('>->>')('\\transit'))(MEM ('MEMLOPEN') ('\\langle o:\\,'))(MEM ('MEMR')('\\rangle'))(MEM ('MEMLCLOSE')('\\langle c:\\,')); } @@ -43,10 +46,10 @@ SeparateModes { , )(s.1)>)s.2> : {True True = ; - e.OtherValues = '\\regexpstr{'' }' e.Image; + e.OtherValues = > e.Image; }; = e.Image; - e.Nempt = '\\regexpstr{'' }' e.Image; + e.Nempt = > e.Image; }; MEM = ; OK = ; @@ -57,7 +60,7 @@ SeparateModes { False , : { = ; - e.Nempty = '\\regexpstr{'' }'; + e.Nempty = >; }; }; (e.Spec) Text (e.Prefix) s.Sym e.Rest @@ -75,7 +78,14 @@ SeparateModes { }; (e.Spec) Text (e.Prefix) = '\\text{'e.Prefix' }'; (e.Spec) Normal ( ) = ; - (e.Spec) Normal (e.Prefix) = '\\regexpstr{'' }'; + (e.Spec) Normal (e.Prefix) = >; +} + +WrapInRegexpstr { + e.x' & 'e.y = ' & '; + e.x'\\\\'e.y = '\\regexpstr{'e.x' }\\\\'; + = ; + e.y = '\\regexpstr{'e.y' }'; } AdjustSpaceBeforeStar { @@ -250,7 +260,7 @@ $ENTRY NonASCII { } $ENTRY ASCIIStandard { - = '$%&*()-_=+`~!@#;:\'\"<>,./?[]{}\\|^ \t\n'; + = '$%&*()-_=+`~!@#;:\'\"<>,./?[]{}\\|^ \t\n\r'; } $ENTRY Nempty {