From 7d765a828f85828df89fb059b025fc486db0f42a Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Wed, 24 Jan 2018 14:24:09 +0300 Subject: [PATCH 01/24] Removed unnecessary explicit attributes in AttributeValue --- core/src/main/lspl/text/attributes/AttributeValue.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/lspl/text/attributes/AttributeValue.h b/core/src/main/lspl/text/attributes/AttributeValue.h index 9f9607c6..0087c2cf 100644 --- a/core/src/main/lspl/text/attributes/AttributeValue.h +++ b/core/src/main/lspl/text/attributes/AttributeValue.h @@ -74,10 +74,10 @@ class LSPL_EXPORT AttributeValue { */ static AttributeValue createIndexed( const std::string & abbrevation, const std::string & name, const std::string & title ); public: - explicit AttributeValue() : type( AttributeType( AttributeType::INDEXED_ID ) ), value( 0 ) {} - explicit AttributeValue( int index ) : type( AttributeType( AttributeType::INDEXED_ID ) ), value( index ) {} - explicit AttributeValue( const AttributeContainer & container ) : type( AttributeType( AttributeType::COMPOUND_ID ) ), value( reinterpret_cast( &container ) ) {} - explicit AttributeValue( const std::string & str ); + AttributeValue() : type( AttributeType( AttributeType::INDEXED_ID ) ), value( 0 ) {} + AttributeValue( int index ) : type( AttributeType( AttributeType::INDEXED_ID ) ), value( index ) {} + AttributeValue( const AttributeContainer & container ) : type( AttributeType( AttributeType::COMPOUND_ID ) ), value( reinterpret_cast( &container ) ) {} + AttributeValue( const std::string & str ); AttributeValue( const AttributeValue & att ); ~AttributeValue(); From 46446f50448c1935a40f58acff3addbd5f67423a Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Wed, 24 Jan 2018 16:00:54 +0300 Subject: [PATCH 02/24] Fixed template depth for GCC 7.2 in external dependency --- deps/aot/Source/common/common_mak | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/deps/aot/Source/common/common_mak b/deps/aot/Source/common/common_mak index d6bd6e58..11a4a332 100755 --- a/deps/aot/Source/common/common_mak +++ b/deps/aot/Source/common/common_mak @@ -17,32 +17,32 @@ ifndef libmode endif ifeq ($(mode), debug) - cparams := $(cparams) -MD -c -O0 -g $(common_parms) -ftemplate-depth-35 + cparams := $(cparams) -MD -c -O0 -g $(common_parms) -ftemplate-depth-100 ct := d else ifeq ($(mode), debug_thread) - cparams := $(cparams) -pthread -MD -c -O0 -g $(common_parms) -ftemplate-depth-35 + cparams := $(cparams) -pthread -MD -c -O0 -g $(common_parms) -ftemplate-depth-100 lparams := -pthread ct := td else ifeq ($(mode), release_thread) - cparams := $(cparams) -pthread -MD -c -O3 -funroll-loops -fomit-frame-pointer $(common_parms) -ftemplate-depth-35 + cparams := $(cparams) -pthread -MD -c -O3 -funroll-loops -fomit-frame-pointer $(common_parms) -ftemplate-depth-100 ct := t lparams := -pthread else ct := r ifeq ($(mode), profile) ifdef profile - cparams := $(cparams) -pg -MD -c -O3 -funroll-loops $(common_parms) -ftemplate-depth-35 + cparams := $(cparams) -pg -MD -c -O3 -funroll-loops $(common_parms) -ftemplate-depth-100 lparams := -lgcov -pg ct := .prof else - cparams := $(cparams) -g -MD -c -O3 -funroll-loops $(common_parms) -ftemplate-depth-35 + cparams := $(cparams) -g -MD -c -O3 -funroll-loops $(common_parms) -ftemplate-depth-100 ct := .prof endif else ifeq ($(mode), release) - cparams := $(cparams) -MD -c -O3 -funroll-loops -fomit-frame-pointer $(common_parms) -ftemplate-depth-35 + cparams := $(cparams) -MD -c -O3 -funroll-loops -fomit-frame-pointer $(common_parms) -ftemplate-depth-100 else mode_error = $(error unknown mode. It can be debug, release, profile, debug_thread, release_thread) endif From 8bda79d419b15820e792dd65a1c3a79f41feb7a4 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Wed, 31 Jan 2018 18:36:01 +0300 Subject: [PATCH 03/24] Fixed building on Linux with system-wide installed Boost --- core/CMakeLists.txt | 4 ++-- tools/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 3b377d1f..8ea97f74 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -113,7 +113,7 @@ set(LSPL_TEST_SOURCES add_executable(lspl-test ${LSPL_TEST_SOURCES}) -target_link_libraries(lspl-test lspl) +target_link_libraries(lspl-test lspl boost_system) ### lspl-benchmark binary @@ -123,7 +123,7 @@ set(LSPL_BENCHMARK_SOURCES add_executable(lspl-benchmark ${LSPL_BENCHMARK_SOURCES}) -target_link_libraries(lspl-benchmark lspl) +target_link_libraries(lspl-benchmark lspl boost_system) # Flags for effective error parsing diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 30cf1853..76473915 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -94,7 +94,7 @@ set(LSPL_UTIL_TEST_SOURCES add_executable(lspl-util-test ${LSPL_UTIL_TEST_SOURCES}) -target_link_libraries(lspl-util-test lspl) +target_link_libraries(lspl-util-test lspl boost_system) # Flags for effective error parsing From 07914b36d8a6f4164788837cb761a7e7a8ada8f3 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Wed, 31 Jan 2018 18:42:10 +0300 Subject: [PATCH 04/24] Another Boost build fix --- tools/CMakeLists.txt | 2 +- tools/src/lspl/terms/Term.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 76473915..e765fbb5 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -23,7 +23,7 @@ set(LSPL_GENERATOR_SOURCES src/generator.cpp ) -add_executable(lspl-gen ${LSPL_GENERATOR_SOURCES}) +add_executable(lspl-gen ${LSPL_GENERATOR_SOURCES} boost_system) target_link_libraries(lspl-gen lspl) diff --git a/tools/src/lspl/terms/Term.h b/tools/src/lspl/terms/Term.h index 38f3b2f6..6bae5ab0 100644 --- a/tools/src/lspl/terms/Term.h +++ b/tools/src/lspl/terms/Term.h @@ -14,6 +14,7 @@ #include #include #include +#include #include From eef53a18dded3bd94043c259c5aee48ac4449fda Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Mon, 30 Apr 2018 22:34:45 +0300 Subject: [PATCH 05/24] New parser version. Still needs a lot of testing, but works almost fine --- core/CMakeLists.txt | 7 +- core/src/main/lspl/morphology/Morphology.cpp | 131 +- core/src/main/lspl/morphology/Morphology.h | 5 + core/src/main/lspl/patterns/Alternative.cpp | 9 +- .../src/main/lspl/patterns/PatternBuilder.cpp | 1132 ++++++++++++----- core/src/main/lspl/patterns/PatternBuilder.h | 8 +- .../lspl/patterns/matchers/BaseComparator.cpp | 97 ++ .../lspl/patterns/matchers/BaseComparator.h | 104 ++ .../src/main/lspl/patterns/matchers/Forward.h | 2 + .../lspl/patterns/matchers/LoopMatcher.cpp | 28 +- .../main/lspl/patterns/matchers/LoopMatcher.h | 13 +- .../main/lspl/patterns/matchers/Matcher.cpp | 4 + .../src/main/lspl/patterns/matchers/Matcher.h | 5 + .../patterns/matchers/MatcherContainer.cpp | 4 +- .../lspl/patterns/matchers/MatcherContainer.h | 6 +- .../lspl/patterns/matchers/WordMatcher.cpp | 28 +- .../main/lspl/patterns/matchers/WordMatcher.h | 19 +- .../lspl/patterns/parsers/CharacterSets.cpp | 56 + .../lspl/patterns/parsers/CharacterSets.h | 28 + .../main/lspl/patterns/parsers/Functions.cpp | 14 +- .../restrictions/AgreementRestriction.cpp | 8 +- .../restrictions/AgreementRestriction.h | 6 +- .../patterns/restrictions/NotRestriction.cpp | 44 + .../patterns/restrictions/NotRestriction.h | 29 + .../patterns/restrictions/OrRestriction.cpp | 71 ++ .../patterns/restrictions/OrRestriction.h | 41 + core/src/main/lspl/text/markup/Word.h | 1 + .../main/lspl/text/readers/JsonTextReader.cpp | 2 + .../transforms/PatternTransformBuilder.cpp | 2 +- .../lspl/transforms/TextTransformBuilder.cpp | 2 +- core/src/test/LsplBenchmarks.cpp | 30 +- core/src/test/tests/PatternsTest.cpp | 137 +- 32 files changed, 1550 insertions(+), 523 deletions(-) create mode 100644 core/src/main/lspl/patterns/matchers/BaseComparator.cpp create mode 100644 core/src/main/lspl/patterns/matchers/BaseComparator.h create mode 100644 core/src/main/lspl/patterns/parsers/CharacterSets.cpp create mode 100644 core/src/main/lspl/patterns/parsers/CharacterSets.h create mode 100644 core/src/main/lspl/patterns/restrictions/NotRestriction.cpp create mode 100644 core/src/main/lspl/patterns/restrictions/NotRestriction.h create mode 100644 core/src/main/lspl/patterns/restrictions/OrRestriction.cpp create mode 100644 core/src/main/lspl/patterns/restrictions/OrRestriction.h diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 8ea97f74..07ea8ede 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -21,6 +21,7 @@ set(LSPL_CORE_SOURCES src/main/lspl/morphology/AotMorphology.cpp src/main/lspl/patterns/Alternative.cpp + src/main/lspl/patterns/matchers/BaseComparator.cpp src/main/lspl/patterns/matchers/Context.cpp src/main/lspl/patterns/matchers/LoopMatcher.cpp src/main/lspl/patterns/matchers/Matcher.cpp @@ -42,8 +43,12 @@ set(LSPL_CORE_SOURCES src/main/lspl/patterns/restrictions/DictionaryRestriction.cpp src/main/lspl/patterns/restrictions/AgreementRestriction.cpp src/main/lspl/patterns/restrictions/AndRestriction.cpp - + src/main/lspl/patterns/restrictions/NotRestriction.cpp + src/main/lspl/patterns/restrictions/OrRestriction.cpp + + src/main/lspl/patterns/parsers/CharacterSets.cpp src/main/lspl/patterns/parsers/Functions.cpp + src/main/lspl/patterns/Pattern.cpp src/main/lspl/patterns/PatternBuilder.cpp src/main/lspl/utils/Conversion.cpp diff --git a/core/src/main/lspl/morphology/Morphology.cpp b/core/src/main/lspl/morphology/Morphology.cpp index b0b1e9b2..d6a5cc61 100644 --- a/core/src/main/lspl/morphology/Morphology.cpp +++ b/core/src/main/lspl/morphology/Morphology.cpp @@ -25,66 +25,77 @@ Morphology::~Morphology() { } +struct AttributeInfo { + int bitsCount; + int bits[10]; + const AttributeValue* values[10]; +}; + +static AttributeInfo attributeInfos[10] = { + { + 8, + { rNominativ, rGenitiv, rDativ, rAccusativ, rInstrumentalis, rLocativ, rVocativ, rIndeclinable }, + { &AttributeValue::NOMINATIVE, &AttributeValue::GENITIVE, &AttributeValue::DATIVE, &AttributeValue::ACCUSATIVE, &AttributeValue::INSTRUMENTAL, &AttributeValue::PREPOSITIONAL, &AttributeValue::PREPOSITIONAL, &AttributeValue::UNINFLECTED } + }, + { + 2, + { rSingular, rPlural }, + { &AttributeValue::SINGULAR, &AttributeValue::PLURAL } + }, + { + 3, + { rMasculinum, rFeminum, rNeutrum }, + { &AttributeValue::MASCULINE, &AttributeValue::FEMININE, &AttributeValue::NEUTER } + }, + { + 2, + { rComparative, rSuperlative }, + { &AttributeValue::COMPARATIVE, &AttributeValue::SUPERLATIVE } + }, + { + 3, + { rPresentTense, rPastTense, rFutureTense }, + { &AttributeValue::PRESENT, &AttributeValue::PAST, &AttributeValue::FUTURE } + }, + { + 2, + { rAnimative, rNonAnimative }, + { &AttributeValue::ANIMATE, &AttributeValue::INANIMATE } + }, + { // Форма + 0, + {}, + {} + }, + { + 1, + { rImperative }, + { &AttributeValue::IMPERATIVE } + }, + { + 3, + { rFirstPerson, rSecondPerson, rThirdPerson }, + { &AttributeValue::FIRST, &AttributeValue::SECOND, &AttributeValue::THIRD } + }, + { + 0, + {}, + {} + } +}; + +AttributeKey Morphology::getAttributeKeyByValue(AttributeValue value) const { + if (value.type != AttributeType::INDEXED) + return AttributeKey::UNDEFINED; + for (uint i = 0; i < 10; ++i) { + for (uint j = 0; j < attributeInfos[i].bitsCount; ++j) + if ((*attributeInfos[i].values[j]) == value) + return AttributeKey(i + 1); + } + return AttributeKey::UNDEFINED; +} + AttributeValue Morphology::extractAttributeValue( uint64 attributeSet, AttributeKey key ) { - struct AttributeInfo { - int bitsCount; - - int bits[10]; - AttributeValue values[10]; - }; - - static AttributeInfo attributeInfos[10] = { - { - 8, - { rNominativ, rGenitiv, rDativ, rAccusativ, rInstrumentalis, rLocativ, rVocativ, rIndeclinable }, - { AttributeValue::NOMINATIVE, AttributeValue::GENITIVE, AttributeValue::DATIVE, AttributeValue::ACCUSATIVE, AttributeValue::INSTRUMENTAL, AttributeValue::PREPOSITIONAL, AttributeValue::PREPOSITIONAL, AttributeValue::UNINFLECTED } - }, - { - 2, - { rSingular, rPlural }, - { AttributeValue::SINGULAR, AttributeValue::PLURAL } - }, - { - 3, - { rMasculinum, rFeminum, rNeutrum }, - { AttributeValue::MASCULINE, AttributeValue::FEMININE, AttributeValue::NEUTER } - }, - { - 2, - { rComparative, rSuperlative }, - { AttributeValue::COMPARATIVE, AttributeValue::SUPERLATIVE } - }, - { - 3, - { rPresentTense, rPastTense, rFutureTense }, - { AttributeValue::PRESENT, AttributeValue::PAST, AttributeValue::FUTURE } - }, - { - 2, - { rAnimative, rNonAnimative }, - { AttributeValue::ANIMATE, AttributeValue::INANIMATE } - }, - { // Форма - 0, - {}, - {} - }, - { - 1, - { rImperative }, - { AttributeValue::IMPERATIVE } - }, - { - 3, - { rFirstPerson, rSecondPerson, rThirdPerson }, - { AttributeValue::FIRST, AttributeValue::SECOND, AttributeValue::THIRD } - }, - { - 0, - {}, - {} - } - }; if ( key.index <= 0 || key.index > 10 ) // Если тип запрашиваемого аттрибута некорректен return AttributeValue::UNDEFINED; // Возвращаем неопределенное значение @@ -93,7 +104,7 @@ AttributeValue Morphology::extractAttributeValue( uint64 attributeSet, Attribute for ( int i = 0; i < info.bitsCount; ++ i ) if ( attributeSet & ( ((uint64)1) << info.bits[ i ] ) ) - return info.values[ i ]; + return *(info.values[ i ]); return AttributeValue::UNDEFINED; } diff --git a/core/src/main/lspl/morphology/Morphology.h b/core/src/main/lspl/morphology/Morphology.h index 08ad0318..3806fd24 100644 --- a/core/src/main/lspl/morphology/Morphology.h +++ b/core/src/main/lspl/morphology/Morphology.h @@ -57,6 +57,11 @@ class LSPL_EXPORT Morphology { return result.release(); } + /** + * По признаку получить его вид. Работает только для индексных (не составных и не строковых) признаков + */ + virtual text::attributes::AttributeKey getAttributeKeyByValue(text::attributes::AttributeValue value) const; + virtual std::string getAttributesString( uint64 attValues ) = 0; std::string upcase( const std::string & str ) { return upcase( str.c_str() ); } diff --git a/core/src/main/lspl/patterns/Alternative.cpp b/core/src/main/lspl/patterns/Alternative.cpp index 1f51c209..0c127147 100644 --- a/core/src/main/lspl/patterns/Alternative.cpp +++ b/core/src/main/lspl/patterns/Alternative.cpp @@ -114,7 +114,14 @@ void Alternative::dump( std::ostream & out, const std::string & tabs ) const { getMatcher( i ).dump( out, tabs + "\t" ); } - out << "\n" << tabs << "] }"; + out << "\n" << tabs << "], bindings = [\n"; + + for ( BindingMap::const_iterator it = bindings.begin(); it != bindings.end(); ++ it ) { + out << tabs << "\t" << it->first.getAbbrevation() << " : "; + it->second->dump(out); + out << '\n'; + } + out << tabs << "] }"; } } } // namespace lspl::patterns diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index 26e05ba0..f4039976 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -1,35 +1,32 @@ #include "../base/BaseInternal.h" - -#define PHOENIX_LIMIT 4 -#define BOOST_SPIRIT_CLOSURE_LIMIT 4 - -#include "../utils/RusConsts.h" +#include "../Namespace.h" #include "PatternBuilder.h" #include "Pattern.h" -#include "parsers/Functions.h" -#include "parsers/VariableParser.h" -#include "parsers/AttributeKeyParser.h" +#include "parsers/CharacterSets.h" + +#include "../morphology/Morphology.h" #include "matchers/Matcher.h" +#include "matchers/TokenMatcher.h" +#include "matchers/RegexpMatcher.h" +#include "matchers/LoopMatcher.h" +#include "matchers/PatternMatcher.h" +#include "matchers/WordMatcher.h" #include "../transforms/TransformBuilder.h" #include "../transforms/TextTransformBuilder.h" #include "../transforms/PatternTransformBuilder.h" -#include -#include -#include -#include -#include -#include -#include - -#include +#include "restrictions/AgreementRestriction.h" +#include "restrictions/NotRestriction.h" +#include "restrictions/OrRestriction.h" -using namespace boost::spirit::classic; -using namespace phoenix; +#include "expressions/CurrentAnnotationExpression.h" +#include "expressions/ConstantExpression.h" +#include "expressions/AttributeExpression.h" +#include "expressions/VariableExpression.h" using namespace lspl::text::attributes; @@ -37,341 +34,783 @@ using namespace lspl::patterns::restrictions; using namespace lspl::patterns::expressions; using namespace lspl::patterns::matchers; using namespace lspl::patterns::parsers; +using lspl::morphology::Morphology; LSPL_REFCOUNT_CLASS( lspl::patterns::PatternBuilder ); namespace lspl { namespace patterns { -class ParserImpl : public grammar, public PatternBuilder::Parser { -public: - - struct AgreementRestrictionClosure : public boost::spirit::classic::closure< AgreementRestrictionClosure, Restriction *, boost::ptr_vector > { - member1 restriction; - member2 args; - }; - - struct DictionaryRestrictionClosure : public boost::spirit::classic::closure< DictionaryRestrictionClosure, Restriction *, std::string, boost::ptr_vector > { - member1 restriction; - member2 dictionaryName; - member3 args; - }; - - struct MatcherClosure : public boost::spirit::classic::closure< MatcherClosure, uint, boost::ptr_vector > { - member1 index; - member2 restrictions; - }; - - struct WordMatcherClosure : public boost::spirit::classic::closure< WordMatcherClosure, std::string, SpeechPart > { - member1 base; - member2 speechPart; - }; - - struct TokenMatcherClosure : public boost::spirit::classic::closure< TokenMatcherClosure, std::string > { - member1 token; - }; - - struct PatternMatcherClosure : public boost::spirit::classic::closure< PatternMatcherClosure, std::string > { - member1 name; - }; - - struct LoopMatcherClosure : public boost::spirit::classic::closure< LoopMatcherClosure, uint, uint, std::vector > { - member1 min; - member2 max; - member3 alternativesCount; - }; - - struct LoopBodyClosure : public boost::spirit::classic::closure< LoopBodyClosure, uint > { - member1 matcherCount; - }; - - struct PatternClosure : public boost::spirit::classic::closure< PatternClosure, std::string, boost::ptr_vector > { - member1 name; - member2 alternatives; - }; - - struct AlternativeClosure : public boost::spirit::classic::closure< AlternativeClosure, uint, boost::ptr_vector, boost::ptr_map, std::string, std::string > { - member1 stub; - member2 matchers; - member3 bindings; - member4 transformSource; - member5 transformType; - }; - - struct BindingClosure : public boost::spirit::classic::closure< BindingClosure, AttributeKey, Expression * > { - member1 att; - member2 exp; - }; - - struct ExpressionClosure : public boost::spirit::classic::closure< ExpressionClosure, Expression*, boost::ptr_vector > { - member1 exp; - member2 args; - }; - - enum Errors { - BindingEndMissing, - RestrictionEndMissing, - LoopEndMissing, - OptionalEndMissing, - NoMatchersInAlternative, - NoMatchersInGroup, - NoRestrictionBody, - InvalidPatternName, - ClosingSglQuoteMissed, - ClosingDblQuoteMissed, - AttributeValueExpected - }; - - template class definition { - public: - public: - definition( const ParserImpl & self_c ) : variable( typeSymbol ) { - ParserImpl & self = *const_cast( &self_c ); - - assertion expect_binding_end(BindingEndMissing); - assertion expect_restriction_end(RestrictionEndMissing); - assertion expect_loop_end(LoopEndMissing); - assertion expect_optional_end(OptionalEndMissing); - assertion expect_alt_matcher(NoMatchersInAlternative); - assertion expect_grp_matcher(NoMatchersInGroup); - assertion expect_restriction_body(NoRestrictionBody); - assertion expect_valid_pattern_name(InvalidPatternName); - assertion expect_closing_sgl_quote(ClosingSglQuoteMissed); - assertion expect_closing_dbl_quote(ClosingDblQuoteMissed); - assertion expect_attribute_value(AttributeValueExpected); - - function add; - - function addBinding; - function addRestriction; - - function addPatternMatcher( AddPatternMatcherImpl( *self.space, typeSymbol ) ); - function addWordMatcher; - function addTokenMatcher; - function addLoopMatcher; - function addAlternativeDefinition( AddAlternativeDefinitionImpl( self.transformBuilders ) ); - function addPatternDefinition( AddPatternDefinitionImpl( *self.space, typeSymbol ) ); - - function createAgreementRestriction; - function createDictionaryRestriction( *self.space ); - - function createCurrentAttributeExpression; - function createVariableExpression; - function createAttributeExpression; - function createConcatExpression; - function createStringLiteralExpression; - function createLiteralExpression; - - endLoop = expect_loop_end( ch_p( '}' ) ); - endOptional = expect_optional_end( ch_p( ']' ) ); - endBinding = expect_binding_end( ch_p(')') ); - endRestriction = expect_restriction_end( ch_p('>') ); - - source = *pattern; - - pattern = ( - ( patternName[ pattern.name = construct_( arg1, arg2 ) ] >> '=' >> ( alternative % '|' ) ) | - ( ( alternative % '|' )[ pattern.name = construct_( arg1, arg2 ) ] >> !( ch_p('=') >> expect_valid_pattern_name( nothing_p ) ) ) - )[ addPatternDefinition( pattern.name, pattern.alternatives ) ]; - - alternative = ( matcher >> *(matcher|patternRestrictions) >> !bindingList >> !alternativeTransformSource ) - [ addAlternativeDefinition( pattern.alternatives, alternative.matchers, alternative.bindings, construct_( arg1, arg2 ), alternative.transformSource, alternative.transformType ) ]; - - alternativeTransformSource = ch_p('=') >> lexeme_d[ +chset_p("a-z") ] [ alternative.transformType = construct_( arg1, arg2 ) ] >> ch_p('>') - >> lexeme_d[ *~chset_p("\n|") ][ alternative.transformSource = construct_( arg1, arg2 ) ]; - - patternName = lexeme_d[ +chset_p("a-zA-Z" RUS_ALPHA "-") >> ~epsilon_p(chset_p("a-zA-Z" RUS_ALPHA "-")) ]; - - binding = ( expression[ binding.exp = arg1 ] >> !( "AS" >> attributeKey[ binding.att = arg1 ] ) ) - [ addBinding( alternative.bindings, binding.att, binding.exp ) ]; - - bindingList = '(' >> binding % ',' >> endBinding; - - /* - * Парсер сопоставителя - */ - - matcher = wordMatcher | tokenMatcher | patternMatcher | loopMatcher; - - matcherVariable = epsilon_p[ matcher.index = 0 ] >> !uint_p[ matcher.index = arg1 ]; - - /* - * Парсер сопоставителя слов - */ - - wordMatcher = ( wordType >> matcherVariable >> !wordRestriction ) - [ addWordMatcher( alternative.matchers, wordMatcher.base, wordMatcher.speechPart, matcher.index, matcher.restrictions ) ]; - - wordType = lexeme_d[ speechPart[ wordMatcher.speechPart = arg1 ] >> ~epsilon_p(chset_p("a-zA-Z")) ]; - - wordRestriction = ch_p('<')[ wordMatcher.base = "" ] - >> ( - ( wordBase >> !(chset_p(";,") >> ( matcherRestriction[add( matcher.restrictions, arg1 )] % ',' )) ) | - ( matcherRestriction[add( matcher.restrictions, arg1 )] % ',' ) - ) >> endRestriction; - - wordBase = ( lexeme_d[ +chset_p("a-zA-Z0-9" RUS_ALPHA "-") ] >> epsilon_p(chset_p(";,>")) )[ wordMatcher.base = construct_( arg1, arg2 ) ]; - - /* - * Парсер сопоставителя лексем - */ - - tokenMatcher = lexeme_d[ switch_p[ - case_p< '"' >( (+~ch_p('"'))[ tokenMatcher.token = construct_( arg1, arg2 ) ] >> expect_closing_dbl_quote( ch_p('"') ) ), - case_p< '\'' >( (+~ch_p('\''))[ tokenMatcher.token = construct_( arg1, arg2 ) ] >> expect_closing_sgl_quote( ch_p('\'') ) ) - ] ][ addTokenMatcher( alternative.matchers, tokenMatcher.token ) ]; - - /* - * Парсер сопоставителя шаблонов - */ - - patternMatcher = ( patternName[ patternMatcher.name = construct_( arg1, arg2 ) ] >> matcherVariable >> !( '<' >> ( matcherRestriction[add( matcher.restrictions, arg1 )] % ',' ) >> '>' ) )[ addPatternMatcher( alternative.matchers, patternMatcher.name, matcher.index, matcher.restrictions ) ]; - - /* - * Парсер сопоставителя циклов - */ - loopMatcher = switch_p[ - case_p< '{' >( ( loopBody % '|' ) >> endLoop[ loopMatcher.min = 0 ][ loopMatcher.max = 0 ] >> !loopRestriction ), - case_p< '[' >( ( loopBody % '|' ) >> endOptional[ loopMatcher.min = 0 ][ loopMatcher.max = 1 ] ) - ][ addLoopMatcher( alternative.matchers, loopMatcher.min, loopMatcher.max, loopMatcher.alternativesCount ) ]; - - loopBody = expect_grp_matcher( matcher[ loopBody.matcherCount = 1 ] ) >> ( *(matcher[ loopBody.matcherCount ++ ] | patternRestrictions) )[ add( loopMatcher.alternativesCount, loopBody.matcherCount ) ]; +class ParserImpl: public PatternBuilder::Parser { +private: + const char *buffer; + uint pos; + + /* + * Пропускает пробельные символы в буфере + */ + void skipSpaces() { + while (isSpace(buffer[pos])) + ++pos; + } + + /** + * Проверка на наличие впереди конца ввода + */ + bool seekEndOfInput() { + skipSpaces(); + return buffer[pos] == '\0'; + } + + /** + * Создаёт экземпляр исключения с заданным сообщением об ошибке, хранящий + * информацию о текущей позиции парсера и входных данных + */ + PatternBuildingException produceException(const std::string &description) { + std::string combinedDesc = description; + combinedDesc += " at character #" + std::to_string(pos); + PatternBuildingException e(combinedDesc, buffer, pos); + return e; + } + + /* + * Вырезать из токена-идентификатора индекс + */ + bool cutIndexFromToken(std::string &token, uint &index) { + int pos = token.length(); + while (pos != 0 && isDigit(token[pos - 1])) + --pos; + + if (pos == token.length()) + return false; + if (pos == 0) + throw produceException("Invalid token: matcher with index expected, but only integer found"); + + try { + index = std::stoul(token.substr(pos)); + } catch (std::out_of_range &e) { + throw produceException("Integer overflow in matcher index"); + } catch (...) { + throw produceException("Unknown exception while retrieving matcher index"); + } + + token.resize(pos); + return true; + } + + /* + * Убеждается, что текст продолжается строкой pattern + */ + bool strFollows(const char *pattern) { + skipSpaces(); + uint i = 0; + while (pattern[i] != '\0' && pattern[i] == buffer[pos + i]) + ++i; + return pattern[i] == '\0'; + } + + /* + * В точности повторяет действие функции strFollows, но считывает pattern из буфера + * и кидает исключение, если не встречает строку + */ + void readStrFollows(const char *pattern) { + if (!strFollows(pattern)) + throw produceException(std::string("Expected \"") + pattern + "\""); + pos += strlen(pattern); + } + + /** + * Считывает токен + */ + std::string readToken() { + skipSpaces(); + std::string token; + while (!isInvalidChar(buffer[pos]) && !isPunct(buffer[pos]) && !isSpace(buffer[pos])) + token += buffer[pos++]; + return token; + } + + /* + * Считывает беззнаковое целое + */ + uint readUInt() { + std::string token = readToken(); + uint index = 0; + try { + index = std::stoul(token); + } catch (std::out_of_range &e) { + throw produceException("Integer overflow"); + } catch (...) { + throw produceException("Unknown exception"); + } + return index; + } + + /** + * Обработка имени шаблона + */ + std::string readPatternName() { + skipSpaces(); + if (getCharacterSets(buffer[pos]) != (LSPL_CHARACTERSETS_LATIN | LSPL_CHARACTERSETS_UPPERCASE)) + throw produceException("Pattern name should start with an uppercase latin letter"); + std::string patternName = readToken(); + if (!isLatin(patternName.back())) + throw produceException("Pattern name should end with a latin letter"); + return patternName; + } + + /** + * Обработка параметров шаблона + */ + std::vector readPatternArguments(PatternRef pattern) { + std::vector result; + readStrFollows("("); + result.push_back(readAttributeExpression()); + while (strFollows(",")) { + readStrFollows(","); + result.push_back(readAttributeExpression()); + } + readStrFollows(")"); + return result; + } + + /** + * Является ли строка регулярным выражением? + */ + static bool isRegexp( const std::string & str ) { + static std::string regexSymbols(".[{()\\*+?|^$'"); + + for (uint i = 0; i < regexSymbols.length(); ++i) + if (str.find(regexSymbols.at(i) ) != std::string::npos) + return true; + + return false; + } + + /* + * Разделить строку на токены + */ + static std::vector split(const std::string &contents) { + std::vector words; + std::string word; + for (uint i = 0; i < contents.length(); ++i) { + if (!isSpace(contents[i])) + word += contents[i]; + else if (word.length() != 0) + words.push_back(word); + } + if (word.length() != 0) + words.push_back(word); + return words; + } + + /** + * Считать строковую константу + */ + std::string readStringConstant() { + readStrFollows("\""); + std::string contents; + while (buffer[pos] != '\0' && buffer[pos] != '\"') + contents += buffer[pos++]; + if (buffer[pos] == '\0') + throw produceException("Unexpected end of input (closing \" excepted)"); + readStrFollows("\""); + return contents; + } + + /** + * элемент_строка := "регулярное выражение" + */ + Matcher* readStringMatcher() { + std::string contents = readStringConstant(); + if (isRegexp(contents)) + return new RegexpMatcher(contents); + + // Разделяем на отдельные слова, если строка не является + // регулярным выражением + std::vector words = split(contents); + if (words.size() == 0) + throw produceException("Empty string cannot be matched"); + if (words.size() == 1) + return new TokenMatcher(words[0]); + + // Слов больше, чем одно. Создаём отдельные сопоставители для каждого слова + LoopMatcher *wordMatcher = new LoopMatcher(1, 1); + MatcherContainer &container = wordMatcher->newAlternative(); + for (std::string &word : words) + container.addMatcher(new TokenMatcher(word)); + return wordMatcher; + } + + /** + * Обработка элемента шаблона + * + * элемент_шаблона ::= простой_элемент | опциональный_элемент | повторение_элементов + * | (набор_альтернатив) + * + * простой элемент := элемент_строка | элемент_слово | экземпляр_шаблона + */ + Matcher* readMatcher() { + skipSpaces(); + if (strFollows("{")) + return readNestedMatcher(0, 0, "{", "}", true); + if (strFollows("[")) + return readNestedMatcher(0, 1, "[", "]", false); + if (strFollows("(")) + return readNestedMatcher(1, 1, "(", ")", false); + if (strFollows("\"")) + return readStringMatcher(); + + // элемент_слово + std::string token = readToken(); + if (token == "") + throw produceException("Matcher expected"); + uint index = 0; + cutIndexFromToken(token, index); + for (int i = 0; i < SpeechPart::COUNT; ++i) + if (token == SpeechPart(i).getAbbrevation()) + return readWordMatcher(SpeechPart(i), index); + + // экземпляр_шаблона + PatternRef pattern = space->getPatternByName(token); + if (!pattern) + throw produceException("No pattern with specified name"); + return readPatternMatcher(pattern, index); + } + + /* + * Считать сопоставитель с вложенным списком альтернатив, т.е. + * + * 1) опциональный_элемент + * 2) (набор_альтернатив) + * 3) повторение_элементов + * + * В параметрах min и max задаётся минимальное и максимальное количество повторений + * (см. LoopMatcher) + * + * lbrace и rbrace -- это открывающая и закрывающая скобка (круглая, квадратная или + * фигурная) + * + * Параметр allow задаёт, можно ли переопределять значения min и max в самом коде шаблона + * + */ + Matcher* readNestedMatcher(uint min, uint max, const char* lbrace, const char* rbrace, bool allow) { + readStrFollows(lbrace); + std::vector > alts = readAlternatives(); + readStrFollows(rbrace); + + if (allow && strFollows("<") && !strFollows("<<")) { + readStrFollows("<"); + min = readUInt(); + if (strFollows(",")) { + readStrFollows(","); + max = readUInt(); + } + readStrFollows(">"); + } + + LoopMatcher *matcher = new LoopMatcher(min, max); + for (std::vector &alt : alts) + matcher->newAlternative().addMatchers(alt.begin(), alt.end()); + return matcher; + } + + /** + * Считать основу слова или лемму + */ + std::string readMatcherBase() { + std::string token; + while (isCyrillic(buffer[pos]) || isDigit(buffer[pos]) || buffer[pos] == '-') + token += buffer[pos++]; + if (token.empty()) + throw produceException("Empty lemma or stem"); + if (token.front() == '-' || token.back() == '-') + throw produceException("Lemma or stem can't start/finish with a hyphen"); + return token; + } + + /** + * Считывает последовательность альтернатив лемм/основ + */ + AlternativeBaseComparator* readAlternativeBaseComparator(AlternativeBaseComparator *cmp) { + cmp->addAlternativeBase(readMatcherBase()); + while (strFollows("|")) { + readStrFollows("|"); + cmp->addAlternativeBase(readMatcherBase()); + } + return cmp; + } + + /** + * условия_на_лемму ::= [ lemma = ] лемма { | лемма } | [ lemma ] != лемма { | лемма } + */ + void readLemmaRestriction(Matcher *matcher) { + WordMatcher *word_m = dynamic_cast(matcher); + if (word_m == nullptr) + throw produceException("No lemma restrictions on a non-word matcher"); + + bool readAnouncement = false; + if (strFollows("lemma")) { + readStrFollows("lemma"); + readAnouncement = true; + } + + bool negative = false; // != + if (strFollows("=")) + readStrFollows("="); + else if (strFollows("!=")) { + negative = true; + readStrFollows("!="); + } else if (readAnouncement) + throw produceException("= or != expected"); + + if (strFollows("\"")) + word_m->setBaseComparator(new LemmaRegexpComparator(readStringConstant(), negative)); + else + word_m->setBaseComparator(readAlternativeBaseComparator(new LemmaComparator(negative))); + } + + /** + * условия_на_основу ::= stem = основа { | основа } | stem != основа { | основа} + */ + void readStemRestriction(Matcher *matcher) { + readStrFollows("stem"); + + WordMatcher *word_m = dynamic_cast(matcher); + if (word_m == nullptr) + throw produceException("No stem restrictions on a non-word matcher"); + + bool negative = false; // != + if (strFollows("=")) + readStrFollows("="); + else if (strFollows("!=")) { + negative = true; + readStrFollows("!="); + } else + throw produceException("= or != expected"); + + if (strFollows("\"")) + word_m->setBaseComparator(new StemRegexpComparator(readStringConstant(), negative)); + else + word_m->setBaseComparator(readAlternativeBaseComparator(new StemComparator(negative))); + } + + /** + * Создаёт для сопоставителя ограничение на характеристику attributeName, которая в качестве + * значений может принимать аргументы из набора attributeNames + * + * negative == true меняет поведения ограничения на обратное (условие не должно выполняться) + */ + Restriction* appendMatcherAttributeRestriction(std::string &attributeName, bool negative, std::vector attributeNames) { + std::vector values(attributeNames.size()); + std::transform(attributeNames.begin(), attributeNames.end(), values.begin(), AttributeValue::findIndexedByAbbrevation); + + std::vector::iterator it; + if ((it = std::find(values.begin(), values.end(), AttributeValue::UNDEFINED)) != values.end()) + throw produceException("Unknown attribute value \"" + attributeNames[it - values.begin()] + "\""); + + AttributeKey key = attributeName != "" ? AttributeKey::findByAbbrevation(attributeName) + : Morphology::instance().getAttributeKeyByValue(attributeNames.front()); + if (key == AttributeKey::UNDEFINED) + throw produceException("Unable to retrieve attribute type"); + + it = std::find_if(values.begin(), values.end(), [&](AttributeValue &v){ return Morphology::instance().getAttributeKeyByValue(v) != key; }); + if (it != values.end()) + throw produceException("Attribute value \"" + attributeNames[it - values.begin()] + "\" doesn't correspond to common attribute type"); + + // Аргрументы проверены, можно собирать ограничение + Restriction* result; + std::vector alternativeRestrictions; + for (AttributeValue &v: values){ + AgreementRestriction *r = new AgreementRestriction(); + r->addArgument(new AttributeExpression(new CurrentAnnotationExpression(), key)); + r->addArgument(new ConstantExpression(v)); + alternativeRestrictions.push_back(r); + } + + if (alternativeRestrictions.size() == 1) + result = alternativeRestrictions.front(); + else { + OrRestriction *r = new OrRestriction(); + for (Restriction *alt : alternativeRestrictions) + r->addArgument(alt); + result = r; + } + + if (negative) + result = new NotRestriction(result); + return result; + } + + + /** + * Чтение одного ограничения на аттрибут + * + * характеристика ::= [ название_ признака = ] значение_ признака { | значение_ признака } | + * [ название_ признака ] != значение_ признака { | значение_ признака } + * + */ + void readAttributeRestriction(Matcher *matcher) { + bool negative = false; // != + std::string attributeName; + std::vector valueNames; + + skipSpaces(); + if (isLatin(buffer[pos])) { + attributeName = readToken(); + if (strFollows("|") || strFollows(",") || strFollows(">")) { + valueNames.emplace_back(std::move(attributeName)); + if (strFollows("|")) readStrFollows("|"); + } + else if (strFollows("=")) + readStrFollows("="); + else if (!strFollows("!=")) + throw produceException("Invalid expression in matcher restriction"); + } + + if (strFollows("!=")) { + readStrFollows("!="); + negative = true; + }; + + skipSpaces(); + if (!isLatin(buffer[pos])) + return; + + valueNames.push_back(readToken()); + while (strFollows("|")) { + readStrFollows(""); + valueNames.push_back(readToken()); + if (valueNames.back().length() == 0) + throw produceException("Expression value expected"); + } + + matcher->addRestriction(appendMatcherAttributeRestriction(attributeName, negative, valueNames)); + } + + /** + * Чтение одного ограничения сопоставителя + * + */ + void readMatcherRestriction(Matcher *matcher) { + if (strFollows("lemma") || isCyrillic(buffer[pos]) || strFollows("\"")) + readLemmaRestriction(matcher); + else if (strFollows("stem")) + readStemRestriction(matcher); + else if (strFollows("!=")) { + // По одному != непонятно, что будет дальше, ограничение на лемму или на признак + // Запоминаем позицию для отката назад, если не угадали + uint backtrack_pos = pos; + readStrFollows("!="); + skipSpaces(); + if (isCyrillic(buffer[pos]) || isDigit(buffer[pos])) { + pos = backtrack_pos; + readLemmaRestriction(matcher); + } else { + pos = backtrack_pos; + readAttributeRestriction(matcher); + } + + } else + readAttributeRestriction(matcher); + } + + /* + * Чтение списка ограничений сопоставителя + */ + void readMatcherRestrictions(Matcher *matcher) { + readStrFollows("<"); + readMatcherRestriction(matcher); + while (!strFollows(">")) { + readStrFollows(","); + readAttributeRestriction(matcher); + } + readStrFollows(">"); + } + + /** + * Считать сопоставитель-шаблон + * + * экземпляр-шаблона ::= имя_шаблона [индекс] | имя_шаблона [индекс] <характеристика { , характеристика }> + */ + Matcher* readPatternMatcher(PatternRef pattern, uint index) { + PatternMatcher *matcher = new PatternMatcher(*pattern); + matcher->variable = Variable(*pattern, index); + if (strFollows("<") && !strFollows("<<")) + readMatcherRestrictions(matcher); + return matcher; + } + + /* + * Считать сопоставитель-слово + */ + Matcher* readWordMatcher(const SpeechPart &sp, uint index) { + WordMatcher *matcher = new WordMatcher(sp); + matcher->variable = Variable(sp, index); + if (strFollows("<") && !strFollows("<<")) + readMatcherRestrictions(matcher); + return matcher; + } + + /* + * Сгенерировать сопоставитель, реализующий перестановку из + * указанного набора сопоставителей + */ + Matcher* makePermutationMatcher(std::vector source) { + if (source.size() == 0) + throw produceException("Internal error: empty permutation requested"); + if (source.size() == 1) + return source.front(); + + LoopMatcher *wordMatcher = new LoopMatcher(1, 1, true); + sort(source.begin(), source.end()); + do { + wordMatcher->newAlternative().addMatchers(source.begin(), source.end()); + } while (next_permutation(source.begin(), source.end())); + return wordMatcher; + } + + /** + * Считать одно имя переменной + признак (если есть) + * + * имя ::= имя_элемента [. название_признака] | имя_шаблона [индекс] [. название_признака] + */ + Expression* readAttributeExpression() { + std::string token = readToken(); + if (token == "") + throw produceException("Variable name expected"); + uint index = 0; + cutIndexFromToken(token, index); + + // Элемент-слово + Expression* result = nullptr; + for (int i = 0; i < SpeechPart::COUNT; ++i) + if (token == SpeechPart(i).getAbbrevation()) { + result = new VariableExpression(SpeechPart(i), index); + break; + } + + // Шаблон + if (!result) { + PatternRef pattern = space->getPatternByName(token); + if (!pattern) + throw produceException("No pattern with specified name"); + result = new VariableExpression(*pattern, index); + } + + if (strFollows(".")) { + readStrFollows("."); + AttributeKey key = AttributeKey::findByAbbrevation(readToken()); + if (key == AttributeKey::UNDEFINED) + throw produceException("Unknown attribute"); + result = new AttributeExpression(result, key); + } + + return result; + } + + /** + * Считать = или == + */ + std::string readAgreement() { + if (strFollows("==")) { + readStrFollows("=="); + return "=="; + } + if (strFollows("=")) { + readStrFollows("="); + return "="; + } + throw produceException("= or == expected"); + } + + /** + * Считать одно ограничение согласования для перестановки + * + * условие ::= условие_ согласования + * условие_ согласования ::= имя = имя { = имя } | имя == имя { == имя } + */ + void readPermutationRestriction(std::vector &matchers) { + + std::vector exps(1, readAttributeExpression()); + std::string agreementType = readAgreement(); + exps.push_back(readAttributeExpression()); + while (strFollows("=")) { + if (readAgreement() != agreementType) + throw produceException("Weak (=) and strong (==) agreements mixed"); + exps.push_back(readAttributeExpression()); + } + AgreementRestriction *restriction = new AgreementRestriction(); + for (Expression *e : exps) + restriction->addArgument(e); + + + // Ок, теперь нужно найти необходимый сопоставитель. Перебираем их от последнего к первому и смотрим + for (std::vector::reverse_iterator it = matchers.rbegin(); it != matchers.rend(); ++it) + if ((*it)->variable != Variable() && restriction->containsVariable((*it)->variable)) { + (*it)->addRestriction(restriction); + return; + } + + // Ограничение не подошло ни к одному сопоставителю. Возможно, тут совсем ничего не нужно делать? + // Мы пересрахуемся и всё же добавим ограничение в конец. + matchers.back()->addRestriction(restriction); + // FIXME: возможно, нужно бросить исключение? + } + + /** + * Считать ограничения согласования для перестановки + * + * условия ::= условие {, условие } + */ + void readPermutationRestrictions(std::vector &matchers) { + readStrFollows("<<"); + readPermutationRestriction(matchers); + while (strFollows(",")) { + readStrFollows(","); + readPermutationRestriction(matchers); + } + readStrFollows(">>"); + } + + /** + * Обработка шаблона распознавания (последовательности перестановок) + * + * шаблон_распознавания ::= последовательность_перестановок + * + * последовательность_перестановок ::= последовательность_элементов + * { ~ последовательность_элементов } + * [ <<условия>> ] + * + * последовательность элементов := элемент_шаблона { элемент_шаблона } + * + */ + std::vector readPermutation() { + std::vector matchers; + std::vector permutation; + permutation.push_back(readMatcher()); + static std::string followers = "[({\"~"; + + while (!seekEndOfInput() && (isLatin(buffer[pos]) || followers.find(buffer[pos]) != std::string::npos)) { + if (strFollows("~")) + readStrFollows("~"); + else + matchers.push_back(makePermutationMatcher(std::move(permutation))); + permutation.push_back(readMatcher()); + } + matchers.push_back(makePermutationMatcher(std::move(permutation))); + if (strFollows("<<")) + readPermutationRestrictions(matchers); + return matchers; + } + + std::vector > readAlternatives() { + std::vector > alts; + alts.push_back(readPermutation()); + while (strFollows("|")) + alts.push_back(readPermutation()); + return alts; + } + + /** + * Считать альтернативу шаблона pattern, сохранив её source + */ + void readAlternativeWithSource(PatternRef pattern) { + uint before_pos = pos; + std::vector alt = readPermutation(); + pattern->newAlternative(std::string(buffer + before_pos, buffer + pos)).addMatchers(alt.begin(), alt.end()); + } + + /** + * Прицепить к альтернативе параметр шаблона + */ + void appendAlternativeBinding(Alternative &alt, Expression *exp) { + AttributeKey key = AttributeKey::UNDEFINED; + Variable var; + + if (dynamic_cast(exp)) { + // Переменная с аттрибутом + AttributeExpression* attrexp = static_cast(exp); + key = attrexp->attribute; + const VariableExpression* varexp = dynamic_cast(attrexp->base); + if (!varexp) throw new std::logic_error("Invalid alternative binding in parser"); + var = varexp->getVariable(); + } else if (dynamic_cast(exp)) { + // Просто переменная, без аттрибута + var = static_cast(exp)->getVariable(); + } else + throw new std::logic_error("Invalid alternative binding in parser"); + + bool applicable = false; + for (uint i = 0; i < alt.getMatcherCount(); ++i) + if (alt.getMatcher(i).containsVariable(var)) + applicable = true; + + // Создаём связывание и добавляем его к альтенративе шаблона + if (applicable) { + Expression *e = new VariableExpression(var); + if (key != AttributeKey::UNDEFINED) + e = new AttributeExpression(e, key); + alt.addBinding(key, e); + } + } + + /** + * Обработка шаблона + * + * описание_шаблона ::= имя_шаблона [ (параметры_шаблона) ] = + * шаблон_распознавания { | шаблон_распознавания } + * [ (параметры_шаблона) ] + * [ =text> шаблоны_извлечения_текста ] + * + * TODO: =>text пока никак не обрабатывается + */ + void readPattern() { + std::string patternName = readPatternName(); + PatternRef pattern = space->getPatternByName(patternName); + if (!pattern) + pattern = space->addPattern(new Pattern(patternName)); + + std::vector arguments; + uint alternativeCountBefore = pattern->alternatives.size(); + // Параметры шаблона (слева) + if (strFollows("(")) { + arguments = readPatternArguments(pattern); + } + + readStrFollows("="); + readAlternativeWithSource(pattern); + while (strFollows("|")) { + readStrFollows("|"); + readAlternativeWithSource(pattern); + } + + for (Expression *exp : arguments) { + for (uint i = alternativeCountBefore; i < pattern->alternatives.size(); ++i) + appendAlternativeBinding(pattern->alternatives[i], exp); + delete exp; + } + } - loopRestriction = '<' >> epsilon_p(chset_p("0-9,")) >> !uint_p[ loopMatcher.min = arg1 ] >> !( ',' >> uint_p[ loopMatcher.max = arg1 ] ) >> endRestriction; - - /* - * Парсеры ограничений - */ - - matcherRestriction = ( localExpression[ add( matcherRestriction.args, arg1 ) ] >> *( '=' >> expression[ add( matcherRestriction.args, arg1 ) ] ) ) - [ matcherRestriction.restriction = createAgreementRestriction( matcherRestriction.args ) ]; - - patternRestrictions = '<' >> expect_restriction_body( ( - agreementRestriction[addRestriction( alternative.matchers, arg1 )] | - dictionaryRestriction[addRestriction( alternative.matchers, arg1 )] - ) % ',' ) >> endRestriction; - - agreementRestriction = ( expression[ add( agreementRestriction.args, arg1 ) ] % '=' ) - [ agreementRestriction.restriction = createAgreementRestriction( agreementRestriction.args ) ]; - - dictionaryRestriction = ( ( lexeme_d[ +chset_p("a-zA-Z") ][ dictionaryRestriction.dictionaryName = construct_( arg1, arg2 ) ] ) >> "(" >> ( expression[ add( dictionaryRestriction.args, arg1 ) ] % "," ) >> ")" ) - [ dictionaryRestriction.restriction = createDictionaryRestriction( dictionaryRestriction.dictionaryName, dictionaryRestriction.args ) ]; - - /* - * Парсеры выражений - */ - - expression = ~eps_p( str_p( "AS" ) ) >> ( stringLiteralExpression | propertyExpression | literalExpression ) >> - *( expression[ expression.exp = createConcatExpression( expression.exp, arg1 ) ] ); - - localExpression = attributeKey[ localExpression.exp = createCurrentAttributeExpression( arg1 ) ]; - - propertyExpression = variable[ expression.exp = createVariableExpression( arg1 ) ] >> - *( '.' >> attributeKey[ expression.exp = createAttributeExpression( expression.exp, arg1 ) ] ); - - stringLiteralExpression = lexeme_d[ switch_p[ - case_p< '"' >( (+~ch_p('"'))[ expression.exp = createStringLiteralExpression( arg1, arg2 ) ] >> expect_closing_dbl_quote( ch_p('"') ) ), - case_p< '\'' >( (+~ch_p('\''))[ expression.exp = createStringLiteralExpression( arg1, arg2 ) ] >> expect_closing_sgl_quote( ch_p('\'') ) ) - ] ]; - - literalExpression = attributeValue[ expression.exp = createLiteralExpression( arg1 ) ]; - - /* - * Таблицы символов - */ - - for ( uint i = 0; i < SpeechPart::COUNT; ++ i ) { - speechPart.add( SpeechPart( i ).getAbbrevation().c_str(), SpeechPart( i ) ); - speechPart.add( SpeechPart( i ).getName().c_str(), SpeechPart( i ) ); - typeSymbol.add( SpeechPart( i ).getAbbrevation().c_str(), i ); - typeSymbol.add( SpeechPart( i ).getName().c_str(), i ); - } - - for ( uint i = 0; i < AttributeValue::indexedCount(); ++ i ) { - attributeValue.add( AttributeValue( i ).getAbbrevation().c_str(), AttributeValue( i ) ); - attributeValue.add( AttributeValue( i ).getName().c_str(), AttributeValue( i ) ); - } - } - - rule const & start() const { return source; } - - private: - - symbols typeSymbol; - symbols speechPart; - symbols attributeValue; - - rule patternName, wordType, source, wordBase, wordRestriction, matcherVariable, loopRestriction, patternRestrictions, bindingList, endLoop, endOptional, endBinding, endRestriction, alternativeTransformSource; - - rule alternative; - - rule agreementRestriction, matcherRestriction; - rule dictionaryRestriction; - - rule matcher; - rule wordMatcher; - rule patternMatcher; - rule loopMatcher; - rule loopBody; - rule tokenMatcher; - - rule binding; - - rule pattern; - - rule expression, localExpression; - rule propertyExpression, stringLiteralExpression, literalExpression; - - VariableParser variable; - AttributeKeyParser attributeKey; - }; - - ParserImpl( NamespaceRef space, const std::map& tbs ) : Parser( space, tbs ) {} - ~ParserImpl() {} - - PatternBuilder::BuildInfo build( const char * str ) throw (PatternBuildingException) { - try { - parse_info pi = boost::spirit::classic::parse( str, *this, space_p ); - - PatternBuilder::BuildInfo bi; - bi.parseLength = (uint) pi.length; - bi.parseTail = pi.stop; - - return bi; - } catch ( parser_error & err ) { - switch( err.descriptor ) { - case BindingEndMissing: - throw PatternBuildingException( "Binding end missing" ); - case RestrictionEndMissing: - throw PatternBuildingException( "Restriction end missing" ); - case LoopEndMissing: - throw PatternBuildingException( "Loop end missing" ); - case OptionalEndMissing: - throw PatternBuildingException( "Optional group end missing" ); - case NoMatchersInAlternative: - throw PatternBuildingException( "No matchers in alternative" ); - case NoMatchersInGroup: - throw PatternBuildingException( "No matchers in group" ); - case NoRestrictionBody: - throw PatternBuildingException( "No valid restrictions" ); - case InvalidPatternName: - throw PatternBuildingException( "Invalid pattern name" ); - case ClosingSglQuoteMissed: - throw PatternBuildingException( "Closing single quote missed" ); - case ClosingDblQuoteMissed: - throw PatternBuildingException( "Closing double quote missed" ); - case AttributeValueExpected: - throw PatternBuildingException( "Invalid or no attribute value" ); - default: - throw PatternBuildingException( (boost::format( "Error parsing template: %1%. Descriptor: %2%. Where: %3%" ) % err.what() % err.descriptor % err.where).str() ); - } - } catch ( const std::exception & e ) { - throw PatternBuildingException( (boost::format( "Error parsing template: %1% in template %2%" ) % e.what() % str).str() ); - } catch ( ... ) { - throw PatternBuildingException( "Unknown error during parsing template" ); - } - } +public: + ParserImpl(NamespaceRef space, const std::map& tbs): + Parser(space, tbs), buffer(nullptr), pos(0) {} + ~ParserImpl() {} + + PatternBuilder::BuildInfo build(const char * str) throw (PatternBuildingException) { + buffer = str; + pos = 0; + + while (!seekEndOfInput()) + readPattern(); + + PatternBuilder::BuildInfo bi; + bi.parseLength = pos; + bi.parseTail = std::string(buffer + pos); + return bi; + } }; + PatternBuilder::PatternBuilder( const NamespaceRef & ns, transforms::TransformBuilderRef defaultTransformBuilder ) : space( ns ), parser( new ParserImpl( space, transformBuilders ) ) { @@ -387,6 +826,7 @@ PatternBuilder::PatternBuilder( const NamespaceRef & ns ) : PatternBuilder(ns, n PatternBuilder::~PatternBuilder() { } + PatternBuilder::BuildInfo PatternBuilder::build( const std::string & str ) throw (PatternBuildingException) { return parser->build( str.c_str() ); } diff --git a/core/src/main/lspl/patterns/PatternBuilder.h b/core/src/main/lspl/patterns/PatternBuilder.h index b662cd0a..7d67e6d7 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.h +++ b/core/src/main/lspl/patterns/PatternBuilder.h @@ -8,6 +8,7 @@ #include "../Namespace.h" #include "../transforms/TransformBuilder.h" +#include #include #include @@ -17,8 +18,11 @@ namespace lspl { namespace patterns { class LSPL_EXPORT PatternBuildingException : public base::Exception { public: - PatternBuildingException(const std::string & description) : - Exception( description ) { + uint errorPos; + std::string input; + + PatternBuildingException(const std::string & description, const std::string &input, uint errorPos) : + Exception(description), input(input), errorPos(errorPos) { } ~PatternBuildingException() throw() {} diff --git a/core/src/main/lspl/patterns/matchers/BaseComparator.cpp b/core/src/main/lspl/patterns/matchers/BaseComparator.cpp new file mode 100644 index 00000000..51004e44 --- /dev/null +++ b/core/src/main/lspl/patterns/matchers/BaseComparator.cpp @@ -0,0 +1,97 @@ +#include "BaseComparator.h" +#include "../../morphology/Morphology.h" + +using lspl::text::markup::Word; +using lspl::morphology::Morphology; + +namespace lspl { namespace patterns { namespace matchers { + +BaseComparator::~BaseComparator() { +} + +void AlternativeBaseComparator::addAlternativeBase(const std::string &base) { + alts.push_back(Morphology::instance().upcase(base)); +} + +bool AlternativeBaseComparator::equals(const BaseComparator &other) const { + if (typeid(other) != typeid(*this)) + return false; + return alts == dynamic_cast(other).alts; +} + +void AlternativeBaseComparator::dump(std::ostream & out, const std::string & tabs) const { + out << tabs << "BaseComparator { " << getBaseTypeName() << ' ' << (negative ? "!=" : "==") << ' '; + for (uint i = 0; i < alts.size(); ++i) { + if (i) out << " | "; + out << alts[i]; + } + out << " }"; +} + +bool AlternativeBaseComparator::match(const Word &word) const { + bool result = negative; + for (const std::string &alt : alts) + if ((getWordValue(word) == alt) ^ negative) { + result = !result; + break; + } + return result; +} + +RegexpBaseComparator::RegexpBaseComparator(const std::string &token, bool negative): + exp(Morphology::instance().upcase( token )), negative(negative) {} + +void RegexpBaseComparator::dump(std::ostream &out, const std::string &tabs) const { + out << tabs << "BaseComparator { " << getBaseTypeName() << " = RegExp(" + << exp.pattern() << ") }"; +} + +bool RegexpBaseComparator::match(const Word &word) const { + return exp.FullMatch(getWordValue(word)); +} + +bool RegexpBaseComparator::equals(const BaseComparator &other) const { + if (typeid(other) != typeid(*this)) + return false; + return exp.pattern() == dynamic_cast(other).exp.pattern(); +} + +// Конструкторы + +LemmaComparator::LemmaComparator(bool negative) { this->negative = negative; } +StemComparator::StemComparator(bool negative) { this->negative = negative; } + +LemmaRegexpComparator::LemmaRegexpComparator(const std::string &token, bool negative): + RegexpBaseComparator(token, negative) {} +StemRegexpComparator::StemRegexpComparator(const std::string &token, bool negative): + RegexpBaseComparator(token, negative) {} + +// Имена типов + +static std::string LEMMA = "lemma"; +static std::string STEM = "stem"; + +const std::string& LemmaComparator::getBaseTypeName() const { return LEMMA; } +const std::string& LemmaRegexpComparator::getBaseTypeName() const { return LEMMA; } +const std::string& StemComparator::getBaseTypeName() const { return STEM; } +const std::string& StemRegexpComparator::getBaseTypeName() const { return STEM; } + +// Получение значений из слова + +std::string LemmaComparator::getWordValue(const Word &word) const { + return word.getBase(); +} + +std::string LemmaRegexpComparator::getWordValue(const Word &word) const { + return word.getBase(); +} + +std::string StemComparator::getWordValue(const Word &word) const { + return word.getStem(); +} + +std::string StemRegexpComparator::getWordValue(const Word &word) const { + return word.getStem(); +} + +}}} diff --git a/core/src/main/lspl/patterns/matchers/BaseComparator.h b/core/src/main/lspl/patterns/matchers/BaseComparator.h new file mode 100644 index 00000000..dac791e2 --- /dev/null +++ b/core/src/main/lspl/patterns/matchers/BaseComparator.h @@ -0,0 +1,104 @@ +#ifndef _LSPL_MATCHERS_BASECOMPARATOR_H_ +#define _LSPL_MATCHERS_BASECOMPARATOR_H_ + +#include "Forward.h" +#include "../../text/markup/Word.h" + +#include + +namespace lspl { namespace patterns { namespace matchers { + +/** + * Base Comparator является частью WordMatcher и позволяет ограничить + * применение сопоставителя только для слов с определенной леммой или основой + */ +class LSPL_EXPORT BaseComparator { +public: + virtual ~BaseComparator(); + + /** + * Метод возвращает true, если данное слово согласовано с ограничением + * В базовом классе по умолчанию согласует все слова + */ + virtual bool match(const lspl::text::markup::Word &) const = 0; + virtual void dump(std::ostream & out, const std::string & tabs = "") const = 0; + virtual bool equals(const BaseComparator &) const = 0; +}; + + +class LSPL_EXPORT AlternativeBaseComparator: public BaseComparator { +public: + virtual std::string getWordValue(const lspl::text::markup::Word &) const = 0; + virtual const std::string& getBaseTypeName() const = 0; + virtual bool match(const lspl::text::markup::Word &) const; + virtual bool equals(const BaseComparator &) const; + virtual void addAlternativeBase(const std::string &); + virtual void dump(std::ostream & out, const std::string & tabs = "") const; + +public: + std::vector alts; + bool negative; // Если true, то != +}; + +class LSPL_EXPORT RegexpBaseComparator: public BaseComparator { +public: + RegexpBaseComparator(const std::string &token, bool negative = false); + + virtual std::string getWordValue(const lspl::text::markup::Word &) const = 0; + virtual const std::string& getBaseTypeName() const = 0; + virtual bool match(const lspl::text::markup::Word &) const; + virtual void dump(std::ostream & out, const std::string & tabs = "") const; + virtual bool equals(const BaseComparator &) const; + +public: + const pcrecpp::RE exp; + bool negative; +}; + +/** + * Ограничение на лемму + */ +class LSPL_EXPORT LemmaComparator: public AlternativeBaseComparator { +public: + LemmaComparator(bool negative = false); + + virtual std::string getWordValue(const lspl::text::markup::Word &) const; + virtual const std::string& getBaseTypeName() const; +}; + +/** + * Ограничение на основу + */ +class LSPL_EXPORT StemComparator: public AlternativeBaseComparator { +public: + StemComparator(bool negative = false); + + virtual std::string getWordValue(const lspl::text::markup::Word &) const; + virtual const std::string& getBaseTypeName() const; +}; + +/** + * Ограничение на лемму по регулярному выражению + */ +class LSPL_EXPORT LemmaRegexpComparator: public RegexpBaseComparator { +public: + LemmaRegexpComparator(const std::string &token, bool negative = false); + + virtual std::string getWordValue(const lspl::text::markup::Word &) const; + virtual const std::string& getBaseTypeName() const; +}; + +/** + * Ограничение на основу по регулярному выражению + */ +class LSPL_EXPORT StemRegexpComparator: public RegexpBaseComparator { +public: + StemRegexpComparator(const std::string &token, bool negative = false); + + virtual std::string getWordValue(const lspl::text::markup::Word &) const; + virtual const std::string& getBaseTypeName() const; +}; + +}}} + +#endif /*_LSPL_MATCHERS_BASECOMPARATOR_H_*/ diff --git a/core/src/main/lspl/patterns/matchers/Forward.h b/core/src/main/lspl/patterns/matchers/Forward.h index 153301d7..c07f5268 100644 --- a/core/src/main/lspl/patterns/matchers/Forward.h +++ b/core/src/main/lspl/patterns/matchers/Forward.h @@ -18,6 +18,8 @@ class TokenMatcher; class Variable; class Context; +class BaseComparator; + } } } // namespace lspl::patterns::matchers #endif /*_LSPL_PATTERNS_MATCHERS_FORWARD_H_*/ diff --git a/core/src/main/lspl/patterns/matchers/LoopMatcher.cpp b/core/src/main/lspl/patterns/matchers/LoopMatcher.cpp index 4f3dd28d..01b7e039 100644 --- a/core/src/main/lspl/patterns/matchers/LoopMatcher.cpp +++ b/core/src/main/lspl/patterns/matchers/LoopMatcher.cpp @@ -198,13 +198,29 @@ static void processLoop( const LoopMatchState & state, ChainList & results ) { processLoop( state, iterations, results ); } -LoopMatcher::LoopMatcher() : AnnotationChainMatcher( LOOP ), minLoops( 0 ), maxLoops( 0 ) { +LoopMatcher::LoopMatcher() : AnnotationChainMatcher( LOOP ), minLoops( 0 ), maxLoops( 0 ), is_permutation(false) { } -LoopMatcher::LoopMatcher( uint min, uint max ) : AnnotationChainMatcher( LOOP ), minLoops( min ), maxLoops( max ) { +LoopMatcher::LoopMatcher( uint min, uint max, bool is_permutation ) : AnnotationChainMatcher( LOOP ), minLoops( min ), maxLoops( max ), + is_permutation(is_permutation) +{ } LoopMatcher::~LoopMatcher() { + /** + * Если сопоставитель содержит перестановку, то очищать нужно только первую альтернативу, т.к. вторая и последующие + * состоят из тех же самых указателей. Да, это грязный хак, но без этого придётся существенно переделывать всю систему + * сопоставителей. + */ + if (is_permutation) { + for (uint i = 1; i < alternatives.size(); ++i) { + uint j = alternatives[i].getMatchers().size(); + do { + --j; + alternatives[i].getMatchers().release(alternatives[i].getMatchers().begin() + j).release(); + } while (j != 0); + } + } } void LoopMatcher::buildChains( const text::Node & node, const Context & context, ChainList & results ) const { @@ -273,4 +289,12 @@ void LoopMatcher::dump( std::ostream & out, const std::string & tabs ) const { out << "\n" << tabs << "] }"; } +bool LoopMatcher::containsVariable(const Variable &var) const { + for ( uint i = 0; i < alternatives.size(); ++i ) + for (uint j = 0; j < alternatives[i].getMatcherCount(); ++j) + if (alternatives[i].getMatcher(j).containsVariable(var)) + return true; + return var == variable; +} + } } } // namespace lspl::patterns::matchers diff --git a/core/src/main/lspl/patterns/matchers/LoopMatcher.h b/core/src/main/lspl/patterns/matchers/LoopMatcher.h index 8679201f..2cad9b62 100644 --- a/core/src/main/lspl/patterns/matchers/LoopMatcher.h +++ b/core/src/main/lspl/patterns/matchers/LoopMatcher.h @@ -35,7 +35,13 @@ class LSPL_EXPORT LoopMatcher : public AnnotationChainMatcher * @param min минимальное число повторений. 0 - без ограничений. * @param max максимальное число повторений. 0 - без ограничений. */ - LoopMatcher( uint min, uint max ); + LoopMatcher( uint min, uint max, bool is_permutation = false ); + + + /** + * Содержит ли переменную? + */ + virtual bool containsVariable(const Variable &) const; /** * Деструктор @@ -77,6 +83,11 @@ class LSPL_EXPORT LoopMatcher : public AnnotationChainMatcher */ uint maxLoops; + /** + * Является ли перестановкой? + */ + bool is_permutation; + /** * Список альтернатив последовательности. */ diff --git a/core/src/main/lspl/patterns/matchers/Matcher.cpp b/core/src/main/lspl/patterns/matchers/Matcher.cpp index 50f78e07..dab0983f 100644 --- a/core/src/main/lspl/patterns/matchers/Matcher.cpp +++ b/core/src/main/lspl/patterns/matchers/Matcher.cpp @@ -59,6 +59,10 @@ void Matcher::dumpRestrictions( std::ostream & out, const std::string & tabs ) c out << " >"; } +bool Matcher::containsVariable(const Variable &v) const { + return variable == v; +} + TransitionList AnnotationMatcher::buildTransitions( const text::Node & node, const Context & context ) const { TransitionList transitions; diff --git a/core/src/main/lspl/patterns/matchers/Matcher.h b/core/src/main/lspl/patterns/matchers/Matcher.h index 2d0c0469..093d9f1a 100644 --- a/core/src/main/lspl/patterns/matchers/Matcher.h +++ b/core/src/main/lspl/patterns/matchers/Matcher.h @@ -52,6 +52,11 @@ class LSPL_EXPORT Matcher */ virtual bool equals( const Matcher & m ) const; + /** + * Содержит ли сопоставитель переменную + */ + virtual bool containsVariable(const Variable &) const; + /** * Добавить ограничение к сопоставителю * @param r ограничение diff --git a/core/src/main/lspl/patterns/matchers/MatcherContainer.cpp b/core/src/main/lspl/patterns/matchers/MatcherContainer.cpp index 90e9f5b1..7cf26253 100644 --- a/core/src/main/lspl/patterns/matchers/MatcherContainer.cpp +++ b/core/src/main/lspl/patterns/matchers/MatcherContainer.cpp @@ -27,8 +27,8 @@ TokenMatcher & MatcherContainer::newTokenMatcher( const std::string & token ) { return *matcher; } -WordMatcher & MatcherContainer::newWordMatcher( const std::string & base, text::attributes::SpeechPart speechPart ) { - WordMatcher * matcher = new WordMatcher( base, speechPart ); +WordMatcher & MatcherContainer::newWordMatcher( text::attributes::SpeechPart speechPart ) { + WordMatcher * matcher = new WordMatcher( speechPart ); addMatcher( matcher ); diff --git a/core/src/main/lspl/patterns/matchers/MatcherContainer.h b/core/src/main/lspl/patterns/matchers/MatcherContainer.h index ac5940ae..1dbe6632 100644 --- a/core/src/main/lspl/patterns/matchers/MatcherContainer.h +++ b/core/src/main/lspl/patterns/matchers/MatcherContainer.h @@ -34,7 +34,7 @@ class LSPL_EXPORT MatcherContainer { /** * Создать новый сопоставитель слов */ - WordMatcher & newWordMatcher( const std::string & base, text::attributes::SpeechPart speechPart ); + WordMatcher & newWordMatcher( text::attributes::SpeechPart speechPart ); /** * Создать новый сопоставитель шаблона @@ -97,6 +97,10 @@ class LSPL_EXPORT MatcherContainer { return matchers; } + boost::ptr_vector & getMatchers() { + return matchers; + } + /** * Получить сопоставитель из контейнера по индексу */ diff --git a/core/src/main/lspl/patterns/matchers/WordMatcher.cpp b/core/src/main/lspl/patterns/matchers/WordMatcher.cpp index 2e0f7213..9fea8119 100644 --- a/core/src/main/lspl/patterns/matchers/WordMatcher.cpp +++ b/core/src/main/lspl/patterns/matchers/WordMatcher.cpp @@ -18,8 +18,8 @@ using lspl::morphology::Morphology; namespace lspl { namespace patterns { namespace matchers { -WordMatcher::WordMatcher( const std::string & base, SpeechPart speechPart ) : - AnnotationMatcher( WORD ), base( Morphology::instance().upcase( base ) ), speechPart( speechPart ) { +WordMatcher::WordMatcher(SpeechPart speechPart, BaseComparator *baseComparator) : + AnnotationMatcher( WORD ), baseComparator(baseComparator), speechPart( speechPart ) { } @@ -42,25 +42,41 @@ bool WordMatcher::matchTransition( const Transition & transition, const Context if ( speechPart != SpeechPart::WORD && speechPart != word.getSpeechPart() ) // Проверяем соответствие частей речи return false; - if ( base != "" && base != word.getBase() ) // Проверяем соответствие начальной формы - return false; + if (baseComparator != nullptr && !baseComparator->match(word)) + return false; return matchRestrictions( transition, context ); } void WordMatcher::dump( std::ostream & out, const std::string & tabs ) const { - out << "WordMatcher{ base = " << base << ", speechPart = " << speechPart.getAbbrevation() << ", variable = " << variable << ", restrictions = "; + out << "WordMatcher{ speechPart = " << speechPart.getAbbrevation() << ", variable = " << variable << ", baseComparator = "; + if (baseComparator == nullptr) + out << "NULL"; + else + baseComparator->dump( out ); + out << ", restrictions = "; dumpRestrictions( out ); out << " }"; } +void WordMatcher::setBaseComparator(BaseComparator *cmp) { + baseComparator.reset(cmp); +} + bool WordMatcher::equals( const Matcher & m ) const { if ( !Matcher::equals( m ) ) return false; // Разные сопоставители const WordMatcher & wm = static_cast( m ); if ( wm.speechPart != speechPart ) return false; // Различная часть речи - if ( wm.base != base ) return false; // Различная основа + + if (wm.baseComparator == nullptr && baseComparator == nullptr) + return true; + + if (wm.baseComparator == nullptr || baseComparator == nullptr) + return false; + + if ( !wm.baseComparator->equals(*baseComparator) ) return false; // Различная основа return true; } diff --git a/core/src/main/lspl/patterns/matchers/WordMatcher.h b/core/src/main/lspl/patterns/matchers/WordMatcher.h index 0be3beb6..fde0d24a 100644 --- a/core/src/main/lspl/patterns/matchers/WordMatcher.h +++ b/core/src/main/lspl/patterns/matchers/WordMatcher.h @@ -1,8 +1,11 @@ #ifndef _LSPL_MATCHERS_WORDMATCHER_H_ #define _LSPL_MATCHERS_WORDMATCHER_H_ +#include + #include "Forward.h" #include "Matcher.h" +#include "BaseComparator.h" #include "../../text/attributes/SpeechPart.h" @@ -14,14 +17,14 @@ namespace lspl { namespace patterns { namespace matchers { * Используется для поиска слов с заданной основой и частью речи. */ class LSPL_EXPORT WordMatcher : public AnnotationMatcher { + public: /** * Конструктор. - * @param base основа слова - * @param speechPart частьр речи + * @param speechPart часть речи */ - WordMatcher( const std::string & base, text::attributes::SpeechPart speechPart ); + WordMatcher( text::attributes::SpeechPart speechPart, BaseComparator* baseComparator = nullptr ); /** * Деструктор @@ -32,18 +35,18 @@ class LSPL_EXPORT WordMatcher : public AnnotationMatcher { virtual bool matchTransition( const text::Transition & transition, const Context & context ) const; virtual void dump( std::ostream & out, const std::string & tabs = "" ) const; virtual bool equals( const Matcher & m ) const; + virtual void setBaseComparator(BaseComparator *); public: - /** - * Основа слова + * Часть речи */ - std::string base; + text::attributes::SpeechPart speechPart; /** - * Часть речи + * Ограничение на основу/лемму */ - text::attributes::SpeechPart speechPart; + std::unique_ptr baseComparator; }; } } } // namespace lspl::patterns::matchers diff --git a/core/src/main/lspl/patterns/parsers/CharacterSets.cpp b/core/src/main/lspl/patterns/parsers/CharacterSets.cpp new file mode 100644 index 00000000..73120522 --- /dev/null +++ b/core/src/main/lspl/patterns/parsers/CharacterSets.cpp @@ -0,0 +1,56 @@ +#include "CharacterSets.h" +#include + +namespace lspl { namespace patterns { namespace parsers { + + uint getCharacterSets(unsigned char c) { + uint result = 0; + if (c >= (unsigned char)'0' && c <= (unsigned char)'9') + result |= LSPL_CHARACTERSETS_DIGIT; + if (c >= (unsigned char)'a' && c <= (unsigned char)'z') + result |= LSPL_CHARACTERSETS_LOWERCASE | LSPL_CHARACTERSETS_LATIN; + if (c >= (unsigned char)'A' && c <= (unsigned char)'Z') + result |= LSPL_CHARACTERSETS_UPPERCASE | LSPL_CHARACTERSETS_LATIN; + if ((c >= (unsigned char)'' && c <= (unsigned char)'') || c == (unsigned char)'') + result |= LSPL_CHARACTERSETS_UPPERCASE | LSPL_CHARACTERSETS_CYRILLIC; + if ((c >= (unsigned char)'' && c <= (unsigned char)'') || c == (unsigned char)'') + result |= LSPL_CHARACTERSETS_LOWERCASE | LSPL_CHARACTERSETS_CYRILLIC; + if (c == (unsigned char)' ' || c == (unsigned char)'\t' || c == (unsigned char)'\n' || c == (unsigned char)'\r') + result |= LSPL_CHARACTERSETS_SPACE; + if (ispunct(c & 0x7f)) + result |= LSPL_CHARACTERSETS_PUNCT; + return result; + } + + bool isLatin(unsigned char c) { + return getCharacterSets(c) & LSPL_CHARACTERSETS_LATIN; + } + + bool isCyrillic(unsigned char c) { + return getCharacterSets(c) & LSPL_CHARACTERSETS_CYRILLIC; + } + + bool isSpace(unsigned char c) { + return getCharacterSets(c) & LSPL_CHARACTERSETS_SPACE; + } + + bool isPunct(unsigned char c) { + return getCharacterSets(c) & LSPL_CHARACTERSETS_PUNCT; + } + + bool isDigit(unsigned char c) { + return getCharacterSets(c) & LSPL_CHARACTERSETS_DIGIT; + } + + bool isLowercase(unsigned char c) { + return getCharacterSets(c) & LSPL_CHARACTERSETS_LOWERCASE; + } + + bool isUppercase(unsigned char c) { + return getCharacterSets(c) & LSPL_CHARACTERSETS_UPPERCASE; + } + + bool isInvalidChar(unsigned char c) { + return !getCharacterSets(c); + } +}}} diff --git a/core/src/main/lspl/patterns/parsers/CharacterSets.h b/core/src/main/lspl/patterns/parsers/CharacterSets.h new file mode 100644 index 00000000..94e87056 --- /dev/null +++ b/core/src/main/lspl/patterns/parsers/CharacterSets.h @@ -0,0 +1,28 @@ +#ifndef _LSPL_UTILS_CHARACTERSETS_H_ +#define _LSPL_UTILS_CHARACTERSETS_H_ + +#include "../../base/BaseInternal.h" + + +#define LSPL_CHARACTERSETS_LATIN 1 +#define LSPL_CHARACTERSETS_CYRILLIC 2 +#define LSPL_CHARACTERSETS_SPACE 4 +#define LSPL_CHARACTERSETS_PUNCT 8 +#define LSPL_CHARACTERSETS_DIGIT 16 +#define LSPL_CHARACTERSETS_LOWERCASE 32 +#define LSPL_CHARACTERSETS_UPPERCASE 64 + +namespace lspl { namespace patterns { namespace parsers { + uint getCharacterSets(unsigned char); + bool isLatin(unsigned char); + bool isCyrillic(unsigned char); + bool isSpace(unsigned char); + bool isPunct(unsigned char); + bool isDigit(unsigned char); + bool isLowercase(unsigned char); + bool isUppercase(unsigned char); + bool isInvalidChar(unsigned char); +}}} + + +#endif /* _LSPL_UTILS_RUSCONSTS_H_ */ diff --git a/core/src/main/lspl/patterns/parsers/Functions.cpp b/core/src/main/lspl/patterns/parsers/Functions.cpp index 6aa9ab68..da8d79a3 100644 --- a/core/src/main/lspl/patterns/parsers/Functions.cpp +++ b/core/src/main/lspl/patterns/parsers/Functions.cpp @@ -57,7 +57,15 @@ static bool isRegexp( const std::string & str ) { } void AddWordMatcherImpl::operator()( boost::ptr_vector & matchers, const std::string & base, SpeechPart speechPart, uint index, boost::ptr_vector< Restriction > & restrictions ) const { - Matcher * matcher = new WordMatcher( base, speechPart ); + WordMatcher * matcher; + + if (base == "") + matcher = new WordMatcher( speechPart ); + else { + LemmaComparator *lcmp = new LemmaComparator(false); + lcmp->addAlternativeBase(base); + matcher = new WordMatcher( speechPart, lcmp ); + } matcher->variable = Variable( speechPart, index ); matcher->addRestrictions( restrictions ); @@ -125,7 +133,7 @@ void AddAlternativeDefinitionImpl::operator()( boost::ptr_vector & const auto transformBuilder = transformBuilders.find(transformType); if (transformBuilder == transformBuilders.end()) - throw PatternBuildingException("Invalid transform type: =" + transformType + ">"); + throw PatternBuildingException("Invalid transform type: =" + transformType + ">", "", 0); alternative->setTransform( std::auto_ptr( transformBuilder->second->build( *alternative, alternative->getTransformSource() ) ) ); @@ -176,7 +184,7 @@ Restriction * CreateDictionaryRestrictionImpl::operator()( const std::string & d dictionaries::DictionaryRef dict = ns.getDictionaryByName( dictionaryName ); if ( !dict ) // Не нашли словаря - выкидываем исключение - throw PatternBuildingException( "No dictionary found" ); + throw PatternBuildingException( "No dictionary found", "", 0 ); DictionaryRestriction * dr = new DictionaryRestriction( dict ); diff --git a/core/src/main/lspl/patterns/restrictions/AgreementRestriction.cpp b/core/src/main/lspl/patterns/restrictions/AgreementRestriction.cpp index a9c48b82..f3ec4040 100644 --- a/core/src/main/lspl/patterns/restrictions/AgreementRestriction.cpp +++ b/core/src/main/lspl/patterns/restrictions/AgreementRestriction.cpp @@ -19,7 +19,7 @@ using lspl::patterns::expressions::Expression; namespace lspl { namespace patterns { namespace restrictions { -AgreementRestriction::AgreementRestriction() { +AgreementRestriction::AgreementRestriction(bool weak): weak(weak) { } AgreementRestriction::~AgreementRestriction() { @@ -74,7 +74,7 @@ bool AgreementRestriction::checkAgreement( const std::vector & v bool AgreementRestriction::checkAgreement( AttributeValue val1, AttributeValue val2 ) const { if ( val1 == AttributeValue::UNDEFINED || val2 == AttributeValue::UNDEFINED ) - return true; + return weak || (val1 == val2); if ( val1.type != val2.type ) return false; @@ -91,12 +91,12 @@ bool AgreementRestriction::checkAgreement( AttributeValue val1, AttributeValue v for ( uint i = 0; i < AttributeKey::count(); ++ i ) { AttributeValue sv1 = cont1.getAttribute( AttributeKey(i) ); - if ( sv1 == AttributeValue::UNDEFINED ) + if ( weak && sv1 == AttributeValue::UNDEFINED ) continue; AttributeValue sv2 = cont2.getAttribute( AttributeKey(i) ); - if ( sv2 == AttributeValue::UNDEFINED ) + if ( weak && sv2 == AttributeValue::UNDEFINED ) continue; if ( sv1.type == AttributeType::STRING && sv2.type == AttributeType::STRING ) diff --git a/core/src/main/lspl/patterns/restrictions/AgreementRestriction.h b/core/src/main/lspl/patterns/restrictions/AgreementRestriction.h index 1d0cb85a..04c4fdbc 100644 --- a/core/src/main/lspl/patterns/restrictions/AgreementRestriction.h +++ b/core/src/main/lspl/patterns/restrictions/AgreementRestriction.h @@ -28,7 +28,7 @@ namespace lspl { namespace patterns { namespace restrictions { */ class LSPL_EXPORT AgreementRestriction : public Restriction { public: - AgreementRestriction(); + AgreementRestriction(bool weak = true); virtual ~AgreementRestriction(); void addArgument( expressions::Expression * arg ) { @@ -64,6 +64,10 @@ class LSPL_EXPORT AgreementRestriction : public Restriction { bool checkAgreement( text::attributes::AttributeValue val1, text::attributes::AttributeValue val2 ) const; bool checkAgreement( const std::vector & val1, const std::vector & val2 ) const; + /** + * Слабое согласование (одинарное =) + */ + bool weak; private: /** diff --git a/core/src/main/lspl/patterns/restrictions/NotRestriction.cpp b/core/src/main/lspl/patterns/restrictions/NotRestriction.cpp new file mode 100644 index 00000000..edbe7b91 --- /dev/null +++ b/core/src/main/lspl/patterns/restrictions/NotRestriction.cpp @@ -0,0 +1,44 @@ +#include "../../base/BaseInternal.h" + +#include "NotRestriction.h" + +namespace lspl { namespace patterns { namespace restrictions { + +NotRestriction::NotRestriction(Restriction * arg): arg(arg) { +} + +NotRestriction::~NotRestriction() { +} + +bool NotRestriction::matches( const text::Transition * currentAnnotation, const matchers::Variable currentVar, const matchers::Context & ctx ) const { + return !arg->matches(currentAnnotation, currentVar, ctx); +} + +void NotRestriction::dump( std::ostream & out, const std::string & tabs ) const { + out << tabs << "!("; + arg->dump(out); + out << ")" << std::endl; +} + +bool NotRestriction::equals( const Restriction & r ) const { + try { + const NotRestriction &nr = dynamic_cast(r); + return arg->equals(*nr.arg); + } catch (...) { + return false; + } +} + +bool NotRestriction::containsVariable( matchers::Variable var ) const { + return arg->containsVariable(var); +} + +bool NotRestriction::containsVariables() const { + return arg->containsVariables(); +} + +bool NotRestriction::containsCurrentAnnotation() const { + return arg->containsCurrentAnnotation(); +} + +} } } // namespace lspl::patterns::matchers diff --git a/core/src/main/lspl/patterns/restrictions/NotRestriction.h b/core/src/main/lspl/patterns/restrictions/NotRestriction.h new file mode 100644 index 00000000..31ef4dbe --- /dev/null +++ b/core/src/main/lspl/patterns/restrictions/NotRestriction.h @@ -0,0 +1,29 @@ +#ifndef _LSPL_PATTERNS_RESTRICTIONS_NOTRESTRICTION_H_ +#define _LSPL_PATTERNS_RESTRICTIONS_NOTRESTRICTION_H_ + +#include "Restriction.h" + +namespace lspl { namespace patterns { namespace restrictions { + +class LSPL_EXPORT NotRestriction : public Restriction { +public: + NotRestriction( Restriction * arg ); + virtual ~NotRestriction(); + + virtual bool matches( const text::Transition * currentAnnotation, const matchers::Variable currentVar, const matchers::Context & ctx ) const; + virtual void dump( std::ostream & out, const std::string & tabs = "" ) const; + virtual bool equals( const Restriction & r ) const; + + virtual bool containsVariable( matchers::Variable var ) const; + virtual bool containsVariables() const; + virtual bool containsCurrentAnnotation() const; + +private: + + Restriction* arg; + +}; + +} } } // namespace lspl::patterns::matchers + +#endif /* _LSPL_PATTERNS_RESTRICTIONS_NOTRESTRICTION_H_ */ diff --git a/core/src/main/lspl/patterns/restrictions/OrRestriction.cpp b/core/src/main/lspl/patterns/restrictions/OrRestriction.cpp new file mode 100644 index 00000000..da63d54e --- /dev/null +++ b/core/src/main/lspl/patterns/restrictions/OrRestriction.cpp @@ -0,0 +1,71 @@ +#include "../../base/BaseInternal.h" + +#include "OrRestriction.h" + +namespace lspl { namespace patterns { namespace restrictions { + +OrRestriction::OrRestriction() { +} + +OrRestriction::~OrRestriction() { +} + +bool OrRestriction::matches( const text::Transition * currentAnnotation, const matchers::Variable currentVar, const matchers::Context & ctx ) const { + for( boost::ptr_vector::const_iterator it = args.begin(); it != args.end(); ++ it ) + if ( it->matches( currentAnnotation, currentVar, ctx ) ) + return true; + + return false; +} + +void OrRestriction::dump( std::ostream & out, const std::string & tabs ) const { + if ( args.size() <= 1 ) + throw std::logic_error( "Too less arguments" ); + + args[0].dump( out ); + + for ( uint i = 1; i < args.size(); ++i ) { + out << " || "; + args[i].dump( out ); + } +} + +bool OrRestriction::equals( const Restriction & r ) const { + if ( const OrRestriction * ar = dynamic_cast( &r ) ) { + if ( ar->args.size() != args.size() ) return false; + + for ( uint i = 0, l = args.size(); i < l; ++ i ) + if ( !ar->args[i].equals( args[i] ) ) + return false; + + return true; + } else { + return false; + } +} + +bool OrRestriction::containsVariable( matchers::Variable var ) const { + for( boost::ptr_vector::const_iterator it = args.begin(); it != args.end(); ++ it ) + if ( it->containsVariable( var ) ) + return true; + + return false; +} + +bool OrRestriction::containsVariables() const { + for( boost::ptr_vector::const_iterator it = args.begin(); it != args.end(); ++ it ) + if ( it->containsVariables() ) + return true; + + return false; +} + +bool OrRestriction::containsCurrentAnnotation() const { + for( boost::ptr_vector::const_iterator it = args.begin(); it != args.end(); ++ it ) + if ( it->containsCurrentAnnotation() ) + return true; + + return false; +} + +} } } // namespace lspl::patterns::matchers diff --git a/core/src/main/lspl/patterns/restrictions/OrRestriction.h b/core/src/main/lspl/patterns/restrictions/OrRestriction.h new file mode 100644 index 00000000..b0304177 --- /dev/null +++ b/core/src/main/lspl/patterns/restrictions/OrRestriction.h @@ -0,0 +1,41 @@ +#ifndef _LSPL_PATTERNS_RESTRICTIONS_ORRESTRICTION_H_ +#define _LSPL_PATTERNS_RESTRICTIONS_ORRESTRICTION_H_ + +#include "Restriction.h" + +#include + +namespace lspl { namespace patterns { namespace restrictions { + +class LSPL_EXPORT OrRestriction : public Restriction { +public: + OrRestriction(); + virtual ~OrRestriction(); + + void addArgument( Restriction * arg ) { + args.push_back( arg ); + } + + template + void addArguments( PtrContainer & r ) { + if ( r.begin() != r.end() ) + args.transfer( args.end(), r.begin(), r.end(), r ); + } + + virtual bool matches( const text::Transition * currentAnnotation, const matchers::Variable currentVar, const matchers::Context & ctx ) const; + virtual void dump( std::ostream & out, const std::string & tabs = "" ) const; + virtual bool equals( const Restriction & r ) const; + + virtual bool containsVariable( matchers::Variable var ) const; + virtual bool containsVariables() const; + virtual bool containsCurrentAnnotation() const; + +private: + + boost::ptr_vector args; + +}; + +} } } // namespace lspl::patterns::matchers + +#endif /* _LSPL_PATTERNS_RESTRICTIONS_ORRESTRICTION_H_ */ diff --git a/core/src/main/lspl/text/markup/Word.h b/core/src/main/lspl/text/markup/Word.h index 17c618e5..9d196298 100644 --- a/core/src/main/lspl/text/markup/Word.h +++ b/core/src/main/lspl/text/markup/Word.h @@ -39,6 +39,7 @@ class LSPL_EXPORT Word : public Transition { const std::string & getToken() const { return token->getToken(); } const std::string & getBase() const { return base; } + const std::string & getStem() const { return stem; } attributes::SpeechPart getSpeechPart() const { return speechPart; } diff --git a/core/src/main/lspl/text/readers/JsonTextReader.cpp b/core/src/main/lspl/text/readers/JsonTextReader.cpp index 82a0fc54..7aeaf202 100644 --- a/core/src/main/lspl/text/readers/JsonTextReader.cpp +++ b/core/src/main/lspl/text/readers/JsonTextReader.cpp @@ -1,6 +1,8 @@ #include "../../base/BaseInternal.h" #include "JsonTextReader.h" +#include + #include "../../patterns/Pattern.h" #include "../TextConfig.h" diff --git a/core/src/main/lspl/transforms/PatternTransformBuilder.cpp b/core/src/main/lspl/transforms/PatternTransformBuilder.cpp index 180cbb4a..3e02d114 100644 --- a/core/src/main/lspl/transforms/PatternTransformBuilder.cpp +++ b/core/src/main/lspl/transforms/PatternTransformBuilder.cpp @@ -29,7 +29,7 @@ PatternTransform * PatternTransformBuilder::build( lspl::patterns::PatternTransformParser pars(space); boost::ptr_vector* matchers=NULL; if(!boost::spirit::classic::parse( source.c_str(), pars[ var(matchers) = arg1 ], space_p ).full) { - throw lspl::patterns::PatternBuildingException( source.c_str() ); + throw lspl::patterns::PatternBuildingException( "Pattern transform builder failed", source.c_str(), 0 ); } return new PatternTransform( space, matchers ); diff --git a/core/src/main/lspl/transforms/TextTransformBuilder.cpp b/core/src/main/lspl/transforms/TextTransformBuilder.cpp index 3c7a8b76..e3a1e096 100644 --- a/core/src/main/lspl/transforms/TextTransformBuilder.cpp +++ b/core/src/main/lspl/transforms/TextTransformBuilder.cpp @@ -32,7 +32,7 @@ TextTransform * TextTransformBuilder::build( parse_info pi = boost::spirit::classic::parse( source.c_str(), pars[ var(matchers) = arg1 ], space_p ); if(!pi.full) { - throw lspl::patterns::PatternBuildingException( source.c_str() ); + throw lspl::patterns::PatternBuildingException( "Transform builder failed", source, pi.length); } return new TextTransform( matchers, alt.getBindings() ); diff --git a/core/src/test/LsplBenchmarks.cpp b/core/src/test/LsplBenchmarks.cpp index 343e538a..c745a7ea 100644 --- a/core/src/test/LsplBenchmarks.cpp +++ b/core/src/test/LsplBenchmarks.cpp @@ -37,16 +37,16 @@ void definePatterns( NamespaceRef ns ) { std::cout.flush(); boost::timer tm; - builder->build( "Pact = N V" ); - builder->build( "Act = N V " ); - builder->build( "AAA = A (A) | Ap (Ap)" ); - builder->build( "ABB = {A} N " ); - builder->build( "ACC = {AAA} N " ); - builder->build( "ADD = \"\" Act" ); - builder->build( "AEE = N \"\" N" ); - builder->build( "ANom = N V" ); - builder->build( "AGen = N V" ); + builder->build( "Pact = N V" ); // 5582 [v] + builder->build( "Act = N V <>" ); // 4505 [v] + builder->build( "AAA (A, Ap) = A | Ap"); // 27333 [v] + builder->build( "ABB = {A} N <>" ); // 48508 [v] + builder->build( "ACC = {AAA} N <>" ); // 49146 [v] + builder->build( "ADD = \"\" Act" ); // 55 [v] + builder->build( "AEE = N \"\" N" ); // 446 [v] + builder->build( "ANom = N V" ); // 4199 [v] + builder->build( "AGen = N V" ); // 1373 [v] std::cout << "Done in " << tm.elapsed() << " seconds." << std::endl; } @@ -58,6 +58,9 @@ void findPatterns() { for ( uint i = 0; i < ns->getPatternCount(); ++ i ) { patterns::PatternRef pt = ns->getPatternByIndex( i ); + std::cout << "Dumping: "; + pt->dump(std::cout); + std::cout << std::endl << std::endl; std::cout << "Matching " << pt->getSource() << "... "; std::cout.flush(); @@ -83,7 +86,14 @@ int main() { std::cout << "Testing matching performance..." << std::endl; loadMorphology(); - findPatterns(); + + try { + findPatterns(); + } catch (patterns::PatternBuildingException &e) { + std::cerr << "Pattern building exception" << std::endl; + std::cerr << e.what() << std::endl; + std::cerr << e.input << std::endl; + } std::cout << "Exiting..." << std::endl; diff --git a/core/src/test/tests/PatternsTest.cpp b/core/src/test/tests/PatternsTest.cpp index f9a7c2ca..4de320e6 100644 --- a/core/src/test/tests/PatternsTest.cpp +++ b/core/src/test/tests/PatternsTest.cpp @@ -49,8 +49,11 @@ static void testSimplePatterns() { assertBuilds( "Act = N<> V" ); assertBuilds( "Act = N<-> V" ); assertBuilds( "Act = N V" ); - assertBuilds( "N2<, n=plur>" ); - assertBuilds( "Act = Noun Verb Adjective" ); + assertBuilds( "Act = N2<, n=plur>" ); + assertBuilds( "Act = V" ); + assertBuilds( "Act = A N" ); + assertBuilds( "Act = N" ); + assertFails( "Act = N " ); assertFails( "Act1 = N1 N1" ); // Wrong rule naming: Act1 assertFails( "Act = N1 N1 <3270 0" ); assertFails( "Act = N1 N1 <3=7>" ); @@ -59,28 +62,23 @@ static void testSimplePatterns() { static void testPatternNames() { // Names - assertBuilds( " = N<-> V" ); - assertBuilds( "- = N<-> V" ); + assertFails( " = N<-> V" ); + assertFails( "- = N<-> V" ); } static void testRestrictions() { // Restrictions - assertBuilds( "Act = N V " ); - assertBuilds( "Act = A N V " ); -} - -static void testUnnamedPatterns() { - // Unnamed - assertBuilds( "N V" ); - assertBuilds( "N<> V" ); + assertBuilds( "Act = N V <>" ); + assertBuilds( "Act = A N V <>" ); + assertBuilds( "Act = A N V <>" ); } static void testTokens() { // Tokens - assertBuilds( "\"\"" ); - assertBuilds( "''" ); - assertBuilds( "\".*\"" ); - assertBuilds( "'+'" ); + assertBuilds( "Act = \"\"" ); + assertFails( "Act = ''" ); + assertBuilds( "Act = \".*\"" ); + assertBuilds( "Act = \"+\"" ); assertFails( "'" ); assertFails( "\"" ); @@ -92,11 +90,11 @@ static void testLoops() { assertBuilds( "AAA = { N V }" ); assertBuilds( "AAA = { N V }<1>" ); assertBuilds( "AAA = { N V }<1,5>" ); - assertBuilds( "AAA = { N V }<1>" ); - assertBuilds( "AAA = { A N V }<1>" ); - assertBuilds( "AAA = A N { V }<1> " ); - assertBuilds( "AAA = A N { V } " ); - assertBuilds( "AAA = A N { V } " ); + assertBuilds( "AAA = { N V <> }<1>" ); + assertBuilds( "AAA = { A N V <> }<1>" ); + assertBuilds( "AAA = A N { V }<1> <>" ); + assertBuilds( "AAA = A N { V } <>" ); + assertBuilds( "AAA = A N { V } <>" ); } static void testLoopAlternatives() { @@ -105,28 +103,21 @@ static void testLoopAlternatives() { assertBuilds( "AAA = { N | V }" ); assertBuilds( "AAA = { N | V }<1>" ); assertBuilds( "AAA = { N | V }<1,5>" ); - assertBuilds( "AAA = { '' | '' }" ); + assertBuilds( "AAA = { \"\" | \"\" }" ); } static void testParameters() { // Parameters - assertBuilds( "AAA = N V ( N )" ); - assertBuilds( "AAA = N V ( N.c AS d )" ); - assertBuilds( "AAA = N V ( N.c, V.t )" ); - assertBuilds( "AAA = N V ( N.c AS d, V.t AS h )" ); -} - -static void testCompoundAttributes() { - // Compound attributes - assertBuilds( "AAA = N V ( N.k.l, V.t )" ); - assertFails( "AAA = N V ( .k.l, V.t )" ); + assertBuilds( "AAA ( N ) = N V" ); + assertBuilds( "AAA ( N.c ) = N V" ); + assertBuilds( "AAA ( N.c, V.t ) = N V" ); } -static void testConcat() { +/*static void testConcat() { // Concat assertBuilds( "AAA = N V ( N V AS f )" ); assertBuilds( "AAA = N V ( N V )" ); -} +}*/ static void testMultipleEquals() { // Multiple equal @@ -134,7 +125,7 @@ static void testMultipleEquals() { assertBuilds("AAA = N1 N2 N3 "); } -static void testDictionaries() { +/*static void testDictionaries() { NamespaceRef ns = new Namespace(); ns->addDictionary( new dictionaries::MemoryDictionary("DIC") ); @@ -162,7 +153,7 @@ static void testLiteralsInDictionaries() { assertBuildsNS( "AAA = N V ", ns ); assertBuildsNS( "AAA = N V ", ns ); assertBuildsNS( "AAA = N V ", ns ); -} +}*/ static void testParentNamespace() { NamespaceRef ns1 = new Namespace(); @@ -188,18 +179,18 @@ cute::suite patternBuildingSuite() { s += CUTE(testSimplePatterns); s += CUTE(testPatternNames); s += CUTE(testRestrictions); - s += CUTE(testUnnamedPatterns); + //s += CUTE(testUnnamedPatterns); s += CUTE(testTokens); s += CUTE(testLoops); s += CUTE(testLoopAlternatives); s += CUTE(testParameters); - s += CUTE(testCompoundAttributes); - s += CUTE(testConcat); + //s += CUTE(testCompoundAttributes); + //s += CUTE(testConcat); s += CUTE(testMultipleEquals); - s += CUTE(testDictionaries); - s += CUTE(testExpressionsInDictionaries); - s += CUTE(testLiteralsInDictionaries); - s += CUTE(testParentNamespace); + //s += CUTE(testDictionaries); + //s += CUTE(testExpressionsInDictionaries); + //s += CUTE(testLiteralsInDictionaries); + //s += CUTE(testParentNamespace); s += CUTE(testMemoryLeaks); return s; @@ -208,32 +199,32 @@ cute::suite patternBuildingSuite() { static void testEquality() { Pattern p1("TP"); Alternative & a = p1.newAlternative( "A N" ); - a.newWordMatcher( "",SpeechPart::ADJECTIVE ); - a.newWordMatcher( "",SpeechPart::NOUN ); + a.newWordMatcher( SpeechPart::ADJECTIVE ); + a.newWordMatcher( SpeechPart::NOUN ); { Alternative & a2 = p1.newAlternative( "A N" ); - a2.newWordMatcher( "",SpeechPart::ADJECTIVE ); - a2.newWordMatcher( "",SpeechPart::NOUN ); + a2.newWordMatcher( SpeechPart::ADJECTIVE ); + a2.newWordMatcher( SpeechPart::NOUN ); assertTrueM( a.equals( a2 ), "a1 should be equal a2" ); Alternative & a3 = p1.newAlternative( "A A" ); - a3.newWordMatcher( "",SpeechPart::ADJECTIVE ); - a3.newWordMatcher( "",SpeechPart::ADJECTIVE ); + a3.newWordMatcher( SpeechPart::ADJECTIVE ); + a3.newWordMatcher( SpeechPart::ADJECTIVE ); assertFalseM( a.equals( a3 ), "a1 shouldn't be equal a3" ); Alternative & a4 = p1.newAlternative( "A N A" ); - a4.newWordMatcher( "",SpeechPart::ADJECTIVE ); - a4.newWordMatcher( "",SpeechPart::NOUN ); - a4.newWordMatcher( "",SpeechPart::ADJECTIVE ); + a4.newWordMatcher( SpeechPart::ADJECTIVE ); + a4.newWordMatcher( SpeechPart::NOUN ); + a4.newWordMatcher( SpeechPart::ADJECTIVE ); assertFalseM( a.equals( a4 ), "a1 shouldn't be equal a4" ); Alternative & a5 = p1.newAlternative( "A { N }" ); - a4.newWordMatcher( "",SpeechPart::ADJECTIVE ); - a4.newLoopMatcher(0,0).newAlternative().newWordMatcher( "",SpeechPart::NOUN ); + a4.newWordMatcher( SpeechPart::ADJECTIVE ); + a4.newLoopMatcher(0,0).newAlternative().newWordMatcher( SpeechPart::NOUN ); assertFalseM( a.equals( a5 ), "a1 shouldn't be equal a5" ); } @@ -245,8 +236,8 @@ static void testEquality() { r6->addArgument( new ConstantExpression( AttributeValue::ACCUSATIVE ) ); Alternative & a6 = p1.newAlternative( "A N" ); - a6.newWordMatcher( "",SpeechPart::ADJECTIVE ).addRestriction( r6 ); - a6.newWordMatcher( "",SpeechPart::NOUN ); + a6.newWordMatcher( SpeechPart::ADJECTIVE ).addRestriction( r6 ); + a6.newWordMatcher( SpeechPart::NOUN ); assertFalseM( a.equals( a6 ), "a1 shouldn't be equal a6" ); @@ -255,8 +246,8 @@ static void testEquality() { r61->addArgument( new ConstantExpression( AttributeValue::ACCUSATIVE ) ); Alternative & a61 = p1.newAlternative( "A N" ); - a61.newWordMatcher( "",SpeechPart::ADJECTIVE ).addRestriction( r61 ); - a61.newWordMatcher( "",SpeechPart::NOUN ); + a61.newWordMatcher( SpeechPart::ADJECTIVE ).addRestriction( r61 ); + a61.newWordMatcher( SpeechPart::NOUN ); assertTrueM( a6.equals( a61 ), "a6 should be equal a61" ); @@ -265,8 +256,8 @@ static void testEquality() { r7->addArgument( new ConstantExpression( AttributeValue::NOMINATIVE ) ); Alternative & a7 = p1.newAlternative( "A N" ); - a7.newWordMatcher( "",SpeechPart::ADJECTIVE ).addRestriction( r7 ); - a7.newWordMatcher( "",SpeechPart::NOUN ); + a7.newWordMatcher( SpeechPart::ADJECTIVE ).addRestriction( r7 ); + a7.newWordMatcher( SpeechPart::NOUN ); assertFalseM( a6.equals( a7 ), "a6 shouldn't be equal a7" ); } @@ -274,29 +265,29 @@ static void testEquality() { // With bindings { Alternative & ab = p1.newAlternative( "A N" ); - ab.newWordMatcher( "",SpeechPart::ADJECTIVE ); - ab.newWordMatcher( "",SpeechPart::NOUN ); + ab.newWordMatcher( SpeechPart::ADJECTIVE ); + ab.newWordMatcher( SpeechPart::NOUN ); ab.addBinding( AttributeKey::CASE, new AttributeExpression( new VariableExpression( Variable( SpeechPart::ADJECTIVE, 0 ) ), AttributeKey::CASE ) ); assertFalseM( a.equals( ab ), "a shouldn't be equal ab" ); Alternative & ab1 = p1.newAlternative( "A N" ); - ab1.newWordMatcher( "",SpeechPart::ADJECTIVE ); - ab1.newWordMatcher( "",SpeechPart::NOUN ); + ab1.newWordMatcher( SpeechPart::ADJECTIVE ); + ab1.newWordMatcher( SpeechPart::NOUN ); ab1.addBinding( AttributeKey::CASE, new AttributeExpression( new VariableExpression( Variable( SpeechPart::ADJECTIVE, 0 ) ), AttributeKey::CASE ) ); assertTrueM( ab.equals( ab1 ), "ab should be equal ab1" ); Alternative & ab2 = p1.newAlternative( "A N" ); - ab2.newWordMatcher( "",SpeechPart::ADJECTIVE ); - ab2.newWordMatcher( "",SpeechPart::NOUN ); + ab2.newWordMatcher( SpeechPart::ADJECTIVE ); + ab2.newWordMatcher( SpeechPart::NOUN ); ab2.addBinding( AttributeKey::CASE, new AttributeExpression( new VariableExpression( Variable( SpeechPart::ADJECTIVE, 2 ) ), AttributeKey::CASE ) ); assertFalseM( ab.equals( ab2 ), "ab shouldn't be equal ab2" ); Alternative & ab3 = p1.newAlternative( "A N" ); - ab3.newWordMatcher( "",SpeechPart::ADJECTIVE ); - ab3.newWordMatcher( "",SpeechPart::NOUN ); + ab3.newWordMatcher( SpeechPart::ADJECTIVE ); + ab3.newWordMatcher( SpeechPart::NOUN ); ab3.addBinding( AttributeKey::DOC, new AttributeExpression( new VariableExpression( Variable( SpeechPart::ADJECTIVE, 0 ) ), AttributeKey::CASE ) ); assertFalseM( ab.equals( ab3 ), "ab shouldn't be equal ab3" ); @@ -306,15 +297,15 @@ static void testEquality() { static void testRemoveDuplicates() { Pattern p2("TP"); Alternative & a1 = p2.newAlternative( "A N" ); - a1.newWordMatcher( "",SpeechPart::ADJECTIVE ); - a1.newWordMatcher( "",SpeechPart::NOUN ); + a1.newWordMatcher( SpeechPart::ADJECTIVE ); + a1.newWordMatcher( SpeechPart::NOUN ); Alternative & a2 = p2.newAlternative( "A N" ); - a2.newWordMatcher( "",SpeechPart::ADJECTIVE ); - a2.newWordMatcher( "",SpeechPart::NOUN ); + a2.newWordMatcher( SpeechPart::ADJECTIVE ); + a2.newWordMatcher( SpeechPart::NOUN ); Alternative & a3 = p2.newAlternative( "A" ); - a3.newWordMatcher( "",SpeechPart::ADJECTIVE ); + a3.newWordMatcher( SpeechPart::ADJECTIVE ); p2.removeDuplicateAlternatives(); From ccc8b05822ab07955446c5f228f6eadac8834695 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Mon, 7 May 2018 15:44:46 +0300 Subject: [PATCH 06/24] Fixup commit no. 1 --- .../src/main/lspl/patterns/PatternBuilder.cpp | 15 ++- .../lspl/patterns/matchers/BaseComparator.cpp | 7 +- .../lspl/patterns/matchers/WordMatcher.cpp | 2 +- core/src/test/tests/MatchingTest.cpp | 123 +++++++++--------- core/src/test/tests/PatternsTest.cpp | 8 +- 5 files changed, 83 insertions(+), 72 deletions(-) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index f4039976..b517d047 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -195,8 +195,10 @@ class ParserImpl: public PatternBuilder::Parser { for (uint i = 0; i < contents.length(); ++i) { if (!isSpace(contents[i])) word += contents[i]; - else if (word.length() != 0) + else if (word.length() != 0) { words.push_back(word); + word.clear(); + } } if (word.length() != 0) words.push_back(word); @@ -231,13 +233,14 @@ class ParserImpl: public PatternBuilder::Parser { if (words.size() == 0) throw produceException("Empty string cannot be matched"); if (words.size() == 1) - return new TokenMatcher(words[0]); + return new TokenMatcher(words.front()); // Слов больше, чем одно. Создаём отдельные сопоставители для каждого слова LoopMatcher *wordMatcher = new LoopMatcher(1, 1); MatcherContainer &container = wordMatcher->newAlternative(); - for (std::string &word : words) + for (std::string &word : words) { container.addMatcher(new TokenMatcher(word)); + } return wordMatcher; } @@ -639,7 +642,7 @@ class ParserImpl: public PatternBuilder::Parser { throw produceException("Weak (=) and strong (==) agreements mixed"); exps.push_back(readAttributeExpression()); } - AgreementRestriction *restriction = new AgreementRestriction(); + AgreementRestriction *restriction = new AgreementRestriction(agreementType == "="); for (Expression *e : exps) restriction->addArgument(e); @@ -706,8 +709,10 @@ class ParserImpl: public PatternBuilder::Parser { std::vector > readAlternatives() { std::vector > alts; alts.push_back(readPermutation()); - while (strFollows("|")) + while (strFollows("|")) { + readStrFollows("|"); alts.push_back(readPermutation()); + } return alts; } diff --git a/core/src/main/lspl/patterns/matchers/BaseComparator.cpp b/core/src/main/lspl/patterns/matchers/BaseComparator.cpp index 51004e44..c89fb072 100644 --- a/core/src/main/lspl/patterns/matchers/BaseComparator.cpp +++ b/core/src/main/lspl/patterns/matchers/BaseComparator.cpp @@ -30,11 +30,12 @@ void AlternativeBaseComparator::dump(std::ostream & out, const std::string & tab bool AlternativeBaseComparator::match(const Word &word) const { bool result = negative; - for (const std::string &alt : alts) - if ((getWordValue(word) == alt) ^ negative) { + for (const std::string &alt : alts) { + if (getWordValue(word) == alt) { result = !result; break; } + } return result; } @@ -47,7 +48,7 @@ void RegexpBaseComparator::dump(std::ostream &out, const std::string &tabs) cons } bool RegexpBaseComparator::match(const Word &word) const { - return exp.FullMatch(getWordValue(word)); + return exp.FullMatch(getWordValue(word)) != negative; } bool RegexpBaseComparator::equals(const BaseComparator &other) const { diff --git a/core/src/main/lspl/patterns/matchers/WordMatcher.cpp b/core/src/main/lspl/patterns/matchers/WordMatcher.cpp index 9fea8119..d5919375 100644 --- a/core/src/main/lspl/patterns/matchers/WordMatcher.cpp +++ b/core/src/main/lspl/patterns/matchers/WordMatcher.cpp @@ -43,7 +43,7 @@ bool WordMatcher::matchTransition( const Transition & transition, const Context return false; if (baseComparator != nullptr && !baseComparator->match(word)) - return false; + return false; return matchRestrictions( transition, context ); } diff --git a/core/src/test/tests/MatchingTest.cpp b/core/src/test/tests/MatchingTest.cpp index 48b21254..eaf7c354 100644 --- a/core/src/test/tests/MatchingTest.cpp +++ b/core/src/test/tests/MatchingTest.cpp @@ -33,117 +33,116 @@ namespace lspl { namespace tests { static void testRegexpTokens() { // Regexp tokens - assertMatches( "", 0, 1, "''" ); - assertMatches( "", 0, 1, "'.*'" ); - assertMatches( "", 0, 1, "'.*'" ); - assertMatches( "", 0, 1, "'.*'" ); - assertNoMatches( "", "'.*'" ); + assertMatches( "", 0, 1, "Act = \"\"" ); + assertMatches( "", 0, 1, "Act = \".*\"" ); + assertMatches( "", 0, 1, "Act = \".*\"" ); + assertMatches( "", 0, 1, "Act = \".*\"" ); + assertNoMatches( "", "Act = \".*\"" ); } static void testSimplePatterns() { // Simple assertMatches( " ", 0, 2, "Act = N V" ); assertMatches( " ", 0, 2, "Act = N<> V" ); - assertMatches( " ", 0, 2, "Act = N V" ); - assertMatches( " ", 0, 2, "Act = N { V }<1,1>" ); - assertMatches( " ", 0, 2, "Act = N { { V }<1,1> }<1,1>" ); - assertMatches( " ", 0, 2, "Act = N V" ); + assertMatches( " ", 0, 2, "Act = N V<>" ); + assertMatches( " ", 0, 2, "Act = N { V<> }<1,1>" ); + assertMatches( " ", 0, 2, "Act = N { { V<> }<1,1> }<1,1>" ); + assertMatches( " ", 0, 2, "Act = N V<>" ); assertMatches( " ", 0, 2, "Act = N V" ); - assertMatches( " ", 0, 2, "Act = Noun Verb" ); assertMatches( " ", 0, 2, "Act = \"\" V" ); assertNoMatches( " ", "Act = N<> V" ); } static void testRestrictionsOnBase() { - // Pattern restrictions - assertMatches( " ", 0, 2, "N V " ); - assertNoMatches( " ", "N V " ); - - // Matcher restrictions - assertMatches( " ", 0, 2, "N V" ); - assertNoMatches( " ", "N V" ); + assertMatches( " ", 0, 2, "Act = N V " ); + assertMatches( " ", 0, 2, "Act = N V <>" ); + assertMatches( " ", 1, 2, "Act = V "); + assertMatches( " ", 1, 2, "Act = V <\".*.*\">"); + assertNoMatches( " ", "Act = V <\".*.*\">"); + assertNoMatches( " ", "Act = V " ); + assertNoMatches( " ", "Act = N V " ); + assertNoMatches( " ", "Act = N V<>" ); } static void testRestrictionsOnStem() { - // Pattern restrictions - assertMatches( " ", 0, 2, "N V " ); - assertNoMatches( " ", "N V " ); - - // Matcher restrictions - assertMatches( " ", 0, 2, "N V " ); - assertNoMatches( " ", "N V " ); + assertMatches( " ", 0, 2, "Act = N V " ); + assertMatches( " ", 0, 2, "Act = N V " ); + assertNoMatches( " ", "Act = N V " ); + assertMatches( " ", 1, 2, "Act = V " ); + assertNoMatches( " ", "Act = N V " ); } static void testTerm() { // Term - assertMatches( " ", 0, 3, "\"\" \"\" \"\"" ); - assertMatches( " ", 0, 3, "N1<> { \"\" \"\" | \"-\" }<1,1>" ); + assertMatches( " ", 0, 3, "Act = \"\" \"\" \"\"" ); + assertMatches( " ", 0, 3, "Act = \" \"" ); + assertMatches( " ", 0, 3, "Act = N1<> { \"\" \"\" | \"-\" }<1,1>" ); } static void testTokensInLoop() { assertMatches( " ", 0, 1, "Act = { \"\" }<1>" ); assertMatches( " ", 0, 2, "Act = { \"\" }<2>" ); - assertMatches( " ", 0, 3, "AAA = { '' }<3>" ); + assertMatches( " ", 0, 3, "AAA = { \"\" }<3>" ); } static void testLoopRestrictions() { // Loop restrictions: positive assertMatches( " ", 0, 2, "Act = {N} V" ); - assertMatches( " ", 0, 2, "Act = {N} V" ); - assertMatches( " ", 0, 2, "Act = {N} V" ); + assertMatches( " ", 0, 2, "Act = {N} V<>" ); + assertMatches( " ", 0, 2, "Act = {N} V<>" ); // Loop restriction: empty loop assertMatches( " ", 0, 1, "Act = [A] N" ); assertMatches( " ", 0, 1, "Act = {A} N" ); - assertMatches( " ", 0, 1, "Act = {A} N " ); + assertMatches( " ", 0, 1, "Act = {A} N <>" ); // Loop restriction: failing longest - assertMatches( " ", 1, 3, "Act = {A} N " ); + assertMatches( " ", 1, 3, "Act = {A} N <>" ); } static void testLoopAlternatives() { - assertMatches( " ", 0, 3, "AAA = { '' | '' }<3>" ); - assertMatches( " ", 0, 3, "AAA = { '' | '' }<3>" ); - assertMatches( " ", 0, 3, "AAA = { '' | '' }<3>" ); + assertMatches( " ", 0, 3, "AAA = { \"\" | \"\" }<3>" ); + assertMatches( " ", 0, 3, "AAA = { \"\" | \"\" }<3>" ); + assertMatches( " ", 0, 3, "AAA = { \"\" | \"\" }<3>" ); } static void testMultipleEquality() { // Multiple equality: positive - assertMatches( " ", 0, 3, "Act = A N V" ); - assertMatches( " ", 0, 3, "Act = A N V" ); + assertMatches( " ", 0, 3, "Act = A N V<>" ); + assertMatches( " ", 0, 3, "Act = A N V<>" ); // Multiple equality: negative - assertNoMatches( " ", "Act = A N V" ); - assertNoMatches( " ", "Act = A N V" ); - assertNoMatches( " ", "Act = A N V" ); - assertNoMatches( " ", "Act = A N V" ); + assertNoMatches( " ", "Act = A N V<>" ); + assertNoMatches( " ", "Act = A N V<>" ); + assertNoMatches( " ", "Act = A N V<>" ); + assertNoMatches( " ", "Act = A N V<>" ); } static void testBaseEquality() { // Base equality - assertMatches( ", , ?", 0, 3, "Act = W1 \",\" W2 " ); + assertMatches( ", , ?", 0, 3, "Act = W1 \",\" W2 <>" ); assertNoMatches( " ", "Act = N V" ); - assertNoMatches( " ", "Act = N V" ); - assertNoMatches( " ", "Act = W1 W2 " ); + assertNoMatches( " ", "Act = N V<>" ); + assertNoMatches( " ", "Act = W1 W2 <>" ); - assertMatches( " ", 0, 2, "Act = N V ( N V )" ); + assertMatches( " ", 0, 2, "Act = N V<> " ); } static void testReusing1() { NamespaceRef ns = new Namespace(); - assertMatchesNS( ns, " ", 0, 1, "AA = A (A) | Pa (Pa)" ); + assertMatchesNS( ns, " ", 0, 1, "AA (A, Pa) = A | Pa" ); assertMatchesNS( ns, " ", 0, 2, "TestA = AA N" ); - assertMatchesNS( ns, " ", 0, 2, "TestB = AA N " ); + assertMatchesNS( ns, " ", 0, 2, "TestB = AA N <>" ); } static void testReusing2() { NamespaceRef ns = new Namespace(); - assertNoMatchesNS( ns, " ", "AA = A (A) | Pa (Pa)" ); + assertNoMatchesNS( ns, " ", "AA(A, Pa) = A | Pa" ); assertMatchesNS( ns, " ", 0, 1, "TestA = {AA} N" ); - assertMatchesNS( ns, " ", 0, 1, "TestB = {AA} N " ); + assertMatchesNS( ns, " ", 0, 1, "TestB = {AA} N <>" ); } static void testCommonNamespace() { @@ -151,9 +150,16 @@ static void testCommonNamespace() { assertMatchesNS( ns, " ", 0, 1, "UN = N" ); assertMatchesNS( ns, " ", 0, 2, "AB = UN V" ); - assertMatchesNS( ns, " ", 0, 2, "AC = UN V " ); + assertMatchesNS( ns, " ", 0, 2, "AC = UN V <>" ); +} + +static void testStrictAgreement() { + assertMatches( " ", 0, 2, "Pattern = W1 W2 <>" ); + assertNoMatches( " ", "Pattern = W1 W2 <>" ); + assertMatches( " ", 0, 2, "Pattern = W1 W2 <>" ); } +/* static void testCompoundAttributes() { NamespaceRef ns = new Namespace(); patterns::PatternBuilderRef builder = new patterns::PatternBuilder( ns ); @@ -198,18 +204,18 @@ static void testDictionariesWithLiterals() { assertMatchesNS( ns, " ", 1, 3, "AA = W1 W2 " ); assertNoMatchesNS( ns, " ", "AB = W1 W2 " ); -} +}*/ static void testRestrictedMatches() { AgreementRestriction r; r.addArgument( new AttributeExpression( new VariableExpression( SpeechPart::VERB, 1 ), AttributeKey::BASE ) ); r.addArgument( new ConstantExpression( "" ) ); - assertMatches( " ", 0, 2, "N1 V1" ); - assertMatches( " ", 1, 3, "N1 V1" ); + assertMatches( " ", 0, 2, "Act = N1 V1" ); + assertMatches( " ", 1, 3, "Act = N1 V1" ); - assertRestrictedMatches( " ", 0, 2, "N1 V1", r ); - assertNoRestrictedMatches( " ", "N1 V1", r ); + assertRestrictedMatches( " ", 0, 2, "Act = N1 V1", r ); + assertNoRestrictedMatches( " ", "Act = N1 V1", r ); } cute::suite matchingSuite() { @@ -228,11 +234,12 @@ cute::suite matchingSuite() { s += CUTE(testReusing1); s += CUTE(testReusing2); s += CUTE(testCommonNamespace); - s += CUTE(testCompoundAttributes); - s += CUTE(testDictionaries); - s += CUTE(testDictionariesWithConcat); - s += CUTE(testDictionariesWithLiterals); + //s += CUTE(testCompoundAttributes); + //s += CUTE(testDictionaries); + //s += CUTE(testDictionariesWithConcat); + //s += CUTE(testDictionariesWithLiterals); s += CUTE(testRestrictedMatches); + s += CUTE(testStrictAgreement); return s; } diff --git a/core/src/test/tests/PatternsTest.cpp b/core/src/test/tests/PatternsTest.cpp index 4de320e6..3cf925ef 100644 --- a/core/src/test/tests/PatternsTest.cpp +++ b/core/src/test/tests/PatternsTest.cpp @@ -9,8 +9,6 @@ #include "../assertions/PatternsAssertions.h" -#include - #include #include #include @@ -121,8 +119,8 @@ static void testParameters() { static void testMultipleEquals() { // Multiple equal - assertBuilds("AAA = N1 N2 N3 "); - assertBuilds("AAA = N1 N2 N3 "); + assertBuilds("AAA = N1 N2 N3 <>"); + assertBuilds("AAA = N1 N2 N3 <>"); } /*static void testDictionaries() { @@ -190,7 +188,7 @@ cute::suite patternBuildingSuite() { //s += CUTE(testDictionaries); //s += CUTE(testExpressionsInDictionaries); //s += CUTE(testLiteralsInDictionaries); - //s += CUTE(testParentNamespace); + s += CUTE(testParentNamespace); s += CUTE(testMemoryLeaks); return s; From 9f903462095e707c41e4933ea47e08c8905def92 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Wed, 9 May 2018 00:33:38 +0300 Subject: [PATCH 07/24] Memory leak fixed (with unique_ptr enforced) --- .../src/main/lspl/patterns/PatternBuilder.cpp | 85 ++++++++++--------- .../lspl/patterns/matchers/MatcherContainer.h | 9 ++ 2 files changed, 55 insertions(+), 39 deletions(-) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index b517d047..64cce1af 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -40,6 +40,8 @@ LSPL_REFCOUNT_CLASS( lspl::patterns::PatternBuilder ); namespace lspl { namespace patterns { +typedef std::unique_ptr MatcherPtr; + class ParserImpl: public PatternBuilder::Parser { private: const char *buffer; @@ -222,10 +224,10 @@ class ParserImpl: public PatternBuilder::Parser { /** * элемент_строка := "регулярное выражение" */ - Matcher* readStringMatcher() { + MatcherPtr readStringMatcher() { std::string contents = readStringConstant(); if (isRegexp(contents)) - return new RegexpMatcher(contents); + return MatcherPtr(new RegexpMatcher(contents)); // Разделяем на отдельные слова, если строка не является // регулярным выражением @@ -233,7 +235,7 @@ class ParserImpl: public PatternBuilder::Parser { if (words.size() == 0) throw produceException("Empty string cannot be matched"); if (words.size() == 1) - return new TokenMatcher(words.front()); + return MatcherPtr(new TokenMatcher(words.front())); // Слов больше, чем одно. Создаём отдельные сопоставители для каждого слова LoopMatcher *wordMatcher = new LoopMatcher(1, 1); @@ -241,7 +243,7 @@ class ParserImpl: public PatternBuilder::Parser { for (std::string &word : words) { container.addMatcher(new TokenMatcher(word)); } - return wordMatcher; + return MatcherPtr(wordMatcher); } /** @@ -252,7 +254,7 @@ class ParserImpl: public PatternBuilder::Parser { * * простой элемент := элемент_строка | элемент_слово | экземпляр_шаблона */ - Matcher* readMatcher() { + MatcherPtr readMatcher() { skipSpaces(); if (strFollows("{")) return readNestedMatcher(0, 0, "{", "}", true); @@ -296,9 +298,9 @@ class ParserImpl: public PatternBuilder::Parser { * Параметр allow задаёт, можно ли переопределять значения min и max в самом коде шаблона * */ - Matcher* readNestedMatcher(uint min, uint max, const char* lbrace, const char* rbrace, bool allow) { + MatcherPtr readNestedMatcher(uint min, uint max, const char* lbrace, const char* rbrace, bool allow) { readStrFollows(lbrace); - std::vector > alts = readAlternatives(); + std::vector > alts = readAlternatives(); readStrFollows(rbrace); if (allow && strFollows("<") && !strFollows("<<")) { @@ -312,9 +314,9 @@ class ParserImpl: public PatternBuilder::Parser { } LoopMatcher *matcher = new LoopMatcher(min, max); - for (std::vector &alt : alts) - matcher->newAlternative().addMatchers(alt.begin(), alt.end()); - return matcher; + for (std::vector &alt : alts) + matcher->newAlternative().addMatchers(alt); + return MatcherPtr(matcher); } /** @@ -346,8 +348,8 @@ class ParserImpl: public PatternBuilder::Parser { /** * условия_на_лемму ::= [ lemma = ] лемма { | лемма } | [ lemma ] != лемма { | лемма } */ - void readLemmaRestriction(Matcher *matcher) { - WordMatcher *word_m = dynamic_cast(matcher); + void readLemmaRestriction(MatcherPtr &matcher) { + WordMatcher *word_m = dynamic_cast(matcher.get()); if (word_m == nullptr) throw produceException("No lemma restrictions on a non-word matcher"); @@ -375,10 +377,10 @@ class ParserImpl: public PatternBuilder::Parser { /** * условия_на_основу ::= stem = основа { | основа } | stem != основа { | основа} */ - void readStemRestriction(Matcher *matcher) { + void readStemRestriction(MatcherPtr &matcher) { readStrFollows("stem"); - WordMatcher *word_m = dynamic_cast(matcher); + WordMatcher *word_m = dynamic_cast(matcher.get()); if (word_m == nullptr) throw produceException("No stem restrictions on a non-word matcher"); @@ -452,7 +454,7 @@ class ParserImpl: public PatternBuilder::Parser { * [ название_ признака ] != значение_ признака { | значение_ признака } * */ - void readAttributeRestriction(Matcher *matcher) { + void readAttributeRestriction(MatcherPtr &matcher) { bool negative = false; // != std::string attributeName; std::vector valueNames; @@ -494,7 +496,7 @@ class ParserImpl: public PatternBuilder::Parser { * Чтение одного ограничения сопоставителя * */ - void readMatcherRestriction(Matcher *matcher) { + void readMatcherRestriction(MatcherPtr &matcher) { if (strFollows("lemma") || isCyrillic(buffer[pos]) || strFollows("\"")) readLemmaRestriction(matcher); else if (strFollows("stem")) @@ -520,7 +522,7 @@ class ParserImpl: public PatternBuilder::Parser { /* * Чтение списка ограничений сопоставителя */ - void readMatcherRestrictions(Matcher *matcher) { + void readMatcherRestrictions(MatcherPtr &matcher) { readStrFollows("<"); readMatcherRestriction(matcher); while (!strFollows(">")) { @@ -535,41 +537,46 @@ class ParserImpl: public PatternBuilder::Parser { * * экземпляр-шаблона ::= имя_шаблона [индекс] | имя_шаблона [индекс] <характеристика { , характеристика }> */ - Matcher* readPatternMatcher(PatternRef pattern, uint index) { + MatcherPtr readPatternMatcher(PatternRef pattern, uint index) { PatternMatcher *matcher = new PatternMatcher(*pattern); + MatcherPtr result(matcher); matcher->variable = Variable(*pattern, index); if (strFollows("<") && !strFollows("<<")) - readMatcherRestrictions(matcher); - return matcher; + readMatcherRestrictions(result); + return result; } /* * Считать сопоставитель-слово */ - Matcher* readWordMatcher(const SpeechPart &sp, uint index) { + MatcherPtr readWordMatcher(const SpeechPart &sp, uint index) { WordMatcher *matcher = new WordMatcher(sp); + MatcherPtr result(matcher); matcher->variable = Variable(sp, index); if (strFollows("<") && !strFollows("<<")) - readMatcherRestrictions(matcher); - return matcher; + readMatcherRestrictions(result); + return result; } /* * Сгенерировать сопоставитель, реализующий перестановку из * указанного набора сопоставителей */ - Matcher* makePermutationMatcher(std::vector source) { + MatcherPtr makePermutationMatcher(std::vector source) { if (source.size() == 0) throw produceException("Internal error: empty permutation requested"); if (source.size() == 1) - return source.front(); + return std::move(source.front()); + std::vector permutation; + for (uint i = 0; i < source.size(); ++i) + permutation.push_back(source[i].release()); LoopMatcher *wordMatcher = new LoopMatcher(1, 1, true); - sort(source.begin(), source.end()); + sort(permutation.begin(), permutation.end()); do { - wordMatcher->newAlternative().addMatchers(source.begin(), source.end()); - } while (next_permutation(source.begin(), source.end())); - return wordMatcher; + wordMatcher->newAlternative().addMatchers(permutation.begin(), permutation.end()); + } while (next_permutation(permutation.begin(), permutation.end())); + return MatcherPtr(wordMatcher); } /** @@ -632,7 +639,7 @@ class ParserImpl: public PatternBuilder::Parser { * условие ::= условие_ согласования * условие_ согласования ::= имя = имя { = имя } | имя == имя { == имя } */ - void readPermutationRestriction(std::vector &matchers) { + void readPermutationRestriction(std::vector &matchers) { std::vector exps(1, readAttributeExpression()); std::string agreementType = readAgreement(); @@ -648,7 +655,7 @@ class ParserImpl: public PatternBuilder::Parser { // Ок, теперь нужно найти необходимый сопоставитель. Перебираем их от последнего к первому и смотрим - for (std::vector::reverse_iterator it = matchers.rbegin(); it != matchers.rend(); ++it) + for (std::vector::reverse_iterator it = matchers.rbegin(); it != matchers.rend(); ++it) if ((*it)->variable != Variable() && restriction->containsVariable((*it)->variable)) { (*it)->addRestriction(restriction); return; @@ -665,7 +672,7 @@ class ParserImpl: public PatternBuilder::Parser { * * условия ::= условие {, условие } */ - void readPermutationRestrictions(std::vector &matchers) { + void readPermutationRestrictions(std::vector &matchers) { readStrFollows("<<"); readPermutationRestriction(matchers); while (strFollows(",")) { @@ -687,9 +694,9 @@ class ParserImpl: public PatternBuilder::Parser { * последовательность элементов := элемент_шаблона { элемент_шаблона } * */ - std::vector readPermutation() { - std::vector matchers; - std::vector permutation; + std::vector readPermutation() { + std::vector matchers; + std::vector permutation; permutation.push_back(readMatcher()); static std::string followers = "[({\"~"; @@ -706,8 +713,8 @@ class ParserImpl: public PatternBuilder::Parser { return matchers; } - std::vector > readAlternatives() { - std::vector > alts; + std::vector > readAlternatives() { + std::vector > alts; alts.push_back(readPermutation()); while (strFollows("|")) { readStrFollows("|"); @@ -721,8 +728,8 @@ class ParserImpl: public PatternBuilder::Parser { */ void readAlternativeWithSource(PatternRef pattern) { uint before_pos = pos; - std::vector alt = readPermutation(); - pattern->newAlternative(std::string(buffer + before_pos, buffer + pos)).addMatchers(alt.begin(), alt.end()); + std::vector alt = readPermutation(); + pattern->newAlternative(std::string(buffer + before_pos, buffer + pos)).addMatchers(alt); } /** diff --git a/core/src/main/lspl/patterns/matchers/MatcherContainer.h b/core/src/main/lspl/patterns/matchers/MatcherContainer.h index 1dbe6632..f8b0d79f 100644 --- a/core/src/main/lspl/patterns/matchers/MatcherContainer.h +++ b/core/src/main/lspl/patterns/matchers/MatcherContainer.h @@ -82,6 +82,15 @@ class LSPL_EXPORT MatcherContainer { matchers.transfer( matchers.end(), b, e, r ); } + /** + * Переместить сопоставители в контейнер + */ + void addMatchers(std::vector > &v) { + for (uint i = 0; i < v.size(); ++i) + addMatcher(v[i].release()); + } + + /** * Получить количество сопоставителей в контейнере. Этот метод возвращает размер массива сопоставителей, т.е. кол-во как существующих сопоставителей, так и путсых ссылок на них в контейнере. * @return кол-во сопоставителей From 333d91eae9f2bdfdf8c3d856f995b331603572bb Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Tue, 29 May 2018 11:05:49 +0300 Subject: [PATCH 08/24] Utilities build fixed --- common/CMakeCommon.cmake | 4 ++-- tools/CMakeLists.txt | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/common/CMakeCommon.cmake b/common/CMakeCommon.cmake index a7b09259..eae8b7cc 100644 --- a/common/CMakeCommon.cmake +++ b/common/CMakeCommon.cmake @@ -39,9 +39,9 @@ else(WIN32) ${CMAKE_SOURCE_DIR}/../deps/aot/Source/LemmatizerLib/ ${CMAKE_SOURCE_DIR}/../deps/aot/Source/MorphWizardLib/ ${CMAKE_SOURCE_DIR}/../deps/aot/Source/StructDictLib/ - ${CMAKE_SOURCE_DIR}/../core/build-linux64/ + ${CMAKE_SOURCE_DIR}/../core/ ) SET(MY_TARGETLIB_PREFIX "lib") SET(MY_TARGETLIB_SUFFIX ".so") -endif(WIN32) \ No newline at end of file +endif(WIN32) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index e765fbb5..2dcc2824 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -13,7 +13,7 @@ set(LSPL_CONSOLE_SOURCES add_executable(lspl-console ${LSPL_CONSOLE_SOURCES}) -target_link_libraries(lspl-console lspl) +target_link_libraries(lspl-console lspl boost_system) ### LsplGenerator @@ -23,9 +23,9 @@ set(LSPL_GENERATOR_SOURCES src/generator.cpp ) -add_executable(lspl-gen ${LSPL_GENERATOR_SOURCES} boost_system) +add_executable(lspl-gen ${LSPL_GENERATOR_SOURCES}) -target_link_libraries(lspl-gen lspl) +target_link_libraries(lspl-gen lspl boost_system) ### lspl-find @@ -35,7 +35,7 @@ set(LSPL_FIND_SOURCES add_executable(lspl-find ${LSPL_FIND_SOURCES}) -target_link_libraries(lspl-find lspl) +target_link_libraries(lspl-find lspl boost_system) ### lspl-DictionaryRecognizer @@ -51,7 +51,7 @@ set(LSPL_DICTIONARY_RECOGNIZER_SOURCES add_executable(lspl-dictionary-recognizer ${LSPL_DICTIONARY_RECOGNIZER_SOURCES}) -target_link_libraries(lspl-dictionary-recognizer lspl) +target_link_libraries(lspl-dictionary-recognizer lspl boost_system) ### lspl-Similarity @@ -70,7 +70,7 @@ set(LSPL_SIMILARITY_SOURCES add_executable(lspl-similarity ${LSPL_SIMILARITY_SOURCES}) -target_link_libraries(lspl-similarity lspl) +target_link_libraries(lspl-similarity lspl boost_system) ### lspl-RangeSetDecartTreeTest @@ -82,7 +82,7 @@ set(LSPL_RANGESET_DECART_TREE_TEST_SOURCES add_executable(lspl-rangeset-decart-tree-test ${LSPL_RANGESET_DECART_TREE_TEST_SOURCES}) -target_link_libraries(lspl-rangeset-decart-tree-test lspl) +target_link_libraries(lspl-rangeset-decart-tree-test lspl boost_system) ### lspl-UtilTest From 9bff42be99a196774121505daac9ae55839156ba Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Tue, 29 May 2018 12:40:27 +0300 Subject: [PATCH 09/24] Patterns bindings at the end of definition are fixed --- .../src/main/lspl/patterns/PatternBuilder.cpp | 65 ++++++++++++++++--- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index 64cce1af..d8aa0692 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -257,11 +257,11 @@ class ParserImpl: public PatternBuilder::Parser { MatcherPtr readMatcher() { skipSpaces(); if (strFollows("{")) - return readNestedMatcher(0, 0, "{", "}", true); + return readNestedMatcher(0, 0, "{", "}", true, false); if (strFollows("[")) - return readNestedMatcher(0, 1, "[", "]", false); + return readNestedMatcher(0, 1, "[", "]", false, false); if (strFollows("(")) - return readNestedMatcher(1, 1, "(", ")", false); + return readNestedMatcher(1, 1, "(", ")", false, true); if (strFollows("\"")) return readStringMatcher(); @@ -297,10 +297,28 @@ class ParserImpl: public PatternBuilder::Parser { * * Параметр allow задаёт, можно ли переопределять значения min и max в самом коде шаблона * + * Параметр canBeBinding задаёт, может ли вложенный список альтернатив на самом деле + * быть параметрами шаблона + * */ - MatcherPtr readNestedMatcher(uint min, uint max, const char* lbrace, const char* rbrace, bool allow) { + MatcherPtr readNestedMatcher(uint min, uint max, const char* lbrace, const char* rbrace, + bool allow, bool canBeBinding) { + uint before_pos = pos; + readStrFollows(lbrace); - std::vector > alts = readAlternatives(); + std::vector > alts; + try { + alts = readAlternatives(); + } catch (PatternBuildingException &e) { + if (!canBeBinding) throw e; + pos = before_pos; + return nullptr; + } + if (canBeBinding && alts.size() == 1) { + pos = before_pos; + return nullptr; + } + readStrFollows(rbrace); if (allow && strFollows("<") && !strFollows("<<")) { @@ -399,6 +417,17 @@ class ParserImpl: public PatternBuilder::Parser { word_m->setBaseComparator(readAlternativeBaseComparator(new StemComparator(negative))); } + /** + * Получить имя аттрибута по его имени или сокращению + */ + AttributeKey searchForAttributeByName(const std::string &name) { + AttributeKey key = AttributeKey::findByAbbrevation(name); + if (key != AttributeKey::UNDEFINED) + return key; + key = AttributeKey::findByName(name); + return key; + } + /** * Создаёт для сопоставителя ограничение на характеристику attributeName, которая в качестве * значений может принимать аргументы из набора attributeNames @@ -413,7 +442,7 @@ class ParserImpl: public PatternBuilder::Parser { if ((it = std::find(values.begin(), values.end(), AttributeValue::UNDEFINED)) != values.end()) throw produceException("Unknown attribute value \"" + attributeNames[it - values.begin()] + "\""); - AttributeKey key = attributeName != "" ? AttributeKey::findByAbbrevation(attributeName) + AttributeKey key = attributeName != "" ? searchForAttributeByName(attributeName) : Morphology::instance().getAttributeKeyByValue(attributeNames.front()); if (key == AttributeKey::UNDEFINED) throw produceException("Unable to retrieve attribute type"); @@ -698,16 +727,27 @@ class ParserImpl: public PatternBuilder::Parser { std::vector matchers; std::vector permutation; permutation.push_back(readMatcher()); + if (!permutation.back()) { + permutation.pop_back(); + return permutation; + } + static std::string followers = "[({\"~"; - while (!seekEndOfInput() && (isLatin(buffer[pos]) || followers.find(buffer[pos]) != std::string::npos)) { + while (!seekEndOfInput() + && (isLatin(buffer[pos]) || followers.find(buffer[pos]) != std::string::npos) + && permutation.back()) { if (strFollows("~")) readStrFollows("~"); else matchers.push_back(makePermutationMatcher(std::move(permutation))); permutation.push_back(readMatcher()); } - matchers.push_back(makePermutationMatcher(std::move(permutation))); + + if (!permutation.back()) + permutation.pop_back(); + if (!permutation.empty()) + matchers.push_back(makePermutationMatcher(std::move(permutation))); if (strFollows("<<")) readPermutationRestrictions(matchers); return matchers; @@ -784,8 +824,10 @@ class ParserImpl: public PatternBuilder::Parser { std::vector arguments; uint alternativeCountBefore = pattern->alternatives.size(); + bool hasPatternAttributes = false; // Параметры шаблона (слева) if (strFollows("(")) { + hasPatternAttributes = true; arguments = readPatternArguments(pattern); } @@ -796,6 +838,13 @@ class ParserImpl: public PatternBuilder::Parser { readAlternativeWithSource(pattern); } + if (strFollows("(")) { + if (hasPatternAttributes) + throw produceException("Double pattern attributes declaration"); + hasPatternAttributes = true; + arguments = readPatternArguments(pattern); + } + for (Expression *exp : arguments) { for (uint i = alternativeCountBefore; i < pattern->alternatives.size(); ++i) appendAlternativeBinding(pattern->alternatives[i], exp); From 49a47af76cf14178db5f7760d7196b163395ab9d Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Wed, 30 May 2018 22:55:45 +0300 Subject: [PATCH 10/24] Parser integer exceptions fixed --- core/src/main/lspl/patterns/PatternBuilder.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index d8aa0692..cd11d707 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -136,13 +136,20 @@ class ParserImpl: public PatternBuilder::Parser { */ uint readUInt() { std::string token = readToken(); + if (token.length() == 0) { + throw produceException(std::string("Integer expected, but ") + + (seekEndOfInput() + ? std::string("end of file") + : std::string() + buffer[pos]) + + " found"); + } uint index = 0; try { index = std::stoul(token); } catch (std::out_of_range &e) { throw produceException("Integer overflow"); - } catch (...) { - throw produceException("Unknown exception"); + } catch (std::invalid_argument &e) { + throw produceException("Invalid integer \"" + token + "\""); } return index; } From 94fb55d4626ddcd5ebac6358497df158c68c4797 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Sun, 3 Jun 2018 16:25:49 +0300 Subject: [PATCH 11/24] Friendlier exception messages in lspl-console --- core/src/main/lspl/utils/Console.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/core/src/main/lspl/utils/Console.cpp b/core/src/main/lspl/utils/Console.cpp index 1af52c31..152461a6 100644 --- a/core/src/main/lspl/utils/Console.cpp +++ b/core/src/main/lspl/utils/Console.cpp @@ -66,12 +66,24 @@ void Console::run() { return; } else if ( command == "define" ) { std::ostringstream out; - patterns::PatternBuilder::BuildInfo bi = patternBuilder.build( args ); - - out << "Parsed " << bi.parseLength << " characters. "; + bool success = true; + + patterns::PatternBuilder::BuildInfo bi; + try { + bi = patternBuilder.build( args ); + } catch (patterns::PatternBuildingException &e) { + success = false; + out << "Exception happened during parsing: " << std::endl; + out << " " << e.what() << std::endl; + out << "Context:" << std::endl; + out << " " << e.input << std::endl << " "; + for (uint i = 0; i < e.errorPos; ++i) + out << ' '; + out << '^' << std::endl; + } - if ( bi.parseTail.length() > 0 ) - out << "Not parsed: \"" << bi.parseTail << "\" "; + if (success) + out << "Ok, parsed " << bi.parseLength << " characters"; output << outputConversion.convert( out.str() ) << std::endl; } else if ( command == "dump" ){ From 8dcdf573bd3596ab41cbb92850727a4e69dce315 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Sun, 3 Jun 2018 18:49:38 +0300 Subject: [PATCH 12/24] Transforms are hopefully working. Enabled special characters (_ and -) in pattern names --- core/src/main/lspl/patterns/Alternative.h | 4 +- .../src/main/lspl/patterns/PatternBuilder.cpp | 70 ++++++++++++++----- core/src/main/lspl/patterns/PatternBuilder.h | 2 +- tools/src/find.cpp | 18 ++++- 4 files changed, 71 insertions(+), 23 deletions(-) diff --git a/core/src/main/lspl/patterns/Alternative.h b/core/src/main/lspl/patterns/Alternative.h index 044a3457..5ce88193 100644 --- a/core/src/main/lspl/patterns/Alternative.h +++ b/core/src/main/lspl/patterns/Alternative.h @@ -102,6 +102,8 @@ class LSPL_EXPORT Alternative : public matchers::MatcherContainer { public: + std::string transformSource; + /** * Добавить связывания аттрибутов */ @@ -152,8 +154,6 @@ class LSPL_EXPORT Alternative : public matchers::MatcherContainer { */ std::string source; - std::string transformSource; - /** * Список связываний аттрибутов шаблона */ diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index cd11d707..4b49e71e 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -120,13 +120,22 @@ class ParserImpl: public PatternBuilder::Parser { pos += strlen(pattern); } + /** + * Допустим ли символ в токене + */ + static bool isAllowedCharacterInToken(char c) { + if (c == '_' || c == '-') + return true; + return !isInvalidChar(c) && !isPunct(c) && !isSpace(c); + } + /** * Считывает токен */ std::string readToken() { skipSpaces(); std::string token; - while (!isInvalidChar(buffer[pos]) && !isPunct(buffer[pos]) && !isSpace(buffer[pos])) + while (isAllowedCharacterInToken(buffer[pos])) token += buffer[pos++]; return token; } @@ -257,9 +266,9 @@ class ParserImpl: public PatternBuilder::Parser { * Обработка элемента шаблона * * элемент_шаблона ::= простой_элемент | опциональный_элемент | повторение_элементов - * | (набор_альтернатив) - * - * простой элемент := элемент_строка | элемент_слово | экземпляр_шаблона + * | (набор_альтернатив) + * + * простой элемент := элемент_строка | элемент_слово | экземпляр_шаблона */ MatcherPtr readMatcher() { skipSpaces(); @@ -722,13 +731,13 @@ class ParserImpl: public PatternBuilder::Parser { * Обработка шаблона распознавания (последовательности перестановок) * * шаблон_распознавания ::= последовательность_перестановок - * - * последовательность_перестановок ::= последовательность_элементов - * { ~ последовательность_элементов } - * [ <<условия>> ] - * - * последовательность элементов := элемент_шаблона { элемент_шаблона } - * + * + * последовательность_перестановок ::= последовательность_элементов + * { ~ последовательность_элементов } + * [ <<условия>> ] + * + * последовательность элементов := элемент_шаблона { элемент_шаблона } + * */ std::vector readPermutation() { std::vector matchers; @@ -821,7 +830,6 @@ class ParserImpl: public PatternBuilder::Parser { * [ (параметры_шаблона) ] * [ =text> шаблоны_извлечения_текста ] * - * TODO: =>text пока никак не обрабатывается */ void readPattern() { std::string patternName = readPatternName(); @@ -838,6 +846,7 @@ class ParserImpl: public PatternBuilder::Parser { arguments = readPatternArguments(pattern); } + // Описание шаблона readStrFollows("="); readAlternativeWithSource(pattern); while (strFollows("|")) { @@ -845,6 +854,7 @@ class ParserImpl: public PatternBuilder::Parser { readAlternativeWithSource(pattern); } + // Параметры шаблона (справа) if (strFollows("(")) { if (hasPatternAttributes) throw produceException("Double pattern attributes declaration"); @@ -852,11 +862,40 @@ class ParserImpl: public PatternBuilder::Parser { arguments = readPatternArguments(pattern); } + // Подключение параметров шаблона к соответствующим им альтернативам for (Expression *exp : arguments) { for (uint i = alternativeCountBefore; i < pattern->alternatives.size(); ++i) appendAlternativeBinding(pattern->alternatives[i], exp); delete exp; } + + // Преобразование(?) + if (strFollows("=")) { + readStrFollows("="); + std::string transformName = readToken(); + readStrFollows(">"); + + auto tf = transformBuilders.find(transformName); + if (tf == transformBuilders.end()) + throw produceException("Undefined transform name =" + transformName + ">"); + + // Предсказываем окончание преобразования + uint end_pos = pos; + while (buffer[end_pos] != '\0' && buffer[end_pos] != '\n' && buffer[end_pos] != '\r') + ++end_pos; + + for (uint i = alternativeCountBefore; i < pattern->alternatives.size(); ++i) { + pattern->alternatives[i].transformSource = std::string(buffer + pos, buffer + end_pos); + pattern->alternatives[i].setTransform( + std::auto_ptr(tf->second->build( + pattern->alternatives[i], + pattern->alternatives[i].getTransformSource() + )) + ); + } + + pos = end_pos; + } } public: @@ -879,18 +918,13 @@ class ParserImpl: public PatternBuilder::Parser { }; -PatternBuilder::PatternBuilder( const NamespaceRef & ns, transforms::TransformBuilderRef defaultTransformBuilder ) : +PatternBuilder::PatternBuilder( const NamespaceRef & ns ) : space( ns ), parser( new ParserImpl( space, transformBuilders ) ) { - transformBuilders.insert(std::make_pair("", defaultTransformBuilder)); transformBuilders.insert(std::make_pair("text", new transforms::TextTransformBuilder( space ))); transformBuilders.insert(std::make_pair("pattern", new transforms::PatternTransformBuilder( space ))); } -PatternBuilder::PatternBuilder( const NamespaceRef & ns ) : PatternBuilder(ns, new transforms::DummyTransformBuilder()) { - // Just delegate to two-arg constructor with dummy transform as default -} - PatternBuilder::~PatternBuilder() { } diff --git a/core/src/main/lspl/patterns/PatternBuilder.h b/core/src/main/lspl/patterns/PatternBuilder.h index 7d67e6d7..86f83fec 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.h +++ b/core/src/main/lspl/patterns/PatternBuilder.h @@ -63,7 +63,7 @@ class LSPL_EXPORT PatternBuilder : public base::RefCountObject, public base::Ide public: PatternBuilder( const NamespaceRef & ns = new Namespace() ); - PatternBuilder( const NamespaceRef & ns, transforms::TransformBuilderRef defaultTransformBuilder); + virtual ~PatternBuilder(); /** diff --git a/tools/src/find.cpp b/tools/src/find.cpp index 6caf7a30..8c891b21 100644 --- a/tools/src/find.cpp +++ b/tools/src/find.cpp @@ -104,11 +104,25 @@ lspl::patterns::PatternList buildGoals( const lspl::patterns::PatternBuilderRef return goals; } +template +bool checkForTransformType(const lspl::patterns::PatternRef &pattern) { + for (const lspl::patterns::Alternative &alt : pattern->getAlternatives()) { + if (!alt.hasTransform()) continue; + try { + dynamic_cast(alt.getTransform()); + } catch (...) { + continue; + } + return true; + } + return false; +} + void processGoal( const lspl::patterns::PatternRef & goal, const lspl::text::TextRef & text, std::ostream *outs[], std::ostream & err ) { int patternType; - if (goal->getSource().find("=pattern>") != std::string::npos) { + if (checkForTransformType(goal)) { patternType = 2; - } else if (goal->getSource().find("=text>") != std::string::npos) { + } else if (checkForTransformType(goal)) { patternType = 1; } else { patternType = 0; From 3516cfd1d930d37d2ca23cadd9a72b30363936df Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Wed, 9 Jan 2019 23:35:14 +0300 Subject: [PATCH 13/24] Revert some changes in buildscripts made before upstream fixed everything --- common/CMakeCommon.cmake | 2 +- core/CMakeLists.txt | 4 ++-- tools/CMakeLists.txt | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/common/CMakeCommon.cmake b/common/CMakeCommon.cmake index 3e448384..119125cd 100644 --- a/common/CMakeCommon.cmake +++ b/common/CMakeCommon.cmake @@ -45,7 +45,7 @@ else(WIN32) ${CMAKE_SOURCE_DIR}/../deps/aot/Source/LemmatizerLib/ ${CMAKE_SOURCE_DIR}/../deps/aot/Source/MorphWizardLib/ ${CMAKE_SOURCE_DIR}/../deps/aot/Source/StructDictLib/ - ${CMAKE_SOURCE_DIR}/../core/ + ${CMAKE_SOURCE_DIR}/../core/build-linux64/ ) SET(MY_TARGETLIB_PREFIX "lib") diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 526a5d53..adec0823 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -145,7 +145,7 @@ set(LSPL_TEST_SOURCES add_executable(lspl-test ${LSPL_TEST_SOURCES}) -target_link_libraries(lspl-test lspl boost_system) +target_link_libraries(lspl-test lspl) ### lspl-benchmark binary @@ -155,7 +155,7 @@ set(LSPL_BENCHMARK_SOURCES add_executable(lspl-benchmark ${LSPL_BENCHMARK_SOURCES}) -target_link_libraries(lspl-benchmark lspl boost_system) +target_link_libraries(lspl-benchmark lspl) # Flags for effective error parsing diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 9ab4831d..c38b1ce8 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -23,7 +23,7 @@ set(LSPL_CONSOLE_SOURCES add_executable(lspl-console ${LSPL_CONSOLE_SOURCES}) -target_link_libraries(lspl-console lspl boost_system) +target_link_libraries(lspl-console lspl) ### LsplGenerator @@ -45,7 +45,7 @@ set(LSPL_FIND_SOURCES add_executable(lspl-find ${LSPL_FIND_SOURCES}) -target_link_libraries(lspl-find lspl boost_system) +target_link_libraries(lspl-find lspl) ### lspl-DictionaryRecognizer @@ -61,7 +61,7 @@ set(LSPL_DICTIONARY_RECOGNIZER_SOURCES add_executable(lspl-dictionary-recognizer ${LSPL_DICTIONARY_RECOGNIZER_SOURCES}) -target_link_libraries(lspl-dictionary-recognizer lspl boost_system) +target_link_libraries(lspl-dictionary-recognizer lspl) ### lspl-Similarity @@ -80,7 +80,7 @@ set(LSPL_SIMILARITY_SOURCES add_executable(lspl-similarity ${LSPL_SIMILARITY_SOURCES}) -target_link_libraries(lspl-similarity lspl boost_system) +target_link_libraries(lspl-similarity lspl) ### lspl-RangeSetDecartTreeTest @@ -92,7 +92,7 @@ set(LSPL_RANGESET_DECART_TREE_TEST_SOURCES add_executable(lspl-rangeset-decart-tree-test ${LSPL_RANGESET_DECART_TREE_TEST_SOURCES}) -target_link_libraries(lspl-rangeset-decart-tree-test lspl boost_system) +target_link_libraries(lspl-rangeset-decart-tree-test lspl) ### lspl-UtilTest @@ -104,7 +104,7 @@ set(LSPL_UTIL_TEST_SOURCES add_executable(lspl-util-test ${LSPL_UTIL_TEST_SOURCES}) -target_link_libraries(lspl-util-test lspl boost_system) +target_link_libraries(lspl-util-test lspl) # Flags for effective error parsing From bd35ae60c58e2c7e2e0b301910e7b34a3ed96eef Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Wed, 4 Sep 2019 22:37:16 +0300 Subject: [PATCH 14/24] Fixed restrictions applying in permutations --- core/CMakeLists.txt | 1 + .../src/main/lspl/patterns/PatternBuilder.cpp | 53 +++++++++++++------ .../lspl/patterns/matchers/LoopMatcher.cpp | 2 +- .../restrictions/AgreementRestriction.cpp | 2 +- .../patterns/restrictions/AndRestriction.cpp | 2 +- .../patterns/restrictions/NotRestriction.cpp | 2 +- .../patterns/restrictions/NotRestriction.h | 2 +- .../patterns/restrictions/OrRestriction.cpp | 2 +- .../patterns/restrictions/OrRestriction.h | 2 +- .../lspl/patterns/restrictions/Restriction.h | 4 +- .../restrictions/SharedRestriction.cpp | 41 ++++++++++++++ .../patterns/restrictions/SharedRestriction.h | 30 +++++++++++ 12 files changed, 119 insertions(+), 24 deletions(-) create mode 100644 core/src/main/lspl/patterns/restrictions/SharedRestriction.cpp create mode 100644 core/src/main/lspl/patterns/restrictions/SharedRestriction.h diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index adec0823..434ddc88 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -54,6 +54,7 @@ set(LSPL_CORE_SOURCES src/main/lspl/patterns/restrictions/AndRestriction.cpp src/main/lspl/patterns/restrictions/NotRestriction.cpp src/main/lspl/patterns/restrictions/OrRestriction.cpp + src/main/lspl/patterns/restrictions/SharedRestriction.cpp src/main/lspl/patterns/parsers/CharacterSets.cpp src/main/lspl/patterns/parsers/Functions.cpp diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index f72d9eea..84dc8ff6 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -22,6 +22,7 @@ #include "restrictions/AgreementRestriction.h" #include "restrictions/NotRestriction.h" #include "restrictions/OrRestriction.h" +#include "restrictions/SharedRestriction.h" #include "expressions/CurrentAnnotationExpression.h" #include "expressions/ConstantExpression.h" @@ -678,6 +679,35 @@ class ParserImpl: public PatternBuilder::Parser { throw produceException("= or == expected"); } + template + bool tryToAddAgreementRestriction(MatcherPtrT* matchers, int size, SharedRestriction &r) { + Variable nullvar; + for (int i = size - 1; i >= 0; --i) + if (matchers[i]->variable == nullvar) { + LoopMatcher *loop = dynamic_cast(&*matchers[i]); + if (!loop) + continue; + bool restrictionAdded = false; + if (loop->is_permutation) { + for (Matcher &matcher : loop->alternatives[0].getMatchers()) + if (matcher.variable != nullvar && r.containsVariable(matcher.variable)) { + restrictionAdded = true; + matcher.addRestriction(new SharedRestriction(r)); + } + } else { + // В целом, ситуаций, когда LoopMatcher оказывается не-перестановкой быть не должно, + // но мы все же рассмотрим её + for (MatcherContainer& alt : loop->alternatives) + restrictionAdded |= tryToAddAgreementRestriction(alt.getMatchers().c_array(), alt.getMatchers().size(), r); + } + return restrictionAdded; + } else if (r.containsVariable(matchers[i]->variable)) { + matchers[i]->addRestriction(new SharedRestriction(r)); + return true; + } + return false; + } + /** * Считать одно ограничение согласования для перестановки * @@ -685,7 +715,6 @@ class ParserImpl: public PatternBuilder::Parser { * условие_ согласования ::= имя = имя { = имя } | имя == имя { == имя } */ void readPermutationRestriction(std::vector &matchers) { - std::vector exps(1, readAttributeExpression()); std::string agreementType = readAgreement(); exps.push_back(readAttributeExpression()); @@ -694,22 +723,16 @@ class ParserImpl: public PatternBuilder::Parser { throw produceException("Weak (=) and strong (==) agreements mixed"); exps.push_back(readAttributeExpression()); } - AgreementRestriction *restriction = new AgreementRestriction(agreementType == "="); - for (Expression *e : exps) - restriction->addArgument(e); - - // Ок, теперь нужно найти необходимый сопоставитель. Перебираем их от последнего к первому и смотрим - for (std::vector::reverse_iterator it = matchers.rbegin(); it != matchers.rend(); ++it) - if ((*it)->variable != Variable() && restriction->containsVariable((*it)->variable)) { - (*it)->addRestriction(restriction); - return; - } + AgreementRestriction *agreement_r = new AgreementRestriction(agreementType == "="); + for (Expression *e : exps) + agreement_r->addArgument(e); - // Ограничение не подошло ни к одному сопоставителю. Возможно, тут совсем ничего не нужно делать? - // Мы пересрахуемся и всё же добавим ограничение в конец. - matchers.back()->addRestriction(restriction); - // FIXME: возможно, нужно бросить исключение? + SharedRestriction shared_r(agreement_r); + if (tryToAddAgreementRestriction(matchers.data(), matchers.size(), shared_r)) + return; + for (Expression *e : exps) + delete e; } /** diff --git a/core/src/main/lspl/patterns/matchers/LoopMatcher.cpp b/core/src/main/lspl/patterns/matchers/LoopMatcher.cpp index daf4145a..e741365c 100644 --- a/core/src/main/lspl/patterns/matchers/LoopMatcher.cpp +++ b/core/src/main/lspl/patterns/matchers/LoopMatcher.cpp @@ -272,7 +272,7 @@ void LoopMatcher::dump( std::ostream & out, const std::string & tabs ) const { if ( j != 0 ) out << ",\n\t" << tabs; - out << "[\n\t\t"; + out << "[\n\t\t" << tabs; const boost::ptr_vector & matchers = alternatives[j].getMatchers(); diff --git a/core/src/main/lspl/patterns/restrictions/AgreementRestriction.cpp b/core/src/main/lspl/patterns/restrictions/AgreementRestriction.cpp index 456c88b4..e56675c2 100644 --- a/core/src/main/lspl/patterns/restrictions/AgreementRestriction.cpp +++ b/core/src/main/lspl/patterns/restrictions/AgreementRestriction.cpp @@ -59,7 +59,7 @@ void AgreementRestriction::dump( std::ostream & out, const std::string & tabs ) args[0].dump( out ); for ( uint i = 1; i < args.size(); ++i ) { - out << " ~ "; + out << (weak ? " = " : " == "); args[i].dump( out ); } } diff --git a/core/src/main/lspl/patterns/restrictions/AndRestriction.cpp b/core/src/main/lspl/patterns/restrictions/AndRestriction.cpp index 03bbaa12..1ec59ad6 100644 --- a/core/src/main/lspl/patterns/restrictions/AndRestriction.cpp +++ b/core/src/main/lspl/patterns/restrictions/AndRestriction.cpp @@ -74,4 +74,4 @@ bool AndRestriction::containsCurrentAnnotation() const { return false; } -} } } // namespace lspl::patterns::matchers +} } } // namespace lspl::patterns::restrictions diff --git a/core/src/main/lspl/patterns/restrictions/NotRestriction.cpp b/core/src/main/lspl/patterns/restrictions/NotRestriction.cpp index edbe7b91..c9e2a149 100644 --- a/core/src/main/lspl/patterns/restrictions/NotRestriction.cpp +++ b/core/src/main/lspl/patterns/restrictions/NotRestriction.cpp @@ -41,4 +41,4 @@ bool NotRestriction::containsCurrentAnnotation() const { return arg->containsCurrentAnnotation(); } -} } } // namespace lspl::patterns::matchers +} } } // namespace lspl::patterns::restrictions diff --git a/core/src/main/lspl/patterns/restrictions/NotRestriction.h b/core/src/main/lspl/patterns/restrictions/NotRestriction.h index 31ef4dbe..cdf28946 100644 --- a/core/src/main/lspl/patterns/restrictions/NotRestriction.h +++ b/core/src/main/lspl/patterns/restrictions/NotRestriction.h @@ -24,6 +24,6 @@ class LSPL_EXPORT NotRestriction : public Restriction { }; -} } } // namespace lspl::patterns::matchers +} } } // namespace lspl::patterns::restrictions #endif /* _LSPL_PATTERNS_RESTRICTIONS_NOTRESTRICTION_H_ */ diff --git a/core/src/main/lspl/patterns/restrictions/OrRestriction.cpp b/core/src/main/lspl/patterns/restrictions/OrRestriction.cpp index da63d54e..c6f87078 100644 --- a/core/src/main/lspl/patterns/restrictions/OrRestriction.cpp +++ b/core/src/main/lspl/patterns/restrictions/OrRestriction.cpp @@ -68,4 +68,4 @@ bool OrRestriction::containsCurrentAnnotation() const { return false; } -} } } // namespace lspl::patterns::matchers +} } } // namespace lspl::patterns::restrictions diff --git a/core/src/main/lspl/patterns/restrictions/OrRestriction.h b/core/src/main/lspl/patterns/restrictions/OrRestriction.h index b0304177..0ac5d31a 100644 --- a/core/src/main/lspl/patterns/restrictions/OrRestriction.h +++ b/core/src/main/lspl/patterns/restrictions/OrRestriction.h @@ -36,6 +36,6 @@ class LSPL_EXPORT OrRestriction : public Restriction { }; -} } } // namespace lspl::patterns::matchers +} } } // namespace lspl::patterns::restrictions #endif /* _LSPL_PATTERNS_RESTRICTIONS_ORRESTRICTION_H_ */ diff --git a/core/src/main/lspl/patterns/restrictions/Restriction.h b/core/src/main/lspl/patterns/restrictions/Restriction.h index 7cb2618c..e91f732f 100644 --- a/core/src/main/lspl/patterns/restrictions/Restriction.h +++ b/core/src/main/lspl/patterns/restrictions/Restriction.h @@ -9,7 +9,7 @@ #include "../matchers/Variable.h" -#include +#include namespace lspl { namespace patterns { namespace restrictions { @@ -96,6 +96,6 @@ inline Restriction* new_clone( const Restriction& r ) return nullptr; } -} } } // namespace lspl::patterns::matchers +} } } // namespace lspl::patterns::restrictions #endif//_LSPL_PATTERNS_RESTRICTIONS_RESTRICTION_H_ diff --git a/core/src/main/lspl/patterns/restrictions/SharedRestriction.cpp b/core/src/main/lspl/patterns/restrictions/SharedRestriction.cpp new file mode 100644 index 00000000..e9f1f075 --- /dev/null +++ b/core/src/main/lspl/patterns/restrictions/SharedRestriction.cpp @@ -0,0 +1,41 @@ +#include "../../base/BaseInternal.h" + +#include "SharedRestriction.h" + +namespace lspl { namespace patterns { namespace restrictions { + +SharedRestriction::SharedRestriction(Restriction *r) : ptr(r) { +} + +SharedRestriction::SharedRestriction(const SharedRestriction &r) : SharedRestriction(nullptr) { + this->ptr = r.ptr; +} + +SharedRestriction::~SharedRestriction() { +} + +bool SharedRestriction::matches( const text::Transition * currentAnnotation, const matchers::Variable currentVar, const matchers::Context & ctx ) const { + return ptr->matches(currentAnnotation, currentVar, ctx); +} + +void SharedRestriction::dump( std::ostream & out, const std::string & tabs ) const { + ptr->dump(out, tabs); +} + +bool SharedRestriction::equals( const Restriction & r ) const { + return ptr->equals(r); +} + +bool SharedRestriction::containsVariable( matchers::Variable var ) const { + return ptr->containsVariable(var); +} + +bool SharedRestriction::containsVariables() const { + return ptr->containsVariables(); +} + +bool SharedRestriction::containsCurrentAnnotation() const { + return ptr->containsCurrentAnnotation(); +} + +} } } // namespace lspl::patterns::restrictions diff --git a/core/src/main/lspl/patterns/restrictions/SharedRestriction.h b/core/src/main/lspl/patterns/restrictions/SharedRestriction.h new file mode 100644 index 00000000..3eb9ae6a --- /dev/null +++ b/core/src/main/lspl/patterns/restrictions/SharedRestriction.h @@ -0,0 +1,30 @@ +#ifndef _LSPL_PATTERNS_RESTRICTIONS_SHAREDRESTRICTION_H_ +#define _LSPL_PATTERNS_RESTRICTIONS_SHAREDRESTRICTION_H_ + +#include +#include "Restriction.h" + +namespace lspl { namespace patterns { namespace restrictions { + +class LSPL_EXPORT SharedRestriction : public Restriction { + +public: + SharedRestriction( Restriction * arg ); + SharedRestriction( const SharedRestriction &r ); + virtual ~SharedRestriction(); + + virtual bool matches( const text::Transition * currentAnnotation, const matchers::Variable currentVar, const matchers::Context & ctx ) const; + virtual void dump( std::ostream & out, const std::string & tabs = "" ) const; + virtual bool equals( const Restriction & r ) const; + + virtual bool containsVariable( matchers::Variable var ) const; + virtual bool containsVariables() const; + virtual bool containsCurrentAnnotation() const; + +private: + std::shared_ptr ptr; +}; + +} } } // namespace lspl::patterns::restrictions + +#endif /* _LSPL_PATTERNS_RESTRICTIONS_SHAREDRESTRICTION_H_ */ From 7ebe351ef60aff0ebd99cb6444d6c23b55100fe5 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Thu, 3 Oct 2019 17:46:11 +0300 Subject: [PATCH 15/24] Fixed invalid pattern arguments detection --- .../src/main/lspl/patterns/PatternBuilder.cpp | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index 84dc8ff6..9c960dd5 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -64,6 +64,15 @@ class ParserImpl: public PatternBuilder::Parser { return buffer[pos] == '\0'; } + /** + * Проверка на наличие конца строки / ввода + */ + bool seekEndOfLine() { + while (buffer[pos] == ' ') + ++pos; + return buffer[pos] == '\0' || buffer[pos] == '\n' || buffer[pos] == '\r'; + } + /** * Создаёт экземпляр исключения с заданным сообщением об ошибке, хранящий * информацию о текущей позиции парсера и входных данных @@ -331,13 +340,20 @@ class ParserImpl: public PatternBuilder::Parser { pos = before_pos; return nullptr; } - if (canBeBinding && alts.size() == 1) { + readStrFollows(rbrace); + + // Потенциально вложенный сопоставитель все ещё может быть параметром шаблона, если + // параметр всего один. В таком случае нужно проверить, что + // 1. внутри скобок есть только один параметр; + // 2. этот параметр не является составным; + // 3. после параметра ничего нет (кроме, возможно, шаблона извлечения). + if (canBeBinding && alts.size() == 1 && alts[0].size() == 1 + && dynamic_cast(alts[0][0].get()) == nullptr + && (seekEndOfLine() || strFollows("="))) { pos = before_pos; return nullptr; } - readStrFollows(rbrace); - if (allow && strFollows("<") && !strFollows("<<")) { readStrFollows("<"); min = readUInt(); From 79998e0c3225c70c1ab87f9d4d1828cacd8d0ebb Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Mon, 7 Oct 2019 20:15:46 +0300 Subject: [PATCH 16/24] Ditched ptr_vector for STL smart pointers. Now patterns are pushed to namespace only after successful parsing. --- core/src/main/lspl/patterns/Pattern.cpp | 31 ++++++++++++------- core/src/main/lspl/patterns/Pattern.h | 15 ++++++--- .../src/main/lspl/patterns/PatternBuilder.cpp | 29 ++++++++++------- .../lspl/patterns/matchers/PatternMatcher.cpp | 2 +- .../main/lspl/patterns/parsers/Functions.cpp | 7 ----- .../main/lspl/patterns/parsers/Functions.h | 11 ------- core/src/main/lspl/text/Text.cpp | 9 +++--- .../main/lspl/text/readers/JsonTextReader.cpp | 2 +- core/src/test/tests/PatternsTest.cpp | 4 +-- tools/src/find.cpp | 6 ++-- 10 files changed, 58 insertions(+), 58 deletions(-) diff --git a/core/src/main/lspl/patterns/Pattern.cpp b/core/src/main/lspl/patterns/Pattern.cpp index f217e6c2..d86417c1 100644 --- a/core/src/main/lspl/patterns/Pattern.cpp +++ b/core/src/main/lspl/patterns/Pattern.cpp @@ -21,14 +21,14 @@ void Pattern::dump( std::ostream & out, const std::string & tabs ) const { out << "Pattern{ name = " << name << ", alternatives = [\n\t" << tabs; bool first = true; - for( boost::ptr_vector::const_iterator altIt = alternatives.begin(); altIt != alternatives.end(); ++ altIt ) { + for( const std::unique_ptr &alt : alternatives ) { if ( first ) { first = false; } else { out << ",\n\t" << tabs; } - altIt->dump( out, tabs + "\t" ); + alt->dump( out, tabs + "\t" ); } out << "\n" << tabs << "] }"; @@ -44,22 +44,29 @@ Alternative & Pattern::newAlternative( const std::string & source ) { void Pattern::addAlternative( Alternative * alt ) { alt->pattern = this; - alternatives.push_back( alt ); + alternatives.emplace_back( alt ); } -void Pattern::addAlternatives( boost::ptr_vector & r ) { - for( Alternative & alt : r ) { - alt.pattern = this; +void Pattern::addAlternatives( std::vector> & r ) { + for( const std::unique_ptr & alt : r ) { + alt->pattern = this; } - alternatives.transfer( alternatives.end(), r.begin(), r.end(), r ); + int oldSize = alternatives.size(); + alternatives.resize(oldSize + r.size()); + std::move(r.begin(), r.end(), alternatives.begin() + oldSize); +} + +void Pattern::mergePattern ( Pattern &other ) { + addAlternatives(other.alternatives); + other.alternatives.clear(); } void Pattern::updateDependencies() { dependencies.clear(); - for( const Alternative & alt : alternatives ) { + for( const std::unique_ptr & alt : alternatives ) { - for( const Pattern * ptr : alt.getDependencies() ) { + for( const Pattern * ptr : alt->getDependencies() ) { bool found = false; for( const Pattern * dep : dependencies ) { @@ -82,7 +89,7 @@ void Pattern::removeDuplicateAlternatives() { while ( i < alternatives.size() ) { bool found = false; for ( uint j = 0; j < i; ++ j ) { - if ( alternatives[i].equals( alternatives[j] ) ) { + if ( alternatives[i]->equals( *alternatives[j] ) ) { found = true; break; } @@ -137,14 +144,14 @@ std::string Pattern::getSource() const { std::string result = ""; bool first = true; - for( boost::ptr_vector::const_iterator altIt = alternatives.begin(); altIt != alternatives.end(); ++ altIt ) { + for( const std::unique_ptr &alt : alternatives ) { if ( first ) { first = false; } else { result += " | "; } - result += altIt->getSource(); + result += alt->getSource(); } return result; diff --git a/core/src/main/lspl/patterns/Pattern.h b/core/src/main/lspl/patterns/Pattern.h index bcfb55bd..af9a7cd2 100644 --- a/core/src/main/lspl/patterns/Pattern.h +++ b/core/src/main/lspl/patterns/Pattern.h @@ -2,11 +2,10 @@ #define _LSPL_PATTERNS_PATTERN_H_ #include +#include #include "Forward.h" #include "../text/Forward.h" -#include - namespace lspl { namespace patterns { /** @@ -44,7 +43,13 @@ class LSPL_EXPORT Pattern : public base::RefCountObject, public base::Identified /** * Добавить альтернативы к шаблону */ - void addAlternatives( boost::ptr_vector & r ); + void addAlternatives( std::vector> & r ); + + /** + * Перенести все альтернативы шаблона other к текущему (подразумевается, что + * они имеют одно имя) + */ + void mergePattern ( Pattern &other ); /** * Обновить список зависимостей альтернативы @@ -86,7 +91,7 @@ class LSPL_EXPORT Pattern : public base::RefCountObject, public base::Identified /** * Получить список внешних альтернатив шаблона */ - const boost::ptr_vector & getAlternatives() const { + const std::vector> & getAlternatives() const { return alternatives; } @@ -104,7 +109,7 @@ class LSPL_EXPORT Pattern : public base::RefCountObject, public base::Identified /** * Список альтернатив шаблона */ - boost::ptr_vector alternatives; + std::vector> alternatives; /** * Количество объектов в памяти diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index 9c960dd5..a416e8d1 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -42,6 +42,7 @@ LSPL_REFCOUNT_CLASS( lspl::patterns::PatternBuilder ); namespace lspl { namespace patterns { typedef std::unique_ptr MatcherPtr; +typedef std::unique_ptr PatternPtr; class ParserImpl: public PatternBuilder::Parser { private: @@ -189,7 +190,7 @@ class ParserImpl: public PatternBuilder::Parser { /** * Обработка параметров шаблона */ - std::vector readPatternArguments(PatternRef pattern) { + std::vector readPatternArguments() { std::vector result; readStrFollows("("); result.push_back(readAttributeExpression()); @@ -821,7 +822,7 @@ class ParserImpl: public PatternBuilder::Parser { /** * Считать альтернативу шаблона pattern, сохранив её source */ - void readAlternativeWithSource(PatternRef pattern) { + void readAlternativeWithSource(const PatternPtr &pattern) { uint before_pos = pos; std::vector alt = readPermutation(); pattern->newAlternative(std::string(buffer + before_pos, buffer + pos)).addMatchers(alt); @@ -872,17 +873,15 @@ class ParserImpl: public PatternBuilder::Parser { */ void readPattern() { std::string patternName = readPatternName(); - PatternRef pattern = space->getPatternByName(patternName); - if (!pattern) - pattern = space->addPattern(new Pattern(patternName)); + PatternPtr pattern(new Pattern(patternName)); std::vector arguments; uint alternativeCountBefore = pattern->alternatives.size(); bool hasPatternAttributes = false; // Параметры шаблона (слева) if (strFollows("(")) { hasPatternAttributes = true; - arguments = readPatternArguments(pattern); + arguments = readPatternArguments(); } // Описание шаблона @@ -898,13 +897,13 @@ class ParserImpl: public PatternBuilder::Parser { if (hasPatternAttributes) throw produceException("Double pattern attributes declaration"); hasPatternAttributes = true; - arguments = readPatternArguments(pattern); + arguments = readPatternArguments(); } // Подключение параметров шаблона к соответствующим им альтернативам for (Expression *exp : arguments) { for (uint i = alternativeCountBefore; i < pattern->alternatives.size(); ++i) - appendAlternativeBinding(pattern->alternatives[i], exp); + appendAlternativeBinding(*pattern->alternatives[i], exp); delete exp; } @@ -924,17 +923,23 @@ class ParserImpl: public PatternBuilder::Parser { ++end_pos; for (uint i = alternativeCountBefore; i < pattern->alternatives.size(); ++i) { - pattern->alternatives[i].transformSource = std::string(buffer + pos, buffer + end_pos); - pattern->alternatives[i].setTransform( + pattern->alternatives[i]->transformSource = std::string(buffer + pos, buffer + end_pos); + pattern->alternatives[i]->setTransform( std::auto_ptr(tf->second->build( - pattern->alternatives[i], - pattern->alternatives[i].getTransformSource() + *pattern->alternatives[i], + pattern->alternatives[i]->getTransformSource() )) ); } pos = end_pos; } + + PatternRef patternRef = space->getPatternByName(patternName); + if (!patternRef) { + space->addPattern(pattern.release()); + } else + patternRef->mergePattern(*pattern); } public: diff --git a/core/src/main/lspl/patterns/matchers/PatternMatcher.cpp b/core/src/main/lspl/patterns/matchers/PatternMatcher.cpp index 26ca818f..a0abdb33 100644 --- a/core/src/main/lspl/patterns/matchers/PatternMatcher.cpp +++ b/core/src/main/lspl/patterns/matchers/PatternMatcher.cpp @@ -164,7 +164,7 @@ TransitionList PatternMatcher::buildTransitions( const text::Node & node, const newTransitions.push_back( node.getTransition( i ) ); } else { for ( uint i = 0; i < pattern.alternatives.size(); ++ i ) { - PatternMatchState state( pattern, pattern.alternatives[i], node ); + PatternMatchState state( pattern, *pattern.alternatives[i], node ); processCompoundPattern( state, newTransitions ); } } diff --git a/core/src/main/lspl/patterns/parsers/Functions.cpp b/core/src/main/lspl/patterns/parsers/Functions.cpp index 926b8548..f035e869 100644 --- a/core/src/main/lspl/patterns/parsers/Functions.cpp +++ b/core/src/main/lspl/patterns/parsers/Functions.cpp @@ -140,13 +140,6 @@ void AddAlternativeDefinitionImpl::operator()( boost::ptr_vector & alts.push_back( alternative ); } -void AddPatternDefinitionImpl::operator()( const std::string & name, boost::ptr_vector & alts ) const { - PatternRef pattern = getPattern( name ); - - pattern->addAlternatives( alts ); // Добавляем альтернативы к шаблону - pattern->updateDependencies(); // Обновляем зависимости шаблона -} - void AddRestrictionImpl::operator()( boost::ptr_vector & matchers, Restriction * restriction ) const { findLastMatcher( matchers, restriction ).addRestriction( restriction ); } diff --git a/core/src/main/lspl/patterns/parsers/Functions.h b/core/src/main/lspl/patterns/parsers/Functions.h index cfd244d1..09804460 100644 --- a/core/src/main/lspl/patterns/parsers/Functions.h +++ b/core/src/main/lspl/patterns/parsers/Functions.h @@ -115,17 +115,6 @@ struct AddAlternativeDefinitionImpl { const std::map& transformBuilders; }; -struct AddPatternDefinitionImpl : public DefinePattern { - - template - struct result { typedef void type; }; - - AddPatternDefinitionImpl( Namespace & space, boost::spirit::classic::symbols & typeSymbol) : - DefinePattern( space, typeSymbol ) {} - - void operator()( const std::string & name, boost::ptr_vector & alts ) const; -}; - struct AddImpl { template struct result { typedef void type; }; diff --git a/core/src/main/lspl/text/Text.cpp b/core/src/main/lspl/text/Text.cpp index da12abc3..63152582 100644 --- a/core/src/main/lspl/text/Text.cpp +++ b/core/src/main/lspl/text/Text.cpp @@ -18,6 +18,7 @@ #include "../patterns/restrictions/Restriction.h" #include "../transforms/ContextRetriever.h" +using lspl::patterns::Alternative; using lspl::patterns::Pattern; using lspl::patterns::matchers::Context; using lspl::patterns::matchers::Matcher; @@ -131,7 +132,7 @@ bool Text::prepareIndices( const Pattern & pattern, IndexIteratorsList & iterato using namespace lspl::patterns; for ( uint i = 0; i < pattern.getAlternatives().size(); ++ i ) { // Перебираем все альтернативы шаблона - const Alternative & alternative = pattern.getAlternatives()[i]; // Получаем ссылку на альтернативу + const Alternative & alternative = *pattern.getAlternatives()[i]; // Получаем ссылку на альтернативу const std::vector & startMatchers = alternative.getStartMatchers(); // Получаем информацию о подходящих индексах из альтернативы @@ -186,8 +187,8 @@ void Text::processWithoutIndices( const Pattern & pattern ) { Context context; // Контекст сопоставления for ( uint nodeIndex = 0; nodeIndex < nodes.size(); ++ nodeIndex ) { - for( boost::ptr_vector::const_iterator altIt = pattern.getAlternatives().begin(); altIt != pattern.getAlternatives().end(); ++ altIt ) // Перебираем все альтернативы шаблона - matcher.buildTransitions( *nodes[nodeIndex], pattern, *altIt, context, results ); + for( const std::unique_ptr &alt : pattern.getAlternatives() ) // Перебираем все альтернативы шаблона + matcher.buildTransitions( *nodes[nodeIndex], pattern, *alt, context, results ); for ( uint i = 0; i < results.size(); ++ i ) addMatchToMarkup( results[i].cast() ); @@ -226,7 +227,7 @@ void Text::processWithIndices( const Pattern & pattern, IndexIteratorsList & ite if ( !index ) // Если не нашли подходящего индекса, значит поиск завершен return; - matcher.buildTransitions( *index->get(), pattern, pattern.getAlternatives()[ alternative ], context, results ); // Производим поиск начиная с текущего индекса + matcher.buildTransitions( *index->get(), pattern, *pattern.getAlternatives()[ alternative ], context, results ); // Производим поиск начиная с текущего индекса lastNode = &index->get()->start; index->increment(); diff --git a/core/src/main/lspl/text/readers/JsonTextReader.cpp b/core/src/main/lspl/text/readers/JsonTextReader.cpp index 9a29cff5..810efdbe 100644 --- a/core/src/main/lspl/text/readers/JsonTextReader.cpp +++ b/core/src/main/lspl/text/readers/JsonTextReader.cpp @@ -147,7 +147,7 @@ class JsonTextReader::Parser : public grammar { void operator()( uint start, uint end, const std::string & patternName, const Match::AttributesMap & attributes ) const { patterns::Pattern & p = *parser.ns.getPatternByName( patternName ); - parser.builder.addMatch( new Match( parser.builder.getNodeByIndex( start ), parser.builder.getNodeByIndex( end ), p, new MatchVariant( p.getAlternatives()[0] ), attributes ) ); // TODO Transitions + parser.builder.addMatch( new Match( parser.builder.getNodeByIndex( start ), parser.builder.getNodeByIndex( end ), p, new MatchVariant( *p.getAlternatives()[0] ), attributes ) ); // TODO Transitions } private: Parser & parser; diff --git a/core/src/test/tests/PatternsTest.cpp b/core/src/test/tests/PatternsTest.cpp index 3cf925ef..a97324c1 100644 --- a/core/src/test/tests/PatternsTest.cpp +++ b/core/src/test/tests/PatternsTest.cpp @@ -308,8 +308,8 @@ static void testRemoveDuplicates() { p2.removeDuplicateAlternatives(); assertEquals( 2, p2.getAlternatives().size() ); - assertEquals( &p2.getAlternatives()[0], &a1 ); - assertEquals( &p2.getAlternatives()[1], &a3 ); + assertEquals( p2.getAlternatives()[0].get(), &a1 ); + assertEquals( p2.getAlternatives()[1].get(), &a3 ); } diff --git a/tools/src/find.cpp b/tools/src/find.cpp index ea6074b8..88440a09 100644 --- a/tools/src/find.cpp +++ b/tools/src/find.cpp @@ -114,10 +114,10 @@ lspl::patterns::PatternList buildGoals( const lspl::patterns::PatternBuilderRef template bool checkForTransformType(const lspl::patterns::PatternRef &pattern) { - for (const lspl::patterns::Alternative &alt : pattern->getAlternatives()) { - if (!alt.hasTransform()) continue; + for (const std::unique_ptr &alt : pattern->getAlternatives()) { + if (!alt->hasTransform()) continue; try { - dynamic_cast(alt.getTransform()); + dynamic_cast(alt->getTransform()); } catch (...) { continue; } From f538be91413c0f24860aa4e6cdf99216ab01dc68 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Mon, 7 Oct 2019 21:23:04 +0300 Subject: [PATCH 17/24] Fixed build warnings --- core/CMakeLists.txt | 4 ++-- core/src/test/LsplBenchmarks.cpp | 24 ++++++++++++------------ tools/src/RangeSetDecartTreeElement.cpp | 6 +++--- tools/src/RangeSetDecartTreeElement.h | 6 +++--- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 434ddc88..fade81c3 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 2.6) project(LSPL_CORE) include(../common/CMakeCommon.cmake) -find_package(Boost COMPONENTS system) +find_package(Boost COMPONENTS system timer) if(WIN32) else(WIN32) @@ -156,7 +156,7 @@ set(LSPL_BENCHMARK_SOURCES add_executable(lspl-benchmark ${LSPL_BENCHMARK_SOURCES}) -target_link_libraries(lspl-benchmark lspl) +target_link_libraries(lspl-benchmark lspl ${Boost_TIMER_LIBRARY}) # Flags for effective error parsing diff --git a/core/src/test/LsplBenchmarks.cpp b/core/src/test/LsplBenchmarks.cpp index c745a7ea..21c6d767 100644 --- a/core/src/test/LsplBenchmarks.cpp +++ b/core/src/test/LsplBenchmarks.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include @@ -15,6 +15,9 @@ using namespace lspl; using lspl::uint; using lspl::morphology::Morphology; +using boost::timer::cpu_timer; + +const uint DEFAULT_TIMER_PLACES = 3; text::TextRef loadTextFromFile( const char * fileName ) { std::ifstream textStream( fileName ); @@ -23,9 +26,9 @@ text::TextRef loadTextFromFile( const char * fileName ) { std::cout << "Loading text from file " << fileName << "... "; std::cout.flush(); - boost::timer tm; + cpu_timer tm; text::TextRef text = reader.readFromStream( textStream ); - std::cout << "Done in " << tm.elapsed() << " seconds" << std::endl; + std::cout << tm.format(DEFAULT_TIMER_PLACES, "Done in %t seconds.") << std::endl; return text; } @@ -36,7 +39,7 @@ void definePatterns( NamespaceRef ns ) { std::cout << "Building patterns... "; std::cout.flush(); - boost::timer tm; + cpu_timer tm; builder->build( "Pact = N V" ); // 5582 [v] builder->build( "Act = N V <>" ); // 4505 [v] @@ -47,7 +50,7 @@ void definePatterns( NamespaceRef ns ) { builder->build( "AEE = N \"\" N" ); // 446 [v] builder->build( "ANom = N V" ); // 4199 [v] builder->build( "AGen = N V" ); // 1373 [v] - std::cout << "Done in " << tm.elapsed() << " seconds." << std::endl; + std::cout << tm.format(DEFAULT_TIMER_PLACES, "Done in %t seconds.") << std::endl; } void findPatterns() { @@ -58,16 +61,13 @@ void findPatterns() { for ( uint i = 0; i < ns->getPatternCount(); ++ i ) { patterns::PatternRef pt = ns->getPatternByIndex( i ); - std::cout << "Dumping: "; - pt->dump(std::cout); - std::cout << std::endl << std::endl; std::cout << "Matching " << pt->getSource() << "... "; std::cout.flush(); - boost::timer tm; + cpu_timer tm; uint count = text->getMatches( *pt ).size(); - std::cout << "Done in " << tm.elapsed() << " seconds, " << count << " matches found"<< std::endl; + std::cout << tm.format(DEFAULT_TIMER_PLACES, "Done in %t seconds, ") << count << " matches found" << std::endl; } std::cout << text->getWords( text::attributes::SpeechPart::NOUN ).size() << std::endl; @@ -77,9 +77,9 @@ void loadMorphology() { std::cout << "Loading morphology system... "; std::cout.flush(); - boost::timer tm; + cpu_timer tm; Morphology::instance(); - std::cout << "Done in " << tm.elapsed() << " seconds." << std::endl; + std::cout << tm.format(DEFAULT_TIMER_PLACES, "Done in %t seconds.") << std::endl; } int main() { diff --git a/tools/src/RangeSetDecartTreeElement.cpp b/tools/src/RangeSetDecartTreeElement.cpp index 36c8500a..9a0bc9fb 100644 --- a/tools/src/RangeSetDecartTreeElement.cpp +++ b/tools/src/RangeSetDecartTreeElement.cpp @@ -105,15 +105,15 @@ namespace lspl { RangeSetDecartTreeElement *element) { _parent_node = element; } - int RangeSetDecartTreeElement::set_subtree_max_right_part_of_ranges( + void RangeSetDecartTreeElement::set_subtree_max_right_part_of_ranges( const Range *subtree_max_right_part_of_ranges) { _subtree_max_right_part_of_ranges = subtree_max_right_part_of_ranges; } - int RangeSetDecartTreeElement::set_subtree_height( + void RangeSetDecartTreeElement::set_subtree_height( const int &subtree_height) { _subtree_height = subtree_height; } - int RangeSetDecartTreeElement::set_subtree_size(const int &subtree_size) { + void RangeSetDecartTreeElement::set_subtree_size(const int &subtree_size) { _subtree_size = subtree_size; } diff --git a/tools/src/RangeSetDecartTreeElement.h b/tools/src/RangeSetDecartTreeElement.h index 70f68a19..b93a700e 100644 --- a/tools/src/RangeSetDecartTreeElement.h +++ b/tools/src/RangeSetDecartTreeElement.h @@ -56,9 +56,9 @@ namespace lspl { void set_left_child(RangeSetDecartTreeElement *element); void set_right_child(RangeSetDecartTreeElement *element); void set_parent_node(RangeSetDecartTreeElement *element); - int set_subtree_max_right_part_of_ranges(const Range *subtree_max_right_part_of_ranges); - int set_subtree_height(const int &subtree_height); - int set_subtree_size(const int &subtree_size); + void set_subtree_max_right_part_of_ranges(const Range *subtree_max_right_part_of_ranges); + void set_subtree_height(const int &subtree_height); + void set_subtree_size(const int &subtree_size); // Returns true, if the element is a left child. bool IsLeftChild() const; From 9562b0eb22eef863a2c29f8c8a069fee503f63f6 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Mon, 7 Oct 2019 21:45:41 +0300 Subject: [PATCH 18/24] Removed some unused parser functions --- .../main/lspl/patterns/parsers/Functions.cpp | 70 ------------------- .../main/lspl/patterns/parsers/Functions.h | 50 ------------- 2 files changed, 120 deletions(-) diff --git a/core/src/main/lspl/patterns/parsers/Functions.cpp b/core/src/main/lspl/patterns/parsers/Functions.cpp index f035e869..dc221464 100644 --- a/core/src/main/lspl/patterns/parsers/Functions.cpp +++ b/core/src/main/lspl/patterns/parsers/Functions.cpp @@ -46,16 +46,6 @@ using lspl::morphology::Morphology; namespace lspl { namespace patterns { namespace parsers { -static bool isRegexp( const std::string & str ) { - static std::string regexSymbols( ".[{()\\*+?|^$'" ); - - for ( uint i = 0; i < regexSymbols.length(); ++ i ) - if ( str.find( regexSymbols.at(i) ) != std::string::npos ) - return true; - - return false; -} - void AddWordMatcherImpl::operator()( boost::ptr_vector & matchers, const std::string & base, SpeechPart speechPart, uint index, boost::ptr_vector< Restriction > & restrictions ) const { WordMatcher * matcher; @@ -73,13 +63,6 @@ void AddWordMatcherImpl::operator()( boost::ptr_vector & matchers, cons matchers.push_back( matcher ); } -void AddTokenMatcherImpl::operator()( boost::ptr_vector & matchers, const std::string & token ) const { - if ( isRegexp( token ) ) - matchers.push_back( new RegexpMatcher( token ) ); - else - matchers.push_back( new TokenMatcher( token ) ); -} - void AddTokenMatcherNoRegexpImpl::operator()( boost::ptr_vector & matchers, const std::string & token ) const { matchers.push_back( new TokenMatcher( token ) ); } @@ -88,20 +71,6 @@ void AddStringMatcherImpl::operator()( boost::ptr_vector & matchers, co matchers.push_back( new StringMatcher( token ) ); } -void AddLoopMatcherImpl::operator()( boost::ptr_vector & matchers, uint min, uint max, std::vector & alternativesCount ) const { - LoopMatcher * matcher = new LoopMatcher( min, max ); - - for ( int i = alternativesCount.size() - 1; i >= 0 ; -- i ) { // Важно!! Здесь перебираем в обратном порядке! - MatcherContainer * matcherGroup = new MatcherContainer(); // Создаем контейнер для альтернативы - - matcherGroup->addMatchers( matchers.end() - alternativesCount[ i ], matchers.end(), matchers ); - - matcher->addAlternative( matcherGroup ); - } - - matchers.push_back( matcher ); -} - PatternRef DefinePattern::getPattern( const std::string & name ) const { PatternRef pattern = space.getPatternByName( name ); @@ -124,22 +93,6 @@ void AddPatternMatcherImpl::operator()( boost::ptr_vector & matchers, c matchers.push_back( matcher ); } -void AddAlternativeDefinitionImpl::operator()( boost::ptr_vector & alts, boost::ptr_vector & matchers, boost::ptr_map & bindings, const std::string & source, const std::string & transformSource, const std::string & transformType ) const { - Alternative * alternative = new Alternative( source, transformSource ); // Добавляем новую альтернативу к шаблону - - alternative->addMatchers( matchers ); // Добавляем сопоставители - alternative->addBindings( bindings ); // Добавляем связывания - alternative->updateDependencies(); // Обновляем зависимости альтернативы - - const auto transformBuilder = transformBuilders.find(transformType); - if (transformBuilder == transformBuilders.end()) - throw PatternBuildingException("Invalid transform type: =" + transformType + ">", "", 0); - - alternative->setTransform( std::unique_ptr( transformBuilder->second->build( *alternative, alternative->getTransformSource() ) ) ); - - alts.push_back( alternative ); -} - void AddRestrictionImpl::operator()( boost::ptr_vector & matchers, Restriction * restriction ) const { findLastMatcher( matchers, restriction ).addRestriction( restriction ); } @@ -163,29 +116,6 @@ void AddNormalizationRestrictionImpl::operator()( boost::ptr_vector restrictions.push_back( restriction ); } -void AddBindingImpl::operator()( boost::ptr_map & bindings, AttributeKey att, Expression * exp ) const { - if ( att == AttributeKey::UNDEFINED && dynamic_cast( exp ) ) { - bindings.insert( static_cast( exp )->attribute, exp ); // Если у аттрибута не указано связывание, связываем его с соответствующим аттрибутом - } else if ( att == AttributeKey::UNDEFINED && dynamic_cast( exp ) ) { - bindings.insert( att = AttributeKey::TEXT, exp ); // Если у текстового выражения не указано связывание, связываем его с аттрибутом TEXT - } else { - bindings.insert( att, exp ); - } -} - -Restriction * CreateDictionaryRestrictionImpl::operator()( const std::string & dictionaryName, boost::ptr_vector & args ) const { - dictionaries::DictionaryRef dict = ns.getDictionaryByName( dictionaryName ); - - if ( !dict ) // Не нашли словаря - выкидываем исключение - throw PatternBuildingException( "No dictionary found", "", 0 ); - - DictionaryRestriction * dr = new DictionaryRestriction( dict ); - - dr->addArguments( args ); - - return dr; -} - Restriction * CreateAgreementRestrictionImpl::operator()( boost::ptr_vector & args ) const { AgreementRestriction * dr = new AgreementRestriction(); diff --git a/core/src/main/lspl/patterns/parsers/Functions.h b/core/src/main/lspl/patterns/parsers/Functions.h index 09804460..4ff96a41 100644 --- a/core/src/main/lspl/patterns/parsers/Functions.h +++ b/core/src/main/lspl/patterns/parsers/Functions.h @@ -47,14 +47,6 @@ struct AddWordMatcherImpl { void operator()( boost::ptr_vector & matchers, const std::string & base, SpeechPart speechPart, uint index, boost::ptr_vector< Restriction > & restrictions ) const; }; -struct AddTokenMatcherImpl { - - template - struct result { typedef void type; }; - - void operator()( boost::ptr_vector & matchers, const std::string & token ) const; -}; - struct AddTokenMatcherNoRegexpImpl { template @@ -71,14 +63,6 @@ struct AddStringMatcherImpl { void operator()( boost::ptr_vector & matchers, const std::string & token ) const; }; -struct AddLoopMatcherImpl { - - template - struct result { typedef void type; }; - - void operator()( boost::ptr_vector & matchers, uint min, uint max, std::vector & alternativesCount ) const ; -}; - struct DefinePattern { public: DefinePattern( Namespace & space, boost::spirit::classic::symbols & typeSymbol ) : @@ -101,20 +85,6 @@ struct AddPatternMatcherImpl : public DefinePattern { void operator()( boost::ptr_vector & matchers, const std::string & name, uint index, boost::ptr_vector< Restriction > & restrictions ) const; }; -struct AddAlternativeDefinitionImpl { - - template - struct result { typedef void type; }; - - AddAlternativeDefinitionImpl( const std::map& transformBuilders ) : - transformBuilders( transformBuilders ) {} - - void operator()( boost::ptr_vector & alts, boost::ptr_vector & matchers, boost::ptr_map & bindings, const std::string & source, const std::string & transformSource, const std::string & transformType ) const; - -private: - const std::map& transformBuilders; -}; - struct AddImpl { template struct result { typedef void type; }; @@ -141,26 +111,6 @@ struct AddNormalizationRestrictionImpl { void operator()( boost::ptr_vector & restrictions ) const; }; -struct AddBindingImpl { - template - struct result { typedef void type; }; - - void operator()( boost::ptr_map & bindings, AttributeKey att, Expression * exp ) const; -}; - -struct CreateDictionaryRestrictionImpl { - template - struct result { typedef Restriction * type; }; - - CreateDictionaryRestrictionImpl( Namespace & ns ) : - ns( ns ) { - } - - Restriction * operator()( const std::string & dictionaryName, boost::ptr_vector & args ) const; - - Namespace & ns; -}; - struct CreateAgreementRestrictionImpl { template struct result { typedef Restriction * type; }; From 446bed68f02a28167d3d12c21e3f159f1ec5277d Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Mon, 7 Oct 2019 21:51:48 +0300 Subject: [PATCH 19/24] Using speech part name as a pattern name is prohibited from now on --- core/src/main/lspl/patterns/PatternBuilder.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index a416e8d1..d13f99bf 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -184,6 +184,9 @@ class ParserImpl: public PatternBuilder::Parser { std::string patternName = readToken(); if (!isLatin(patternName.back())) throw produceException("Pattern name should end with a latin letter"); + for (int i = 0; i < SpeechPart::COUNT; ++i) + if (patternName == SpeechPart(i).getAbbrevation()) + throw produceException("Speech part name \"" + patternName + "\" can't be used as a pattern name"); return patternName; } From 44df69b6d4a31245ecf9d43bb3b5030d1cb4db90 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Mon, 7 Oct 2019 22:46:04 +0300 Subject: [PATCH 20/24] Memory leak? --- core/src/main/lspl/patterns/expressions/AttributeExpression.cpp | 1 + core/src/main/lspl/patterns/expressions/AttributeExpression.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/lspl/patterns/expressions/AttributeExpression.cpp b/core/src/main/lspl/patterns/expressions/AttributeExpression.cpp index 9950f57a..c5dd89e6 100644 --- a/core/src/main/lspl/patterns/expressions/AttributeExpression.cpp +++ b/core/src/main/lspl/patterns/expressions/AttributeExpression.cpp @@ -21,6 +21,7 @@ AttributeExpression::AttributeExpression( const Expression * base, const Attribu } AttributeExpression::~AttributeExpression() { + delete base; } void AttributeExpression::evaluateTo( const text::Transition * currentAnnotation, const matchers::Variable currentVar, const Context & ctx, ValueList & results ) const { diff --git a/core/src/main/lspl/patterns/expressions/AttributeExpression.h b/core/src/main/lspl/patterns/expressions/AttributeExpression.h index 8df45b34..6c6ba566 100644 --- a/core/src/main/lspl/patterns/expressions/AttributeExpression.h +++ b/core/src/main/lspl/patterns/expressions/AttributeExpression.h @@ -32,7 +32,7 @@ class LSPL_EXPORT AttributeExpression : public Expression { public: - /*std::unique_ptr*/ const Expression* base; + const Expression* base; text::attributes::AttributeKey attribute; }; From 476b45ab3dcb7e5435c7ef1a0a3f4bef925b11b2 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Sat, 26 Sep 2020 14:11:20 +0300 Subject: [PATCH 21/24] Fix OrRestrictions --- .../src/main/lspl/patterns/PatternBuilder.cpp | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index d13f99bf..917040b6 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -15,7 +15,6 @@ #include "matchers/PatternMatcher.h" #include "matchers/WordMatcher.h" -#include "../transforms/TransformBuilder.h" #include "../transforms/TextTransformBuilder.h" #include "../transforms/PatternTransformBuilder.h" @@ -49,7 +48,7 @@ class ParserImpl: public PatternBuilder::Parser { const char *buffer; uint pos; - /* + /** * Пропускает пробельные символы в буфере */ void skipSpaces() { @@ -85,7 +84,7 @@ class ParserImpl: public PatternBuilder::Parser { return e; } - /* + /** * Вырезать из токена-идентификатора индекс */ bool cutIndexFromToken(std::string &token, uint &index) { @@ -110,7 +109,7 @@ class ParserImpl: public PatternBuilder::Parser { return true; } - /* + /** * Убеждается, что текст продолжается строкой pattern */ bool strFollows(const char *pattern) { @@ -121,7 +120,7 @@ class ParserImpl: public PatternBuilder::Parser { return pattern[i] == '\0'; } - /* + /** * В точности повторяет действие функции strFollows, но считывает pattern из буфера * и кидает исключение, если не встречает строку */ @@ -151,7 +150,7 @@ class ParserImpl: public PatternBuilder::Parser { return token; } - /* + /** * Считывает беззнаковое целое */ uint readUInt() { @@ -218,7 +217,7 @@ class ParserImpl: public PatternBuilder::Parser { return false; } - /* + /** * Разделить строку на токены */ static std::vector split(const std::string &contents) { @@ -312,7 +311,7 @@ class ParserImpl: public PatternBuilder::Parser { return readPatternMatcher(pattern, index); } - /* + /** * Считать сопоставитель с вложенным списком альтернатив, т.е. * * 1) опциональный_элемент @@ -401,7 +400,7 @@ class ParserImpl: public PatternBuilder::Parser { } /** - * условия_на_лемму ::= [ lemma = ] лемма { | лемма } | [ lemma ] != лемма { | лемма } + * условия_на_лемму ::= [ lemma = ] лемма { | лемма } | [ lemma ] != лемма { | лемма } */ void readLemmaRestriction(MatcherPtr &matcher) { WordMatcher *word_m = dynamic_cast(matcher.get()); @@ -430,7 +429,7 @@ class ParserImpl: public PatternBuilder::Parser { } /** - * условия_на_основу ::= stem = основа { | основа } | stem != основа { | основа} + * условия_на_основу ::= stem = основа { | основа } | stem != основа { | основа} */ void readStemRestriction(MatcherPtr &matcher) { readStrFollows("stem"); @@ -549,7 +548,7 @@ class ParserImpl: public PatternBuilder::Parser { valueNames.push_back(readToken()); while (strFollows("|")) { - readStrFollows(""); + readStrFollows("|"); valueNames.push_back(readToken()); if (valueNames.back().length() == 0) throw produceException("Expression value expected"); @@ -560,7 +559,6 @@ class ParserImpl: public PatternBuilder::Parser { /** * Чтение одного ограничения сопоставителя - * */ void readMatcherRestriction(MatcherPtr &matcher) { if (strFollows("lemma") || isCyrillic(buffer[pos]) || strFollows("\"")) @@ -585,7 +583,7 @@ class ParserImpl: public PatternBuilder::Parser { readAttributeRestriction(matcher); } - /* + /** * Чтение списка ограничений сопоставителя */ void readMatcherRestrictions(MatcherPtr &matcher) { @@ -624,7 +622,7 @@ class ParserImpl: public PatternBuilder::Parser { return result; } - /* + /** * Сгенерировать сопоставитель, реализующий перестановку из * указанного набора сопоставителей */ From 0aa5ae5002e96d4e53bebcdba5025d95be08652c Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Fri, 2 Oct 2020 00:06:55 +0300 Subject: [PATCH 22/24] Fix bindings in pattern arguments --- core/src/main/lspl/patterns/PatternBuilder.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index 917040b6..064d51c7 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -338,12 +338,12 @@ class ParserImpl: public PatternBuilder::Parser { std::vector > alts; try { alts = readAlternatives(); + readStrFollows(rbrace); } catch (PatternBuildingException &e) { if (!canBeBinding) throw e; pos = before_pos; return nullptr; } - readStrFollows(rbrace); // Потенциально вложенный сопоставитель все ещё может быть параметром шаблона, если // параметр всего один. В таком случае нужно проверить, что @@ -964,10 +964,10 @@ class ParserImpl: public PatternBuilder::Parser { PatternBuilder::PatternBuilder( const NamespaceRef & ns ) : - space( ns ), - parser( new ParserImpl( space, transformBuilders ) ) { - transformBuilders.insert(std::make_pair("text", new transforms::TextTransformBuilder( space ))); - transformBuilders.insert(std::make_pair("pattern", new transforms::PatternTransformBuilder( space ))); + space( ns ), + parser( new ParserImpl( space, transformBuilders ) ) { + transformBuilders.insert(std::make_pair("text", new transforms::TextTransformBuilder( space ))); + transformBuilders.insert(std::make_pair("pattern", new transforms::PatternTransformBuilder( space ))); } PatternBuilder::~PatternBuilder() { From 83bae04d31f0d00129c485dde4619c95477d4c6f Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Fri, 2 Oct 2020 00:57:59 +0300 Subject: [PATCH 23/24] Fix double freeing in agreement restrictions --- core/src/main/lspl/patterns/PatternBuilder.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index 064d51c7..23f24ed2 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -674,8 +674,10 @@ class ParserImpl: public PatternBuilder::Parser { if (strFollows(".")) { readStrFollows("."); AttributeKey key = AttributeKey::findByAbbrevation(readToken()); - if (key == AttributeKey::UNDEFINED) + if (key == AttributeKey::UNDEFINED) { + delete result; throw produceException("Unknown attribute"); + } result = new AttributeExpression(result, key); } @@ -733,24 +735,23 @@ class ParserImpl: public PatternBuilder::Parser { * условие_ согласования ::= имя = имя { = имя } | имя == имя { == имя } */ void readPermutationRestriction(std::vector &matchers) { - std::vector exps(1, readAttributeExpression()); + std::vector> exps; + exps.push_back(std::unique_ptr(readAttributeExpression())); std::string agreementType = readAgreement(); - exps.push_back(readAttributeExpression()); + exps.push_back(std::unique_ptr(readAttributeExpression())); while (strFollows("=")) { if (readAgreement() != agreementType) throw produceException("Weak (=) and strong (==) agreements mixed"); - exps.push_back(readAttributeExpression()); + exps.push_back(std::unique_ptr(readAttributeExpression())); } AgreementRestriction *agreement_r = new AgreementRestriction(agreementType == "="); - for (Expression *e : exps) - agreement_r->addArgument(e); + for (std::unique_ptr &e : exps) + agreement_r->addArgument(e.release()); SharedRestriction shared_r(agreement_r); if (tryToAddAgreementRestriction(matchers.data(), matchers.size(), shared_r)) return; - for (Expression *e : exps) - delete e; } /** From d361c669b84a560e5925cd011a8794f308a9d7f7 Mon Sep 17 00:00:00 2001 From: Ivan Koryabkin Date: Fri, 2 Oct 2020 01:56:34 +0300 Subject: [PATCH 24/24] Fix recursive patterns --- .../src/main/lspl/patterns/PatternBuilder.cpp | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/core/src/main/lspl/patterns/PatternBuilder.cpp b/core/src/main/lspl/patterns/PatternBuilder.cpp index 23f24ed2..5ef73bb3 100644 --- a/core/src/main/lspl/patterns/PatternBuilder.cpp +++ b/core/src/main/lspl/patterns/PatternBuilder.cpp @@ -47,6 +47,7 @@ class ParserImpl: public PatternBuilder::Parser { private: const char *buffer; uint pos; + Pattern *currentPattern; /** * Пропускает пробельные символы в буфере @@ -305,10 +306,7 @@ class ParserImpl: public PatternBuilder::Parser { return readWordMatcher(SpeechPart(i), index); // экземпляр_шаблона - PatternRef pattern = space->getPatternByName(token); - if (!pattern) - throw produceException("No pattern with specified name"); - return readPatternMatcher(pattern, index); + return readPatternMatcher(token, index); } /** @@ -596,12 +594,22 @@ class ParserImpl: public PatternBuilder::Parser { readStrFollows(">"); } + Pattern *findPatternPtr(const std::string &patternName) { + PatternRef patternRef = space->getPatternByName(patternName); + if (patternRef) + return &*patternRef; + if (currentPattern->getName() == patternName) + return currentPattern; + throw produceException("No pattern with name \"" + patternName + "\""); + } + /** * Считать сопоставитель-шаблон * * экземпляр-шаблона ::= имя_шаблона [индекс] | имя_шаблона [индекс] <характеристика { , характеристика }> */ - MatcherPtr readPatternMatcher(PatternRef pattern, uint index) { + MatcherPtr readPatternMatcher(const std::string &patternName, uint index) { + Pattern *pattern = findPatternPtr(patternName); PatternMatcher *matcher = new PatternMatcher(*pattern); MatcherPtr result(matcher); matcher->variable = Variable(*pattern, index); @@ -665,10 +673,7 @@ class ParserImpl: public PatternBuilder::Parser { // Шаблон if (!result) { - PatternRef pattern = space->getPatternByName(token); - if (!pattern) - throw produceException("No pattern with specified name"); - result = new VariableExpression(*pattern, index); + result = new VariableExpression(*findPatternPtr(token), index); } if (strFollows(".")) { @@ -876,7 +881,7 @@ class ParserImpl: public PatternBuilder::Parser { void readPattern() { std::string patternName = readPatternName(); - PatternPtr pattern(new Pattern(patternName)); + PatternPtr pattern(currentPattern = new Pattern(patternName)); std::vector arguments; uint alternativeCountBefore = pattern->alternatives.size(); bool hasPatternAttributes = false;