diff --git a/libturboparser/Makefile b/libturboparser/Makefile index a1a9af1..746cb4e 100644 --- a/libturboparser/Makefile +++ b/libturboparser/Makefile @@ -1,15 +1,17 @@ UTIL = ../src/util CLASSIFIER = ../src/classifier +SEQUENCE = ../src/sequence TAGGER = ../src/tagger +ENTITYRECOGNIZER = ../src/entity_recognizer PARSER = ../src/parser SEMANTICPARSER = ../src/semantic_parser AUXLIBS = ../deps/local/lib AUXINCLUDES = ../deps/local/include -OBJS = TurboParserInterface.o SemanticDecoder.o SemanticDictionary.o SemanticFeatures.o SemanticInstanceNumeric.o SemanticInstance.o SemanticOptions.o SemanticPart.o SemanticPipe.o SemanticReader.o SemanticWriter.o DependencyDecoder.o DependencyDictionary.o DependencyFeatures.o DependencyInstance.o DependencyInstanceNumeric.o DependencyOptions.o DependencyPart.o DependencyPipe.o DependencyReader.o DependencyWriter.o SequenceDecoder.o SequenceDictionary.o SequenceFeatures.o SequenceInstance.o SequenceInstanceNumeric.o SequenceOptions.o SequencePart.o SequencePipe.o SequenceReader.o SequenceWriter.o TokenDictionary.o Alphabet.o Dictionary.o Options.o Parameters.o Pipe.o Reader.o Writer.o AlgUtils.o SerializationUtils.o StringUtils.o TimeUtils.o +OBJS = TurboParserInterface.o SemanticDecoder.o SemanticDictionary.o SemanticFeatures.o SemanticInstanceNumeric.o SemanticInstance.o SemanticOptions.o SemanticPart.o SemanticPipe.o SemanticReader.o SemanticWriter.o DependencyDecoder.o DependencyDictionary.o DependencyFeatures.o DependencyInstance.o DependencyInstanceNumeric.o DependencyOptions.o DependencyPart.o DependencyPipe.o DependencyReader.o DependencyWriter.o TaggerDictionary.o TaggerFeatures.o TaggerOptions.o TaggerPipe.o EntityDictionary.o EntityFeatures.o EntityInstance.o EntityInstanceNumeric.o EntityOptions.o EntityPipe.o EntityReader.o EntityWriter.o SequenceDecoder.o SequenceDictionary.o SequenceInstance.o SequenceInstanceNumeric.o SequenceOptions.o SequencePart.o SequencePipe.o SequenceReader.o SequenceWriter.o 
TokenDictionary.o Alphabet.o Dictionary.o Options.o Parameters.o Pipe.o Reader.o Writer.o AlgUtils.o SerializationUtils.o StringUtils.o TimeUtils.o CC = g++ DEBUG = -g -INCLUDES = -I$(UTIL)/ -I$(CLASSIFIER) -I$(TAGGER) -I$(PARSER) -I$(SEMANTICPARSER) -I$(AUXINCLUDES) +INCLUDES = -I$(UTIL)/ -I$(CLASSIFIER) -I$(SEQUENCE) -I$(TAGGER) -I$(ENTITYRECOGNIZER) -I$(PARSER) -I$(SEMANTICPARSER) -I$(AUXINCLUDES) LIBS = -L/usr/local/lib/ -L$(AUXLIBS) CFLAGS = -O3 -Wall -Wno-sign-compare -c -fmessage-length=0 -fPIC $(INCLUDES) LFLAGS = $(LIBS) -lad3 -lgflags -lglog @@ -19,7 +21,7 @@ all : libturboparser.a libturboparser.a : $(OBJS) ar rcs libturboparser.a $(OBJS) -TurboParserInterface.o: TurboParserInterface.h TurboParserInterface.cpp $(TAGGER)/SequencePipe.h $(PARSER)/DependencyPipe.h $(SEMANTICPARSER)/SemanticPipe.h $(UTIL)/Utils.h +TurboParserInterface.o: TurboParserInterface.h TurboParserInterface.cpp $(TAGGER)/TaggerPipe.h $(ENTITYRECOGNIZER)/EntityPipe.h $(PARSER)/DependencyPipe.h $(SEMANTICPARSER)/SemanticPipe.h $(UTIL)/Utils.h $(CC) $(CFLAGS) TurboParserInterface.cpp ##################### @@ -27,7 +29,7 @@ TurboParserInterface.o: TurboParserInterface.h TurboParserInterface.cpp $(TAGGER SemanticDecoder.o: $(SEMANTICPARSER)/SemanticDecoder.h $(SEMANTICPARSER)/SemanticDecoder.cpp $(SEMANTICPARSER)/SemanticPart.h $(SEMANTICPARSER)/SemanticPipe.h $(PARSER)/FactorTree.h $(SEMANTICPARSER)/FactorPredicateAutomaton.h $(SEMANTICPARSER)/FactorArgumentAutomaton.h $(UTIL)/AlgUtils.h $(UTIL)/logval.h $(CLASSIFIER)/Decoder.h $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticDecoder.cpp -SemanticDictionary.o: $(SEMANTICPARSER)/SemanticDictionary.h $(SEMANTICPARSER)/SemanticDictionary.cpp $(SEMANTICPARSER)/SemanticPipe.h $(CLASSIFIER)/Dictionary.h $(TAGGER)/TokenDictionary.h $(UTIL)/SerializationUtils.h +SemanticDictionary.o: $(SEMANTICPARSER)/SemanticDictionary.h $(SEMANTICPARSER)/SemanticDictionary.cpp $(SEMANTICPARSER)/SemanticPipe.h $(CLASSIFIER)/Dictionary.h $(SEQUENCE)/TokenDictionary.h 
$(UTIL)/SerializationUtils.h $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticDictionary.cpp SemanticFeatures.o: $(SEMANTICPARSER)/SemanticFeatures.h $(SEMANTICPARSER)/SemanticFeatures.cpp $(SEMANTICPARSER)/SemanticPipe.h $(SEMANTICPARSER)/SemanticPart.h $(SEMANTICPARSER)/SemanticFeatureTemplates.h $(CLASSIFIER)/Features.h $(SEMANTICPARSER)/SemanticInstanceNumeric.h $(CLASSIFIER)/FeatureEncoder.h @@ -45,7 +47,7 @@ SemanticOptions.o: $(SEMANTICPARSER)/SemanticOptions.h $(SEMANTICPARSER)/Semanti SemanticPart.o: $(SEMANTICPARSER)/SemanticPart.h $(SEMANTICPARSER)/SemanticPart.cpp $(CLASSIFIER)/Part.h $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticPart.cpp -SemanticPipe.o: $(SEMANTICPARSER)/SemanticPipe.h $(SEMANTICPARSER)/SemanticPipe.cpp $(CLASSIFIER)/Pipe.h $(SEMANTICPARSER)/SemanticOptions.h $(SEMANTICPARSER)/SemanticReader.h $(SEMANTICPARSER)/SemanticDictionary.h $(TAGGER)/TokenDictionary.h $(SEMANTICPARSER)/SemanticInstanceNumeric.h $(SEMANTICPARSER)/SemanticWriter.h $(SEMANTICPARSER)/SemanticPart.h $(SEMANTICPARSER)/SemanticFeatures.h $(SEMANTICPARSER)/SemanticDecoder.h +SemanticPipe.o: $(SEMANTICPARSER)/SemanticPipe.h $(SEMANTICPARSER)/SemanticPipe.cpp $(CLASSIFIER)/Pipe.h $(SEMANTICPARSER)/SemanticOptions.h $(SEMANTICPARSER)/SemanticReader.h $(SEMANTICPARSER)/SemanticDictionary.h $(SEQUENCE)/TokenDictionary.h $(SEMANTICPARSER)/SemanticInstanceNumeric.h $(SEMANTICPARSER)/SemanticWriter.h $(SEMANTICPARSER)/SemanticPart.h $(SEMANTICPARSER)/SemanticFeatures.h $(SEMANTICPARSER)/SemanticDecoder.h $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticPipe.cpp SemanticReader.o: $(SEMANTICPARSER)/SemanticReader.h $(SEMANTICPARSER)/SemanticReader.cpp $(SEMANTICPARSER)/SemanticInstance.h $(CLASSIFIER)/Reader.h @@ -59,7 +61,7 @@ SemanticWriter.o: $(SEMANTICPARSER)/SemanticWriter.h $(SEMANTICPARSER)/SemanticW DependencyDecoder.o: $(PARSER)/DependencyDecoder.h $(PARSER)/DependencyDecoder.cpp $(PARSER)/DependencyPart.h $(PARSER)/DependencyPipe.h $(PARSER)/FactorTree.h $(PARSER)/FactorHeadAutomaton.h 
$(PARSER)/FactorGrandparentHeadAutomaton.h $(PARSER)/FactorTrigramHeadAutomaton.h $(PARSER)/FactorSequence.h $(UTIL)/AlgUtils.h $(UTIL)/logval.h $(CLASSIFIER)/Decoder.h $(CC) $(CFLAGS) $(PARSER)/DependencyDecoder.cpp -DependencyDictionary.o: $(PARSER)/DependencyDictionary.h $(PARSER)/DependencyDictionary.cpp $(PARSER)/DependencyPipe.h $(CLASSIFIER)/Dictionary.h $(TAGGER)/TokenDictionary.h $(UTIL)/SerializationUtils.h +DependencyDictionary.o: $(PARSER)/DependencyDictionary.h $(PARSER)/DependencyDictionary.cpp $(PARSER)/DependencyPipe.h $(CLASSIFIER)/Dictionary.h $(SEQUENCE)/TokenDictionary.h $(UTIL)/SerializationUtils.h $(CC) $(CFLAGS) $(PARSER)/DependencyDictionary.cpp DependencyFeatures.o: $(PARSER)/DependencyFeatures.h $(PARSER)/DependencyFeatures.cpp $(PARSER)/DependencyPipe.h $(PARSER)/DependencyPart.h $(PARSER)/DependencyFeatureTemplates.h $(CLASSIFIER)/Features.h $(PARSER)/DependencyInstanceNumeric.h $(CLASSIFIER)/FeatureEncoder.h @@ -77,7 +79,7 @@ DependencyOptions.o: $(PARSER)/DependencyOptions.h $(PARSER)/DependencyOptions.c DependencyPart.o: $(PARSER)/DependencyPart.h $(PARSER)/DependencyPart.cpp $(CLASSIFIER)/Part.h $(CC) $(CFLAGS) $(PARSER)/DependencyPart.cpp -DependencyPipe.o: $(PARSER)/DependencyPipe.h $(PARSER)/DependencyPipe.cpp $(CLASSIFIER)/Pipe.h $(PARSER)/DependencyOptions.h $(PARSER)/DependencyReader.h $(PARSER)/DependencyDictionary.h $(TAGGER)/TokenDictionary.h $(PARSER)/DependencyInstanceNumeric.h $(PARSER)/DependencyWriter.h $(PARSER)/DependencyPart.h $(PARSER)/DependencyFeatures.h $(PARSER)/DependencyDecoder.h +DependencyPipe.o: $(PARSER)/DependencyPipe.h $(PARSER)/DependencyPipe.cpp $(CLASSIFIER)/Pipe.h $(PARSER)/DependencyOptions.h $(PARSER)/DependencyReader.h $(PARSER)/DependencyDictionary.h $(SEQUENCE)/TokenDictionary.h $(PARSER)/DependencyInstanceNumeric.h $(PARSER)/DependencyWriter.h $(PARSER)/DependencyPart.h $(PARSER)/DependencyFeatures.h $(PARSER)/DependencyDecoder.h $(CC) $(CFLAGS) $(PARSER)/DependencyPipe.cpp DependencyReader.o: 
$(PARSER)/DependencyReader.h $(PARSER)/DependencyReader.cpp $(PARSER)/DependencyInstance.h $(CLASSIFIER)/Reader.h @@ -88,38 +90,75 @@ DependencyWriter.o: $(PARSER)/DependencyWriter.h $(PARSER)/DependencyWriter.cpp ##################### -SequenceDecoder.o: $(TAGGER)/SequenceDecoder.h $(TAGGER)/SequenceDecoder.cpp $(TAGGER)/SequencePart.h $(TAGGER)/SequencePipe.h $(CLASSIFIER)/Decoder.h - $(CC) $(CFLAGS) $(TAGGER)/SequenceDecoder.cpp +TaggerDictionary.o: $(TAGGER)/TaggerDictionary.h $(TAGGER)/TaggerDictionary.cpp $(TAGGER)/TaggerPipe.h $(SEQUENCE)/SequenceDictionary.h $(SEQUENCE)/TokenDictionary.h $(UTIL)/SerializationUtils.h + $(CC) $(CFLAGS) $(TAGGER)/TaggerDictionary.cpp -SequenceDictionary.o: $(TAGGER)/SequenceDictionary.h $(TAGGER)/SequenceDictionary.cpp $(TAGGER)/SequencePipe.h $(CLASSIFIER)/Dictionary.h $(TAGGER)/TokenDictionary.h $(UTIL)/SerializationUtils.h - $(CC) $(CFLAGS) $(TAGGER)/SequenceDictionary.cpp +TaggerFeatures.o: $(TAGGER)/TaggerFeatures.h $(TAGGER)/TaggerFeatures.cpp $(TAGGER)/TaggerPipe.h $(TAGGER)/TaggerFeatureTemplates.h $(SEQUENCE)/SequenceFeatures.h $(SEQUENCE)/SequenceInstanceNumeric.h $(CLASSIFIER)/FeatureEncoder.h + $(CC) $(CFLAGS) $(TAGGER)/TaggerFeatures.cpp -SequenceFeatures.o: $(TAGGER)/SequenceFeatures.h $(TAGGER)/SequenceFeatures.cpp $(TAGGER)/SequencePipe.h $(TAGGER)/SequencePart.h $(TAGGER)/SequenceFeatureTemplates.h $(CLASSIFIER)/Features.h $(TAGGER)/SequenceInstanceNumeric.h $(CLASSIFIER)/FeatureEncoder.h - $(CC) $(CFLAGS) $(TAGGER)/SequenceFeatures.cpp +TaggerOptions.o: $(TAGGER)/TaggerOptions.h $(TAGGER)/TaggerOptions.cpp $(UTIL)/SerializationUtils.h $(SEQUENCE)/SequenceOptions.h + $(CC) $(CFLAGS) $(TAGGER)/TaggerOptions.cpp -SequenceInstance.o: $(TAGGER)/SequenceInstance.h $(TAGGER)/SequenceInstance.cpp $(CLASSIFIER)/Instance.h - $(CC) $(CFLAGS) $(TAGGER)/SequenceInstance.cpp +TaggerPipe.o: $(TAGGER)/TaggerPipe.h $(TAGGER)/TaggerPipe.cpp $(SEQUENCE)/SequencePipe.h $(TAGGER)/TaggerOptions.h $(TAGGER)/TaggerDictionary.h + 
$(CC) $(CFLAGS) $(TAGGER)/TaggerPipe.cpp -SequenceInstanceNumeric.o: $(TAGGER)/SequenceInstanceNumeric.h $(TAGGER)/SequenceInstanceNumeric.cpp $(TAGGER)/SequenceInstance.h $(TAGGER)/SequenceDictionary.h - $(CC) $(CFLAGS) $(TAGGER)/SequenceInstanceNumeric.cpp +##################### + +EntityDictionary.o: $(ENTITYRECOGNIZER)/EntityDictionary.h $(ENTITYRECOGNIZER)/EntityDictionary.cpp $(ENTITYRECOGNIZER)/EntityPipe.h $(SEQUENCE)/SequenceDictionary.h $(SEQUENCE)/TokenDictionary.h $(UTIL)/SerializationUtils.h + $(CC) $(CFLAGS) $(ENTITYRECOGNIZER)/EntityDictionary.cpp + +EntityFeatures.o: $(ENTITYRECOGNIZER)/EntityFeatures.h $(ENTITYRECOGNIZER)/EntityFeatures.cpp $(ENTITYRECOGNIZER)/EntityPipe.h $(ENTITYRECOGNIZER)/EntityFeatureTemplates.h $(SEQUENCE)/SequenceFeatures.h $(SEQUENCE)/SequenceInstanceNumeric.h $(CLASSIFIER)/FeatureEncoder.h + $(CC) $(CFLAGS) $(ENTITYRECOGNIZER)/EntityFeatures.cpp + +EntityOptions.o: $(ENTITYRECOGNIZER)/EntityOptions.h $(ENTITYRECOGNIZER)/EntityOptions.cpp $(UTIL)/SerializationUtils.h $(SEQUENCE)/SequenceOptions.h + $(CC) $(CFLAGS) $(ENTITYRECOGNIZER)/EntityOptions.cpp + +EntityPipe.o: $(ENTITYRECOGNIZER)/EntityPipe.h $(ENTITYRECOGNIZER)/EntityPipe.cpp $(SEQUENCE)/SequencePipe.h $(ENTITYRECOGNIZER)/EntityOptions.h $(ENTITYRECOGNIZER)/EntityDictionary.h + $(CC) $(CFLAGS) $(ENTITYRECOGNIZER)/EntityPipe.cpp + +EntityReader.o: $(ENTITYRECOGNIZER)/EntityReader.h $(ENTITYRECOGNIZER)/EntityReader.cpp $(ENTITYRECOGNIZER)/EntityInstance.h $(SEQUENCE)/SequenceReader.h + $(CC) $(CFLAGS) $(ENTITYRECOGNIZER)/EntityReader.cpp + +EntityWriter.o: $(ENTITYRECOGNIZER)/EntityWriter.h $(ENTITYRECOGNIZER)/EntityWriter.cpp $(ENTITYRECOGNIZER)/EntityInstance.h $(SEQUENCE)/SequenceWriter.h + $(CC) $(CFLAGS) $(ENTITYRECOGNIZER)/EntityWriter.cpp + +EntityInstance.o: $(ENTITYRECOGNIZER)/EntityInstance.h $(ENTITYRECOGNIZER)/EntityInstance.cpp $(SEQUENCE)/SequenceInstance.h + $(CC) $(CFLAGS) $(ENTITYRECOGNIZER)/EntityInstance.cpp + +EntityInstanceNumeric.o: 
$(ENTITYRECOGNIZER)/EntityInstanceNumeric.h $(ENTITYRECOGNIZER)/EntityInstanceNumeric.cpp $(ENTITYRECOGNIZER)/EntityInstance.h $(SEQUENCE)/SequenceInstanceNumeric.h $(ENTITYRECOGNIZER)/EntityDictionary.h + $(CC) $(CFLAGS) $(ENTITYRECOGNIZER)/EntityInstanceNumeric.cpp + +##################### + +SequenceDecoder.o: $(SEQUENCE)/SequenceDecoder.h $(SEQUENCE)/SequenceDecoder.cpp $(SEQUENCE)/SequencePart.h $(SEQUENCE)/SequencePipe.h $(CLASSIFIER)/Decoder.h + $(CC) $(CFLAGS) $(SEQUENCE)/SequenceDecoder.cpp + +SequenceDictionary.o: $(SEQUENCE)/SequenceDictionary.h $(SEQUENCE)/SequenceDictionary.cpp $(SEQUENCE)/SequencePipe.h $(CLASSIFIER)/Dictionary.h $(SEQUENCE)/TokenDictionary.h $(UTIL)/SerializationUtils.h + $(CC) $(CFLAGS) $(SEQUENCE)/SequenceDictionary.cpp + +SequenceInstance.o: $(SEQUENCE)/SequenceInstance.h $(SEQUENCE)/SequenceInstance.cpp $(CLASSIFIER)/Instance.h + $(CC) $(CFLAGS) $(SEQUENCE)/SequenceInstance.cpp + +SequenceInstanceNumeric.o: $(SEQUENCE)/SequenceInstanceNumeric.h $(SEQUENCE)/SequenceInstanceNumeric.cpp $(SEQUENCE)/SequenceInstance.h $(SEQUENCE)/SequenceDictionary.h + $(CC) $(CFLAGS) $(SEQUENCE)/SequenceInstanceNumeric.cpp -SequenceOptions.o: $(TAGGER)/SequenceOptions.h $(TAGGER)/SequenceOptions.cpp $(UTIL)/SerializationUtils.h $(CLASSIFIER)/Options.h - $(CC) $(CFLAGS) $(TAGGER)/SequenceOptions.cpp +SequenceOptions.o: $(SEQUENCE)/SequenceOptions.h $(SEQUENCE)/SequenceOptions.cpp $(UTIL)/SerializationUtils.h $(CLASSIFIER)/Options.h + $(CC) $(CFLAGS) $(SEQUENCE)/SequenceOptions.cpp -SequencePart.o: $(TAGGER)/SequencePart.h $(TAGGER)/SequencePart.cpp $(CLASSIFIER)/Part.h - $(CC) $(CFLAGS) $(TAGGER)/SequencePart.cpp +SequencePart.o: $(SEQUENCE)/SequencePart.h $(SEQUENCE)/SequencePart.cpp $(CLASSIFIER)/Part.h + $(CC) $(CFLAGS) $(SEQUENCE)/SequencePart.cpp -SequencePipe.o: $(TAGGER)/SequencePipe.h $(TAGGER)/SequencePipe.cpp $(CLASSIFIER)/Pipe.h $(TAGGER)/SequenceOptions.h $(TAGGER)/SequenceReader.h $(TAGGER)/SequenceDictionary.h 
$(TAGGER)/TokenDictionary.h $(TAGGER)/SequenceInstanceNumeric.h $(TAGGER)/SequenceWriter.h $(TAGGER)/SequencePart.h $(TAGGER)/SequenceFeatures.h $(TAGGER)/SequenceDecoder.h - $(CC) $(CFLAGS) $(TAGGER)/SequencePipe.cpp +SequencePipe.o: $(SEQUENCE)/SequencePipe.h $(SEQUENCE)/SequencePipe.cpp $(CLASSIFIER)/Pipe.h $(SEQUENCE)/SequenceOptions.h $(SEQUENCE)/SequenceReader.h $(SEQUENCE)/SequenceDictionary.h $(SEQUENCE)/TokenDictionary.h $(SEQUENCE)/SequenceInstanceNumeric.h $(SEQUENCE)/SequenceWriter.h $(SEQUENCE)/SequencePart.h $(SEQUENCE)/SequenceFeatures.h $(SEQUENCE)/SequenceDecoder.h + $(CC) $(CFLAGS) $(SEQUENCE)/SequencePipe.cpp -SequenceReader.o: $(TAGGER)/SequenceReader.h $(TAGGER)/SequenceReader.cpp $(TAGGER)/SequenceInstance.h $(CLASSIFIER)/Reader.h - $(CC) $(CFLAGS) $(TAGGER)/SequenceReader.cpp +SequenceReader.o: $(SEQUENCE)/SequenceReader.h $(SEQUENCE)/SequenceReader.cpp $(SEQUENCE)/SequenceInstance.h $(CLASSIFIER)/Reader.h + $(CC) $(CFLAGS) $(SEQUENCE)/SequenceReader.cpp -SequenceWriter.o: $(TAGGER)/SequenceWriter.h $(TAGGER)/SequenceWriter.cpp $(TAGGER)/SequenceInstance.h $(CLASSIFIER)/Writer.h - $(CC) $(CFLAGS) $(TAGGER)/SequenceWriter.cpp +SequenceWriter.o: $(SEQUENCE)/SequenceWriter.h $(SEQUENCE)/SequenceWriter.cpp $(SEQUENCE)/SequenceInstance.h $(CLASSIFIER)/Writer.h + $(CC) $(CFLAGS) $(SEQUENCE)/SequenceWriter.cpp -TokenDictionary.o: $(TAGGER)/TokenDictionary.h $(TAGGER)/TokenDictionary.cpp $(CLASSIFIER)/Pipe.h $(UTIL)/SerializationUtils.h $(CLASSIFIER)/Dictionary.h $(CLASSIFIER)/Alphabet.h $(TAGGER)/SequenceReader.h $(PARSER)/DependencyReader.h - $(CC) $(CFLAGS) $(TAGGER)/TokenDictionary.cpp +TokenDictionary.o: $(SEQUENCE)/TokenDictionary.h $(SEQUENCE)/TokenDictionary.cpp $(CLASSIFIER)/Pipe.h $(UTIL)/SerializationUtils.h $(CLASSIFIER)/Dictionary.h $(CLASSIFIER)/Alphabet.h $(SEQUENCE)/SequenceReader.h $(PARSER)/DependencyReader.h + $(CC) $(CFLAGS) $(SEQUENCE)/TokenDictionary.cpp ##################### diff --git a/libturboparser/TurboParserInterface.cpp 
b/libturboparser/TurboParserInterface.cpp index d846ace..9978f97 100644 --- a/libturboparser/TurboParserInterface.cpp +++ b/libturboparser/TurboParserInterface.cpp @@ -13,10 +13,10 @@ namespace TurboParserInterface { TurboTaggerWorker::TurboTaggerWorker() { - tagger_options_ = new SequenceOptions; + tagger_options_ = new TaggerOptions; tagger_options_->Initialize(); - tagger_pipe_ = new SequencePipe(tagger_options_); + tagger_pipe_ = new TaggerPipe(tagger_options_); tagger_pipe_->Initialize(); } @@ -61,6 +61,56 @@ void TurboTaggerWorker::Tag(const std::string &file_test, << " sec." << endl; } +TurboEntityRecognizerWorker::TurboEntityRecognizerWorker() { + entity_options_ = new EntityOptions; + entity_options_->Initialize(); + + entity_pipe_ = new EntityPipe(entity_options_); + entity_pipe_->Initialize(); +} + +TurboEntityRecognizerWorker::~TurboEntityRecognizerWorker() { + LOG(INFO) << "Deleting entity recognizer pipe."; + delete entity_pipe_; + LOG(INFO) << "Deleting entity recognizer options."; + delete entity_options_; +} + +void TurboEntityRecognizerWorker::LoadEntityRecognizerModel( + const std::string &file_model) { + entity_options_->SetModelFilePath(file_model); + + int time; + timeval start, end; + gettimeofday(&start, NULL); + + entity_pipe_->LoadModelFile(); + + gettimeofday(&end, NULL); + time = diff_ms(end,start); + + LOG(INFO) << "Took " << static_cast<double>(time)/1000.0 + << " sec." << endl; +} + +void TurboEntityRecognizerWorker::Tag(const std::string &file_test, + const std::string &file_prediction) { + entity_options_->SetTestFilePath(file_test); + entity_options_->SetOutputFilePath(file_prediction); + + int time; + timeval start, end; + gettimeofday(&start, NULL); + + entity_pipe_->Run(); + + gettimeofday(&end, NULL); + time = diff_ms(end,start); + + LOG(INFO) << "Took " << static_cast<double>(time)/1000.0 + << " sec." 
<< endl; +} + TurboParserWorker::TurboParserWorker() { parser_options_ = new DependencyOptions; parser_options_->Initialize(); @@ -183,6 +233,9 @@ TurboParserInterface::~TurboParserInterface() { LOG(INFO) << "Deleting tagger workers."; DeleteAllTaggers(); + LOG(INFO) << "Deleting entity recognizer workers."; + DeleteAllEntityRecognizers(); + LOG(INFO) << "Deleting parser workers."; DeleteAllParsers(); diff --git a/libturboparser/TurboParserInterface.h b/libturboparser/TurboParserInterface.h index 158ba6a..2f3c542 100644 --- a/libturboparser/TurboParserInterface.h +++ b/libturboparser/TurboParserInterface.h @@ -1,6 +1,7 @@ #include #include -#include "SequencePipe.h" +#include "TaggerPipe.h" +#include "EntityPipe.h" #include "DependencyPipe.h" #include "SemanticPipe.h" @@ -17,8 +18,23 @@ class TurboTaggerWorker { const std::string &file_prediction); private: - SequenceOptions *tagger_options_; - SequencePipe *tagger_pipe_; + TaggerOptions *tagger_options_; + TaggerPipe *tagger_pipe_; +}; + +class TurboEntityRecognizerWorker { + public: + TurboEntityRecognizerWorker(); + virtual ~TurboEntityRecognizerWorker(); + + void LoadEntityRecognizerModel(const std::string &file_model); + + void Tag(const std::string &file_test, + const std::string &file_prediction); + + private: + EntityOptions *entity_options_; + EntityPipe *entity_pipe_; }; class TurboParserWorker { @@ -77,6 +93,13 @@ class TurboParserInterface { return tagger; } + TurboEntityRecognizerWorker *CreateEntityRecognizer() { + TurboEntityRecognizerWorker *entity_recognizer = + new TurboEntityRecognizerWorker(); + entity_recognizers_.push_back(entity_recognizer); + return entity_recognizer; + } + TurboParserWorker *CreateParser() { TurboParserWorker *parser = new TurboParserWorker(); parsers_.push_back(parser); @@ -96,6 +119,13 @@ class TurboParserInterface { taggers_.clear(); } + void DeleteAllEntityRecognizers() { + for (int i = 0; i < entity_recognizers_.size(); ++i) { + delete entity_recognizers_[i]; + } + 
entity_recognizers_.clear(); + } + void DeleteAllParsers() { for (int i = 0; i < parsers_.size(); ++i) { delete parsers_[i]; @@ -113,9 +143,10 @@ class TurboParserInterface { private: int argc_; char** argv_; - vector<TurboTaggerWorker*> taggers_; - vector<TurboParserWorker*> parsers_; - vector<TurboSemanticParserWorker*> semantic_parsers_; + std::vector<TurboTaggerWorker*> taggers_; + std::vector<TurboParserWorker*> parsers_; + std::vector<TurboSemanticParserWorker*> semantic_parsers_; + std::vector<TurboEntityRecognizerWorker*> entity_recognizers_; }; } // namespace TurboParserInterface. diff --git a/python/nlp_pipeline.config b/python/nlp_pipeline.config index a5acc36..d50bc34 100644 --- a/python/nlp_pipeline.config +++ b/python/nlp_pipeline.config @@ -19,6 +19,7 @@ parser="/home/atm/workspace/CPP/TurboParser/models/english_proj/english_proj_par EN-Nonprojective splitter="tokenizers/punkt/english.pickle" tagger="/home/atm/workspace/CPP/TurboParser/models/english_proj/english_proj_tagger.model" +entity_recognizer="/home/atm/workspace/CPP/TurboParser/ner/models/english/english_entity_recognizer.model" parser="/home/atm/workspace/CPP/TurboParser/models/english/english_parser_pruned-true_model-standard.model" semantic_parser="/home/atm/workspace/CPP/TurboParser/srl/models/english/english_semantic_parser_conll2008_pruned-false_model-basic_syntax-true_C-0.01_fp-0.4_fn-0.6.model" lemmatizer="/home/atm/workspace/CPP/TurboParser/models/english/english_lemmatizer.model" diff --git a/python/nlp_pipeline.py b/python/nlp_pipeline.py index 594c818..0de8026 100644 --- a/python/nlp_pipeline.py +++ b/python/nlp_pipeline.py @@ -8,6 +8,7 @@ class NLPPipelineWorker: def __init__(self, pipeline, language): self.tagger = None + self.entity_recognizer = None self.parser = None self.semantic_parser = None self.lemmatizer = None @@ -32,6 +33,9 @@ def __init__(self, pipeline, language): if 'tagger' in pipeline.models[language]: self.tagger = pipeline.turbo_interface.create_tagger() self.tagger.load_tagger_model(pipeline.models[language]['tagger']) + if 'entity_recognizer' in pipeline.models[language]: + self.entity_recognizer = 
pipeline.turbo_interface.create_entity_recognizer() + self.entity_recognizer.load_entity_recognizer_model(pipeline.models[language]['entity_recognizer']) if 'parser' in pipeline.models[language]: self.parser = pipeline.turbo_interface.create_parser() self.parser.load_parser_model(pipeline.models[language]['parser']) @@ -122,6 +126,26 @@ def tag(self, tokenized_sentence, language): lemmas = ['_' for token in tokenized_sentence] return tags, lemmas + def recognize_entities(self, tokenized_sentence, tags, language): + worker = self.get_worker(language) + f_ner = open('ner.tmp', 'w') + for i, token in enumerate(tokenized_sentence): + tag = tags[i] + f_ner.write(token + '\t' + tag + '\t' + '\t_\n') + f_ner.close() + worker.entity_recognizer.tag('ner.tmp', 'ner.tmp.pred') + f_ner_pred = open('ner.tmp.pred') + entity_tags = [] + for line in f_ner_pred: + line = line.rstrip('\n') + if line == '': + continue + fields = line.split('\t') + entity_tag = fields[2] + entity_tags.append(entity_tag) + f_ner_pred.close() + return entity_tags + def parse(self, tokenized_sentence, tags, lemmas, language): worker = self.get_worker(language) f_conll = open('conll.tmp', 'w') @@ -149,6 +173,10 @@ def parse(self, tokenized_sentence, tags, lemmas, language): f_conll_pred.close() return heads, deprels + def has_entity_recognizer(self, language): + worker = self.get_worker(language) + return (worker.entity_recognizer != None) + def has_semantic_parser(self, language): worker = self.get_worker(language) return (worker.semantic_parser != None) diff --git a/python/setup.py b/python/setup.py index 4237539..b3d73c7 100644 --- a/python/setup.py +++ b/python/setup.py @@ -6,5 +6,5 @@ setup(cmdclass={'build_ext': build_ext}, ext_modules=[Extension("turboparser", ["turbo_parser.pyx"], language="c++", - include_dirs=["../src/semantic_parser", "../src/parser", "../src/tagger/", "../src/classifier/", "../src/util", "../deps/local/include/"], + include_dirs=["../src/semantic_parser", "../src/parser", 
"../src/entity_recognizer/", "../src/tagger/", "../src/sequence/", "../src/classifier/", "../src/util", "../deps/local/include/"], library_dirs=[src, "../deps/local/lib/"], libraries=["turboparser", "gflags", "glog", "ad3"])]) diff --git a/python/turbo_parser.pyx b/python/turbo_parser.pyx index a8441d0..34fe78c 100644 --- a/python/turbo_parser.pyx +++ b/python/turbo_parser.pyx @@ -12,6 +12,11 @@ cdef extern from "../libturboparser/TurboParserInterface.h" namespace "TurboPars void LoadTaggerModel(string file_model) void Tag(string file_test, string file_prediction) + cdef cppclass TurboEntityRecognizerWorker: + TurboEntityRecognizerWorker() + void LoadEntityRecognizerModel(string file_model) + void Tag(string file_test, string file_prediction) + cdef cppclass TurboParserWorker: TurboParserWorker() void LoadParserModel(string file_model) @@ -25,6 +30,7 @@ cdef extern from "../libturboparser/TurboParserInterface.h" namespace "TurboPars cdef cppclass TurboParserInterface: TurboParserInterface() TurboTaggerWorker* CreateTagger() + TurboEntityRecognizerWorker* CreateEntityRecognizer() TurboParserWorker* CreateParser() TurboSemanticParserWorker* CreateSemanticParser() @@ -48,6 +54,11 @@ cdef class PTurboParser: tagger.thisptr = self.thisptr.CreateTagger() return tagger + def create_entity_recognizer(self): + entity_recognizer = PTurboEntityRecognizerWorker(allocate=False) + entity_recognizer.thisptr = self.thisptr.CreateEntityRecognizer() + return entity_recognizer + def create_parser(self): parser = PTurboParserWorker(allocate=False) parser.thisptr = self.thisptr.CreateParser() @@ -76,6 +87,24 @@ cdef class PTurboTaggerWorker: def tag(self, file_test, file_prediction): self.thisptr.Tag(file_test, file_prediction) +cdef class PTurboEntityRecognizerWorker: + cdef TurboEntityRecognizerWorker *thisptr + cdef bool allocate + def __cinit__(self, allocate=False): + self.allocate = allocate + if allocate: + self.thisptr = new TurboEntityRecognizerWorker() + + def 
__dealloc__(self): + if self.allocate: + del self.thisptr + + def load_entity_recognizer_model(self, file_model): + self.thisptr.LoadEntityRecognizerModel(file_model) + + def tag(self, file_test, file_prediction): + self.thisptr.Tag(file_test, file_prediction) + cdef class PTurboParserWorker: cdef TurboParserWorker *thisptr cdef bool allocate diff --git a/scripts_ner/create_NER_corpus_from_POS_tagged_corpus.sh b/scripts_ner/create_NER_corpus_from_POS_tagged_corpus.sh new file mode 100755 index 0000000..67623d9 --- /dev/null +++ b/scripts_ner/create_NER_corpus_from_POS_tagged_corpus.sh @@ -0,0 +1,3 @@ +file_conll2003=$1 +file_tagged=$2 +paste ${file_tagged} ${file_conll2003} | awk '{ if (NF>0) print $1" "$2" "$5; else print ""}' \ No newline at end of file diff --git a/scripts_ner/prepare_NER_corpus_for_POS_tagging.sh b/scripts_ner/prepare_NER_corpus_for_POS_tagging.sh new file mode 100755 index 0000000..84f6ee9 --- /dev/null +++ b/scripts_ner/prepare_NER_corpus_for_POS_tagging.sh @@ -0,0 +1,2 @@ +file_conll2003=$1 +awk '{ if (NF>0) print $1"\t_"; else print ""}' ${file_conll2003} diff --git a/scripts_ner/results.txt b/scripts_ner/results.txt index d02ad59..de48c3a 100644 --- a/scripts_ner/results.txt +++ b/scripts_ner/results.txt @@ -343,3 +343,25 @@ accuracy: 87.67%; precision: 90.85%; recall: 90.04%; FB1: 90.44 ORG: precision: 85.47%; recall: 83.37%; FB1: 84.41 PER: precision: 92.81%; recall: 95.28%; FB1: 94.03 + + +======================================================================= + +BILOU with shapes and trigram features, constrained + +Spanish +------- +test: +accuracy: 97.11%; precision: 78.68%; recall: 78.70%; FB1: 78.69 + LOC: precision: 80.82%; recall: 80.07%; FB1: 80.44 + MISC: precision: 65.16%; recall: 46.76%; FB1: 54.45 + ORG: precision: 77.23%; recall: 79.00%; FB1: 78.11 + PER: precision: 82.47%; recall: 90.88%; FB1: 86.47 + +dev: +I1013 11:09:44.904317 23623 SequencePipe.h:150] Tagging speed: 11134.7 tokens per second. 
+accuracy: 96.24%; precision: 74.88%; recall: 74.75%; FB1: 74.82 + LOC: precision: 63.21%; recall: 83.05%; FB1: 71.79 + MISC: precision: 64.08%; recall: 53.71%; FB1: 58.44 + ORG: precision: 80.19%; recall: 73.59%; FB1: 76.75 + PER: precision: 84.60%; recall: 77.33%; FB1: 80.80 diff --git a/scripts_ner/train_test_entity_recognizer.sh b/scripts_ner/train_test_entity_recognizer.sh index 2c752ff..cb0fbec 100755 --- a/scripts_ner/train_test_entity_recognizer.sh +++ b/scripts_ner/train_test_entity_recognizer.sh @@ -7,8 +7,8 @@ export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${root_folder}/deps/local/lib" # Set options. language=$1 # Example: "slovene" or "english_proj". train_algorithm=svm_mira # Training algorithm. -num_epochs=10 # Number of training epochs. -regularization_parameter=1e12 # The C parameter in MIRA. +num_epochs=20 #50 #20 # Number of training epochs. +regularization_parameter=$2 #1e12 # The C parameter in MIRA. train=true test=true model_type=2 # Second-order model (trigrams). @@ -33,7 +33,7 @@ mkdir -p ${path_results} file_model=${path_models}/${language}_${suffix}.model file_train=${path_data}/${language}_train.conll.ner -if [ "$language" == "english" ] || [ "$language" == "spanish" ] +if [ "$language" == "english" ] then files_test[0]=${path_data}/${language}_test.conll.ner files_test[1]=${path_data}/${language}_dev.conll.ner @@ -43,7 +43,11 @@ then echo "Creating gazetteer file..." python create_gazetteer_file.py ${path_data}/KnownLists $file_gazetteer echo "Done." 
- +elif [ "$language" == "spanish" ] || [ "$language" == "dutch" ] +then + files_test[0]=${path_data}/${language}_test.conll.ner + files_test[1]=${path_data}/${language}_dev.conll.ner + files_test[2]=${path_data}/${language}_train.conll.ner else files_test[0]=${path_data}/${language}_test.conll.ner fi diff --git a/scripts_srl/evaluator b/scripts_srl/evaluator index b43ebdd..5a93180 160000 --- a/scripts_srl/evaluator +++ b/scripts_srl/evaluator @@ -1 +1 @@ -Subproject commit b43ebdd8bced2c5d7d026f76547129c2beb13feb +Subproject commit 5a93180098f76a08ada25344f52f73538574e98b diff --git a/src/entity_recognizer/EntityDictionary.cpp b/src/entity_recognizer/EntityDictionary.cpp index e3e0842..c5236aa 100644 --- a/src/entity_recognizer/EntityDictionary.cpp +++ b/src/entity_recognizer/EntityDictionary.cpp @@ -71,7 +71,6 @@ void EntityDictionary::CreateTagDictionary(SequenceReader *reader) { int tag_begin = tag_alphabet_.Lookup("B-" + entity); int tag_inside = tag_alphabet_.Lookup("I-" + entity); int tag_last = tag_alphabet_.Lookup("L-" + entity); - int tag_unique = tag_alphabet_.Lookup("U-" + entity); // I-tags and L-tags can only occur after a B-tag or an I-tag of the same // entity. for (int left_tag = -1; left_tag < tag_alphabet_.size(); ++left_tag) { @@ -150,14 +149,10 @@ void EntityDictionary::ReadGazetteerFiles() { StringSplit(line, " \t", &fields); // Break on tabs or spaces. 
if (fields.size() < 2) continue; const std::string &entity_type = fields[0]; - int entity_type_begin_id = - gazetteer_entity_tag_alphabet_.Insert("B-" + entity_type); - int entity_type_inside_id = - gazetteer_entity_tag_alphabet_.Insert("I-" + entity_type); - int entity_type_last_id = - gazetteer_entity_tag_alphabet_.Insert("L-" + entity_type); - int entity_type_unique_id = - gazetteer_entity_tag_alphabet_.Insert("U-" + entity_type); + gazetteer_entity_tag_alphabet_.Insert("B-" + entity_type); + gazetteer_entity_tag_alphabet_.Insert("I-" + entity_type); + gazetteer_entity_tag_alphabet_.Insert("L-" + entity_type); + gazetteer_entity_tag_alphabet_.Insert("U-" + entity_type); for (int k = 1; k < fields.size(); ++k) { const std::string &word = fields[k]; gazetteer_word_alphabet_.Insert(word);