diff --git a/libturboparser/Makefile b/libturboparser/Makefile index 309ecee..a1a9af1 100644 --- a/libturboparser/Makefile +++ b/libturboparser/Makefile @@ -2,13 +2,14 @@ UTIL = ../src/util CLASSIFIER = ../src/classifier TAGGER = ../src/tagger PARSER = ../src/parser +SEMANTICPARSER = ../src/semantic_parser AUXLIBS = ../deps/local/lib AUXINCLUDES = ../deps/local/include -OBJS = TurboParserInterface.o DependencyDecoder.o DependencyDictionary.o DependencyFeatures.o DependencyInstance.o DependencyInstanceNumeric.o DependencyOptions.o DependencyPart.o DependencyPipe.o DependencyReader.o DependencyWriter.o SequenceDecoder.o SequenceDictionary.o SequenceFeatures.o SequenceInstance.o SequenceInstanceNumeric.o SequenceOptions.o SequencePart.o SequencePipe.o SequenceReader.o SequenceWriter.o TokenDictionary.o Alphabet.o Dictionary.o Options.o Parameters.o Pipe.o Reader.o Writer.o AlgUtils.o SerializationUtils.o StringUtils.o TimeUtils.o +OBJS = TurboParserInterface.o SemanticDecoder.o SemanticDictionary.o SemanticFeatures.o SemanticInstanceNumeric.o SemanticInstance.o SemanticOptions.o SemanticPart.o SemanticPipe.o SemanticReader.o SemanticWriter.o DependencyDecoder.o DependencyDictionary.o DependencyFeatures.o DependencyInstance.o DependencyInstanceNumeric.o DependencyOptions.o DependencyPart.o DependencyPipe.o DependencyReader.o DependencyWriter.o SequenceDecoder.o SequenceDictionary.o SequenceFeatures.o SequenceInstance.o SequenceInstanceNumeric.o SequenceOptions.o SequencePart.o SequencePipe.o SequenceReader.o SequenceWriter.o TokenDictionary.o Alphabet.o Dictionary.o Options.o Parameters.o Pipe.o Reader.o Writer.o AlgUtils.o SerializationUtils.o StringUtils.o TimeUtils.o CC = g++ DEBUG = -g -INCLUDES = -I$(UTIL)/ -I$(CLASSIFIER) -I$(TAGGER) -I$(PARSER) -I$(AUXINCLUDES) +INCLUDES = -I$(UTIL)/ -I$(CLASSIFIER) -I$(TAGGER) -I$(PARSER) -I$(SEMANTICPARSER) -I$(AUXINCLUDES) LIBS = -L/usr/local/lib/ -L$(AUXLIBS) CFLAGS = -O3 -Wall -Wno-sign-compare -c -fmessage-length=0 -fPIC $(INCLUDES) LFLAGS = $(LIBS) -lad3 -lgflags -lglog @@ -18,11 +19,43 @@ all : libturboparser.a libturboparser.a : $(OBJS) ar rcs libturboparser.a $(OBJS) -TurboParserInterface.o: TurboParserInterface.h TurboParserInterface.cpp $(TAGGER)/SequencePipe.h $(PARSER)/DependencyPipe.h $(UTIL)/Utils.h +TurboParserInterface.o: TurboParserInterface.h TurboParserInterface.cpp $(TAGGER)/SequencePipe.h $(PARSER)/DependencyPipe.h $(SEMANTICPARSER)/SemanticPipe.h $(UTIL)/Utils.h $(CC) $(CFLAGS) TurboParserInterface.cpp ##################### +SemanticDecoder.o: $(SEMANTICPARSER)/SemanticDecoder.h $(SEMANTICPARSER)/SemanticDecoder.cpp $(SEMANTICPARSER)/SemanticPart.h $(SEMANTICPARSER)/SemanticPipe.h $(PARSER)/FactorTree.h $(SEMANTICPARSER)/FactorPredicateAutomaton.h $(SEMANTICPARSER)/FactorArgumentAutomaton.h $(UTIL)/AlgUtils.h $(UTIL)/logval.h $(CLASSIFIER)/Decoder.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticDecoder.cpp + +SemanticDictionary.o: $(SEMANTICPARSER)/SemanticDictionary.h $(SEMANTICPARSER)/SemanticDictionary.cpp $(SEMANTICPARSER)/SemanticPipe.h $(CLASSIFIER)/Dictionary.h $(TAGGER)/TokenDictionary.h $(UTIL)/SerializationUtils.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticDictionary.cpp + +SemanticFeatures.o: $(SEMANTICPARSER)/SemanticFeatures.h $(SEMANTICPARSER)/SemanticFeatures.cpp $(SEMANTICPARSER)/SemanticPipe.h $(SEMANTICPARSER)/SemanticPart.h $(SEMANTICPARSER)/SemanticFeatureTemplates.h $(CLASSIFIER)/Features.h $(SEMANTICPARSER)/SemanticInstanceNumeric.h $(CLASSIFIER)/FeatureEncoder.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticFeatures.cpp + +SemanticInstance.o: $(SEMANTICPARSER)/SemanticInstance.h $(SEMANTICPARSER)/SemanticInstance.cpp $(CLASSIFIER)/Instance.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticInstance.cpp + +SemanticInstanceNumeric.o: $(SEMANTICPARSER)/SemanticInstanceNumeric.h $(SEMANTICPARSER)/SemanticInstanceNumeric.cpp $(SEMANTICPARSER)/SemanticInstance.h $(SEMANTICPARSER)/SemanticDictionary.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticInstanceNumeric.cpp + +SemanticOptions.o: $(SEMANTICPARSER)/SemanticOptions.h $(SEMANTICPARSER)/SemanticOptions.cpp $(UTIL)/SerializationUtils.h $(CLASSIFIER)/Options.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticOptions.cpp + +SemanticPart.o: $(SEMANTICPARSER)/SemanticPart.h $(SEMANTICPARSER)/SemanticPart.cpp $(CLASSIFIER)/Part.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticPart.cpp + +SemanticPipe.o: $(SEMANTICPARSER)/SemanticPipe.h $(SEMANTICPARSER)/SemanticPipe.cpp $(CLASSIFIER)/Pipe.h $(SEMANTICPARSER)/SemanticOptions.h $(SEMANTICPARSER)/SemanticReader.h $(SEMANTICPARSER)/SemanticDictionary.h $(TAGGER)/TokenDictionary.h $(SEMANTICPARSER)/SemanticInstanceNumeric.h $(SEMANTICPARSER)/SemanticWriter.h $(SEMANTICPARSER)/SemanticPart.h $(SEMANTICPARSER)/SemanticFeatures.h $(SEMANTICPARSER)/SemanticDecoder.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticPipe.cpp + +SemanticReader.o: $(SEMANTICPARSER)/SemanticReader.h $(SEMANTICPARSER)/SemanticReader.cpp $(SEMANTICPARSER)/SemanticInstance.h $(CLASSIFIER)/Reader.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticReader.cpp + +SemanticWriter.o: $(SEMANTICPARSER)/SemanticWriter.h $(SEMANTICPARSER)/SemanticWriter.cpp $(SEMANTICPARSER)/SemanticInstance.h $(CLASSIFIER)/Writer.h + $(CC) $(CFLAGS) $(SEMANTICPARSER)/SemanticWriter.cpp + +##################### + DependencyDecoder.o: $(PARSER)/DependencyDecoder.h $(PARSER)/DependencyDecoder.cpp $(PARSER)/DependencyPart.h $(PARSER)/DependencyPipe.h $(PARSER)/FactorTree.h $(PARSER)/FactorHeadAutomaton.h $(PARSER)/FactorGrandparentHeadAutomaton.h $(PARSER)/FactorTrigramHeadAutomaton.h $(PARSER)/FactorSequence.h $(UTIL)/AlgUtils.h $(UTIL)/logval.h $(CLASSIFIER)/Decoder.h $(CC) $(CFLAGS) $(PARSER)/DependencyDecoder.cpp diff --git a/libturboparser/TurboParserInterface.cpp b/libturboparser/TurboParserInterface.cpp index c57e466..d846ace 100644 --- a/libturboparser/TurboParserInterface.cpp +++ b/libturboparser/TurboParserInterface.cpp @@ -110,6 +110,59 @@ void TurboParserWorker::Parse(const std::string &file_test, << " sec." << endl; } +TurboSemanticParserWorker::TurboSemanticParserWorker() { + semantic_options_ = new SemanticOptions; + semantic_options_->Initialize(); + + semantic_pipe_ = new SemanticPipe(semantic_options_); + semantic_pipe_->Initialize(); +} + +TurboSemanticParserWorker::~TurboSemanticParserWorker() { + LOG(INFO) << "Deleting semantic pipe."; + delete semantic_pipe_; + LOG(INFO) << "Deleting semantic options."; + delete semantic_options_; +} + +void TurboSemanticParserWorker::LoadSemanticParserModel( + const std::string &file_model) { + semantic_options_->SetModelFilePath(file_model); + + int time; + timeval start, end; + gettimeofday(&start, NULL); + + LOG(INFO) << "Loading model file " << file_model; + + semantic_pipe_->LoadModelFile(); + + gettimeofday(&end, NULL); + time = diff_ms(end,start); + + LOG(INFO) << "Took " << static_cast(time)/1000.0 + << " sec." << endl; +} + +void TurboSemanticParserWorker::ParseSemanticDependencies( + const std::string &file_test, + const std::string &file_prediction) { + semantic_options_->SetTestFilePath(file_test); + semantic_options_->SetOutputFilePath(file_prediction); + + int time; + timeval start, end; + gettimeofday(&start, NULL); + + semantic_pipe_->Run(); + + gettimeofday(&end, NULL); + time = diff_ms(end,start); + + LOG(INFO) << "Took " << static_cast(time)/1000.0 + << " sec." << endl; +} + TurboParserInterface::TurboParserInterface() { argc_ = 0; argv_ = NULL; @@ -133,6 +186,9 @@ TurboParserInterface::~TurboParserInterface() { LOG(INFO) << "Deleting parser workers."; DeleteAllParsers(); + LOG(INFO) << "Deleting semantic parser workers."; + DeleteAllSemanticParsers(); + LOG(INFO) << "Clearing argument list."; ClearArgumentList(); diff --git a/libturboparser/TurboParserInterface.h b/libturboparser/TurboParserInterface.h index 02ad16a..158ba6a 100644 --- a/libturboparser/TurboParserInterface.h +++ b/libturboparser/TurboParserInterface.h @@ -2,6 +2,7 @@ #include #include "SequencePipe.h" #include "DependencyPipe.h" +#include "SemanticPipe.h" namespace TurboParserInterface { @@ -35,6 +36,21 @@ class TurboParserWorker { DependencyPipe *parser_pipe_; }; +class TurboSemanticParserWorker { + public: + TurboSemanticParserWorker(); + virtual ~TurboSemanticParserWorker(); + + void LoadSemanticParserModel(const std::string &file_model); + + void ParseSemanticDependencies(const std::string &file_test, + const std::string &file_prediction); + + private: + SemanticOptions *semantic_options_; + SemanticPipe *semantic_pipe_; +}; + class TurboParserInterface { public: TurboParserInterface(); @@ -67,6 +83,12 @@ class TurboParserInterface { return parser; } + TurboSemanticParserWorker *CreateSemanticParser() { + TurboSemanticParserWorker *semantic_parser = new TurboSemanticParserWorker(); + semantic_parsers_.push_back(semantic_parser); + return semantic_parser; + } + void DeleteAllTaggers() { for (int i = 0; i < taggers_.size(); ++i) { delete taggers_[i]; @@ -81,11 +103,19 @@ class TurboParserInterface { parsers_.clear(); } + void DeleteAllSemanticParsers() { + for (int i = 0; i < semantic_parsers_.size(); ++i) { + delete semantic_parsers_[i]; + } + semantic_parsers_.clear(); + } + private: int argc_; char** argv_; vector taggers_; vector parsers_; + vector semantic_parsers_; }; } // namespace TurboParserInterface. diff --git a/python/nlp_pipeline.py b/python/nlp_pipeline.py index f2cabdf..975cc33 100644 --- a/python/nlp_pipeline.py +++ b/python/nlp_pipeline.py @@ -8,6 +8,7 @@ class NLPPipelineWorker: def __init__(self, pipeline, language): self.tagger = pipeline.turbo_interface.create_tagger() self.parser = pipeline.turbo_interface.create_parser() + self.semantic_parser = None self.lemmatizer = None if language == 'PT': self.sent_tokenizer = nltk.data.load('tokenizers/punkt/portuguese.pickle') @@ -26,6 +27,8 @@ def __init__(self, pipeline, language): self.word_tokenizer = nltk.TreebankWordTokenizer() # For now... self.tagger.load_tagger_model('/home/atm/workspace/CPP/TurboParser/models/spanish_conll2009_v2.0_nomwe_auto/spanish_conll2009_v2.0_nomwe_auto_tagger.model') self.parser.load_parser_model('/home/atm/workspace/CPP/TurboParser/models/spanish_conll2009_v2.0_nomwe_auto/spanish_conll2009_v2.0_nomwe_auto_parser_pruned-true_model-standard.model') + self.semantic_parser = pipeline.turbo_interface.create_semantic_parser() + self.semantic_parser.load_semantic_parser_model('/home/atm/workspace/CPP/TurboParser/srl/models/spanish_conll2009_v2.0_nomwe_auto/spanish_conll2009_v2.0_nomwe_auto_semantic_parser_conll2008_pruned-false_model-basic_syntax-true_C-0.01_fp-0.4_fn-0.6.model') self.lemmatizer = lemmatizer.BasicLemmatizer() self.lemmatizer.load_lemmatizer_model('/home/atm/workspace/CPP/TurboParser/models/spanish_conll2009_v2.0_nomwe_auto/spanish_conll2009_v2.0_nomwe_auto_lemmatizer.model') elif language == 'EN': @@ -33,6 +36,15 @@ def __init__(self, pipeline, language): self.word_tokenizer = nltk.TreebankWordTokenizer() self.tagger.load_tagger_model('/home/atm/workspace/CPP/TurboParser/models/english_proj/english_proj_tagger.model') self.parser.load_parser_model('/home/atm/workspace/CPP/TurboParser/models/english_proj/english_proj_parser_pruned-true_model-standard.model') + elif language == 'EN-Nonprojective': + self.sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle') + self.word_tokenizer = nltk.TreebankWordTokenizer() + self.tagger.load_tagger_model('/home/atm/workspace/CPP/TurboParser/models/english_proj/english_proj_tagger.model') + self.parser.load_parser_model('/home/atm/workspace/CPP/TurboParser/models/english/english_parser_pruned-true_model-standard.model') + self.semantic_parser = pipeline.turbo_interface.create_semantic_parser() + self.semantic_parser.load_semantic_parser_model('/home/atm/workspace/CPP/TurboParser/srl/models/english/english_semantic_parser_conll2008_pruned-false_model-basic_syntax-true_C-0.01_fp-0.4_fn-0.6.model') + self.lemmatizer = lemmatizer.BasicLemmatizer() + self.lemmatizer.load_lemmatizer_model('/home/atm/workspace/CPP/TurboParser/models/english/english_lemmatizer.model') elif language == 'PT-BR-Universal': self.sent_tokenizer = nltk.data.load('tokenizers/punkt/portuguese.pickle') self.word_tokenizer = nltk.TreebankWordTokenizer() # For now... @@ -60,12 +72,12 @@ def __init__(self, pipeline, language): self.parser.load_parser_model('/home/atm/workspace/CPP/TurboParser/models/german_universal/german_universal_parser_pruned-true_model-standard.model') else: raise NotImplementedError - + class NLPPipeline: def __init__(self): self.turbo_interface = tp.PTurboParser() self.workers = {} - + def get_worker(self, language): if language in self.workers: return self.workers[language] @@ -73,17 +85,17 @@ def get_worker(self, language): worker = NLPPipelineWorker(self, language) self.workers[language] = worker return worker - + def split_sentences(self, text, language): worker = self.get_worker(language) sentences = worker.sent_tokenizer.tokenize(text) return sentences - + def tokenize(self, sentence, language): worker = self.get_worker(language) tokenized_sentence = worker.word_tokenizer.tokenize(sentence) return tokenized_sentence - + def tag(self, tokenized_sentence, language): worker = self.get_worker(language) f_tagging = open('tagging.tmp', 'w') @@ -135,6 +147,41 @@ def parse(self, tokenized_sentence, tags, lemmas, language): f_conll_pred.close() return heads, deprels + def has_semantic_parser(self, language): + worker = self.get_worker(language) + return (worker.semantic_parser != None) + + def parse_semantic_dependencies(self, tokenized_sentence, tags, lemmas, + heads, deprels, language): + worker = self.get_worker(language) + f_conll = open('conll2008.tmp', 'w') + for i, token in enumerate(tokenized_sentence): + tag = tags[i] + lemma = lemmas[i] + head = heads[i] + deprel = deprels[i] + f_conll.write(str(i+1) + '\t_\t_\t_\t_\t' + token + '\t' + lemma + \ + '\t' + tag + '\t' + str(head) + '\t' + deprel + \ + '\t_\n') + f_conll.close() + worker.semantic_parser.parse_semantic_dependencies('conll2008.tmp', + 'conll2008.tmp.pred') + f_conll_pred = open('conll2008.tmp.pred') + predicates = [] + argument_lists = [] + for line in f_conll_pred: + line = line.rstrip('\n') + line = line.rstrip('\t') + if line == '': + continue + fields = line.split('\t') + predicate = fields[10] + argument_list = fields[11:] + predicates.append(predicate) + argument_lists.append(argument_list) + f_conll_pred.close() + return predicates, argument_lists + def parse_conll(self, text, language): sentences = self.split_sentences(text, language) conll_str = '' diff --git a/python/setup.py b/python/setup.py index 2a8b8f2..4237539 100644 --- a/python/setup.py +++ b/python/setup.py @@ -6,5 +6,5 @@ setup(cmdclass={'build_ext': build_ext}, ext_modules=[Extension("turboparser", ["turbo_parser.pyx"], language="c++", - include_dirs=["../src/parser", "../src/tagger/", "../src/classifier/", "../src/util", "../deps/local/include/"], + include_dirs=["../src/semantic_parser", "../src/parser", "../src/tagger/", "../src/classifier/", "../src/util", "../deps/local/include/"], library_dirs=[src, "../deps/local/lib/"], libraries=["turboparser", "gflags", "glog", "ad3"])]) diff --git a/python/turbo_parser.pyx b/python/turbo_parser.pyx index 20632ec..a8441d0 100644 --- a/python/turbo_parser.pyx +++ b/python/turbo_parser.pyx @@ -17,10 +17,16 @@ cdef extern from "../libturboparser/TurboParserInterface.h" namespace "TurboPars void LoadParserModel(string file_model) void Parse(string file_test, string file_prediction) + cdef cppclass TurboSemanticParserWorker: + TurboSemanticParserWorker() + void LoadSemanticParserModel(string file_model) + void ParseSemanticDependencies(string file_test, string file_prediction) + cdef cppclass TurboParserInterface: TurboParserInterface() TurboTaggerWorker* CreateTagger() TurboParserWorker* CreateParser() + TurboSemanticParserWorker* CreateSemanticParser() # Wrap them into python extension types. @@ -47,6 +53,11 @@ cdef class PTurboParser: parser.thisptr = self.thisptr.CreateParser() return parser + def create_semantic_parser(self): + semantic_parser = PTurboSemanticParserWorker(allocate=False) + semantic_parser.thisptr = self.thisptr.CreateSemanticParser() + return semantic_parser + cdef class PTurboTaggerWorker: cdef TurboTaggerWorker *thisptr cdef bool allocate @@ -82,3 +93,21 @@ cdef class PTurboParserWorker: def parse(self, file_test, file_prediction): self.thisptr.Parse(file_test, file_prediction) + +cdef class PTurboSemanticParserWorker: + cdef TurboSemanticParserWorker *thisptr + cdef bool allocate + def __cinit__(self, allocate=False): + self.allocate = allocate + if allocate: + self.thisptr = new TurboSemanticParserWorker() + + def __dealloc__(self): + if self.allocate: + del self.thisptr + + def load_semantic_parser_model(self, file_model): + self.thisptr.LoadSemanticParserModel(file_model) + + def parse_semantic_dependencies(self, file_test, file_prediction): + self.thisptr.ParseSemanticDependencies(file_test, file_prediction) diff --git a/scripts_srl/eval08.pl b/scripts_srl/eval08.pl index 8f47e7f..ed82553 100644 --- a/scripts_srl/eval08.pl +++ b/scripts_srl/eval08.pl @@ -575,7 +575,8 @@ sub update_srl_scores if($gold_prop->position() == $sys_prop->position()){ $counts->{coru_prop} ++; $counts->{coru_prop_per_tag}{$gold_prop->pposs()} ++; - if($gold_prop->sense() == $sys_prop->sense()){ + #if($gold_prop->sense() == $sys_prop->sense()){ + if($gold_prop->sense() eq $sys_prop->sense()){ $counts->{corl_prop} ++; $sent_counts{corl_prop} ++; $counts->{corl_prop_per_tag}{$gold_prop->pposs()} ++; diff --git a/scripts_srl/train_test_semantic_parser.sh b/scripts_srl/train_test_semantic_parser.sh index 0dc854c..6610ed4 100755 --- a/scripts_srl/train_test_semantic_parser.sh +++ b/scripts_srl/train_test_semantic_parser.sh @@ -32,7 +32,7 @@ model_type=$5 #af+as+gp+cp # Parts used in the model (subset of "af+cs+gp+as+hb+ # make the parser a lot slower. train_cost_false_positives=$3 train_cost_false_negatives=$4 -file_format=conll #sdp # conll +file_format=$6 # sdp or conll if [ "${file_format}" == "sdp" ] then @@ -123,24 +123,24 @@ then --train_epochs=${num_epochs_pruner} \ --file_model=${file_pruner_model} \ --file_train=${file_train} \ - --model_type=basic \ - --labeled=false \ - --deterministic_labels=false \ - --use_dependency_syntactic_features=${use_dependency_syntactic_features} \ - --prune_labels=${prune_labels} \ - --prune_distances=${prune_distances} \ - --prune_basic=false \ + --srl_model_type=basic \ + --srl_labeled=false \ + --srl_deterministic_labels=false \ + --srl_use_dependency_syntactic_features=${use_dependency_syntactic_features} \ + --srl_prune_labels=${prune_labels} \ + --srl_prune_distances=${prune_distances} \ + --srl_prune_basic=false \ --only_supported_features \ --form_case_sensitive=${case_sensitive} \ --train_algorithm=${train_algorithm_pruner} \ --train_regularization_constant=${regularization_parameter_pruner} \ - --train_cost_false_positives=${train_cost_false_positives} \ - --train_cost_false_negatives=${train_cost_false_negatives} \ - --allow_self_loops=${allow_self_loops} \ - --allow_root_predicate=${allow_root_predicate} \ - --allow_unseen_predicates=${allow_unseen_predicates} \ - --use_predicate_senses=${use_predicate_senses} \ - --file_format=${file_format} \ + --srl_train_cost_false_positives=${train_cost_false_positives} \ + --srl_train_cost_false_negatives=${train_cost_false_negatives} \ + --srl_allow_self_loops=${allow_self_loops} \ + --srl_allow_root_predicate=${allow_root_predicate} \ + --srl_allow_unseen_predicates=${allow_unseen_predicates} \ + --srl_use_predicate_senses=${use_predicate_senses} \ + --srl_file_format=${file_format} \ --logtostderr rm -f ${file_pruner_results} @@ -157,7 +157,7 @@ then --file_model=${file_pruner_model} \ --file_test=${file_test} \ --file_prediction=${file_pruner_prediction} \ - --file_format=${file_format} \ + --srl_file_format=${file_format} \ --logtostderr echo "" @@ -194,27 +194,27 @@ then --train_epochs=${num_epochs} \ --file_model=${file_model} \ --file_train=${file_train} \ - --labeled=${labeled} \ - --deterministic_labels=${deterministic_labels} \ - --use_dependency_syntactic_features=${use_dependency_syntactic_features} \ - --prune_labels=${prune_labels} \ - --prune_distances=${prune_distances} \ - --prune_basic=${prune} \ - --pruner_posterior_threshold=${posterior_threshold} \ - --pruner_max_arguments=${pruner_max_arguments} \ - --use_pretrained_pruner \ - --file_pruner_model=${file_pruner_model} \ + --srl_labeled=${labeled} \ + --srl_deterministic_labels=${deterministic_labels} \ + --srl_use_dependency_syntactic_features=${use_dependency_syntactic_features} \ + --srl_prune_labels=${prune_labels} \ + --srl_prune_distances=${prune_distances} \ + --srl_prune_basic=${prune} \ + --srl_pruner_posterior_threshold=${posterior_threshold} \ + --srl_pruner_max_arguments=${pruner_max_arguments} \ + --srl_use_pretrained_pruner \ + --srl_file_pruner_model=${file_pruner_model} \ --form_case_sensitive=${case_sensitive} \ --train_algorithm=${train_algorithm} \ --train_regularization_constant=${regularization_parameter} \ - --train_cost_false_positives=${train_cost_false_positives} \ - --train_cost_false_negatives=${train_cost_false_negatives} \ - --model_type=${model_type} \ - --allow_self_loops=${allow_self_loops} \ - --allow_root_predicate=${allow_root_predicate} \ - --allow_unseen_predicates=${allow_unseen_predicates} \ - --use_predicate_senses=${use_predicate_senses} \ - --file_format=${file_format} \ + --srl_train_cost_false_positives=${train_cost_false_positives} \ + --srl_train_cost_false_negatives=${train_cost_false_negatives} \ + --srl_model_type=${model_type} \ + --srl_allow_self_loops=${allow_self_loops} \ + --srl_allow_root_predicate=${allow_root_predicate} \ + --srl_allow_unseen_predicates=${allow_unseen_predicates} \ + --srl_use_predicate_senses=${use_predicate_senses} \ + --srl_file_format=${file_format} \ --logtostderr else # Train a pruner along with the parser. @@ -223,28 +223,28 @@ then --train_epochs=${num_epochs} \ --file_model=${file_model} \ --file_train=${file_train} \ - --labeled=${labeled} \ - --deterministic_labels=${deterministic_labels} \ - --use_dependency_syntactic_features=${use_dependency_syntactic_features} \ + --srl_labeled=${labeled} \ + --srl_deterministic_labels=${deterministic_labels} \ + --srl_use_dependency_syntactic_features=${use_dependency_syntactic_features} \ --form_case_sensitive=${case_sensitive} \ --train_algorithm=${train_algorithm} \ --train_regularization_constant=${regularization_parameter} \ - --train_cost_false_positives=${train_cost_false_positives} \ - --train_cost_false_negatives=${train_cost_false_negatives} \ - --model_type=${model_type} \ - --prune_labels=${prune_labels} \ - --prune_distances=${prune_distances} \ - --prune_basic=${prune} \ - --pruner_posterior_threshold=${posterior_threshold} \ - --pruner_max_arguments=${pruner_max_arguments} \ - --pruner_train_epochs=${num_epochs_pruner} \ - --pruner_train_algorithm=${train_algorithm_pruner} \ - --pruner_train_regularization_constant=${regularization_parameter_pruner} \ - --allow_self_loops=${allow_self_loops} \ - --allow_root_predicate=${allow_root_predicate} \ - --allow_unseen_predicates=${allow_unseen_predicates} \ - --use_predicate_senses=${use_predicate_senses} \ - --file_format=${file_format} \ + --srl_train_cost_false_positives=${train_cost_false_positives} \ + --srl_train_cost_false_negatives=${train_cost_false_negatives} \ + --srl_model_type=${model_type} \ + --srl_prune_labels=${prune_labels} \ + --srl_prune_distances=${prune_distances} \ + --srl_prune_basic=${prune} \ + --srl_pruner_posterior_threshold=${posterior_threshold} \ + --srl_pruner_max_arguments=${pruner_max_arguments} \ + --srl_pruner_train_epochs=${num_epochs_pruner} \ + --srl_pruner_train_algorithm=${train_algorithm_pruner} \ + --srl_pruner_train_regularization_constant=${regularization_parameter_pruner} \ + --srl_allow_self_loops=${allow_self_loops} \ + --srl_allow_root_predicate=${allow_root_predicate} \ + --srl_allow_unseen_predicates=${allow_unseen_predicates} \ + --srl_use_predicate_senses=${use_predicate_senses} \ + --srl_file_format=${file_format} \ --logtostderr fi fi @@ -275,7 +275,7 @@ then --file_model=${file_model} \ --file_test=${file_test} \ --file_prediction=${file_prediction} \ - --file_format=${file_format} \ + --srl_file_format=${file_format} \ --logtostderr echo "" diff --git a/scripts_srl/train_test_submission_closed.sh b/scripts_srl/train_test_submission_closed.sh index 87983dd..c8e4349 100755 --- a/scripts_srl/train_test_submission_closed.sh +++ b/scripts_srl/train_test_submission_closed.sh @@ -6,19 +6,19 @@ C=0.01 cost_fp=0.3 cost_fn=0.7 echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}" -./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} +./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} sdp >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} formalism=pas C=0.01 cost_fp=0.4 cost_fn=0.6 echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}" -./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} +./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} sdp >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} formalism=pcedt C=0.01 cost_fp=0.3 cost_fn=0.7 echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}" -./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} +./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} sdp >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} diff --git a/scripts_srl/train_test_submission_open.sh b/scripts_srl/train_test_submission_open.sh index c5db2e9..4d49541 100755 --- a/scripts_srl/train_test_submission_open.sh +++ b/scripts_srl/train_test_submission_open.sh @@ -6,19 +6,19 @@ C=0.01 cost_fp=0.4 cost_fn=0.6 echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}" -./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} +./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} sdp >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} formalism=pas C=0.01 cost_fp=0.4 cost_fn=0.6 echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}" -./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} +./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} sdp >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} formalism=pcedt C=0.01 cost_fp=0.4 cost_fn=0.6 echo "${C} ${cost_fp} ${cost_fn} ${formalism} ${model_type} ${open}" -./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} +./train_test_semantic_parser.sh english ${C} ${cost_fp} ${cost_fn} ${model_type} ${open} ${formalism} sdp >& out_open-${open}_deterministic_${formalism}_${C}_${cost_fp}_${cost_fn}_${model_type} diff --git a/src/semantic_parser/SemanticDecoder.cpp b/src/semantic_parser/SemanticDecoder.cpp index df8e4fe..5305b42 100644 --- a/src/semantic_parser/SemanticDecoder.cpp +++ b/src/semantic_parser/SemanticDecoder.cpp @@ -36,9 +36,9 @@ typedef Eigen::Matrix MatrixXlogd; using namespace std; -DEFINE_double(train_cost_false_positives, 1.0, +DEFINE_double(srl_train_cost_false_positives, 1.0, "Cost for predicting false positives."); -DEFINE_double(train_cost_false_negatives, 1.0, +DEFINE_double(srl_train_cost_false_negatives, 1.0, "Cost for predicting false negatives."); void SemanticDecoder::DecodeCostAugmented(Instance *instance, Parts *parts, @@ -69,9 +69,9 @@ void SemanticDecoder::DecodeCostAugmented(Instance *instance, Parts *parts, //////////////////////////////////////////////////// // Penalty for predicting 1 when it is 0 (FP). - double a = FLAGS_train_cost_false_positives; + double a = FLAGS_srl_train_cost_false_positives; // Penalty for predicting 0 when it is 1 (FN). - double b = FLAGS_train_cost_false_negatives; + double b = FLAGS_srl_train_cost_false_negatives; // p = 0.5-z0, q = 0.5'*z0, loss = p'*z + q double q = 0.0; diff --git a/src/semantic_parser/SemanticFeatures.cpp b/src/semantic_parser/SemanticFeatures.cpp index 11c4e88..db1cffb 100644 --- a/src/semantic_parser/SemanticFeatures.cpp +++ b/src/semantic_parser/SemanticFeatures.cpp @@ -27,18 +27,18 @@ // Note 2: these flags don't get saved in the model file!!! So we need to call // them at test time too. // TODO: deprecate this. -DEFINE_bool(use_contextual_features, true, +DEFINE_bool(srl_use_contextual_features, true, "True for using contextual arc-factored features."); -DEFINE_bool(use_predicate_features, true, //false, +DEFINE_bool(srl_use_predicate_features, true, //false, "True for using predicate features."); -DEFINE_bool(use_pair_features_arbitrary_siblings, false, /*false,*/ +DEFINE_bool(srl_use_pair_features_arbitrary_siblings, false, /*false,*/ "True for using pair features for arbitrary sibling parts."); -DEFINE_bool(use_pair_features_second_order, true, /*false,*/ +DEFINE_bool(srl_use_pair_features_second_order, true, /*false,*/ "True for using pair features for second order parts."); -DEFINE_bool(use_pair_features_grandsibling_conjunctions, true, /*false,*/ +DEFINE_bool(srl_use_pair_features_grandsibling_conjunctions, true, /*false,*/ "True for using pair features for grandsiblings that are conjunctions."); // TODO: try setting this true. -DEFINE_bool(use_trilexical_features, false, +DEFINE_bool(srl_use_trilexical_features, false, "True for using trilexical features."); void SemanticFeatures::AddPredicateFeatures(SemanticInstanceNumeric* sentence, @@ -51,7 +51,7 @@ void SemanticFeatures::AddPredicateFeatures(SemanticInstanceNumeric* sentence, BinaryFeatures *features = new BinaryFeatures; input_features_[r] = features; - if (FLAGS_use_predicate_features) { + if (FLAGS_srl_use_predicate_features) { AddPredicateFeatures(sentence, false, SemanticFeatureTemplateParts::PREDICATE, r, predicate, predicate_id); @@ -84,7 +84,7 @@ void SemanticFeatures::AddArcFeatures(SemanticInstanceNumeric* sentence, bool use_dependency_features = options->use_dependency_syntactic_features(); bool use_contextual_dependency_features = use_dependency_features; - bool use_contextual_features = FLAGS_use_contextual_features; + bool use_contextual_features = FLAGS_srl_use_contextual_features; bool use_between_features = false; // TODO(atm): change this. // Only 4 bits are allowed in feature_type. @@ -449,7 +449,7 @@ void SemanticFeatures::AddArcFeatures(SemanticInstanceNumeric* sentence, int sentence_length = sentence->size(); bool use_dependency_features = options->use_dependency_syntactic_features(); bool use_contextual_dependency_features = use_dependency_features; - bool use_contextual_features = FLAGS_use_contextual_features; + bool use_contextual_features = FLAGS_srl_use_contextual_features; bool use_lemma_features = true; bool use_between_features = true; @@ -1112,7 +1112,7 @@ void SemanticFeatures::AddSiblingFeatures(SemanticInstanceNumeric* sentence, CHECK_NE(second_argument, 0) << "Currently, last child is encoded as a2 = -1."; #if 0 - if (FLAGS_use_pair_features_second_order) { + if (FLAGS_srl_use_pair_features_second_order) { // Add word pair features for head and modifier, and modifier and sibling. if (consecutive) { int m = modifier; @@ -1123,7 +1123,7 @@ void SemanticFeatures::AddSiblingFeatures(SemanticInstanceNumeric* sentence, AddWordPairFeatures(sentence, SemanticFeatureTemplateParts::NEXTSIBL_M_S, m, s, true, true, features); } else { - if (FLAGS_use_pair_features_arbitrary_siblings) { + if (FLAGS_srl_use_pair_features_arbitrary_siblings) { // Add word pair features for modifier and sibling. AddWordPairFeatures(sentence, SemanticFeatureTemplateParts::ALLSIBL_M_S, modifier, sibling, true, true, features); @@ -1209,7 +1209,7 @@ void SemanticFeatures::AddSiblingFeatures(SemanticInstanceNumeric* sentence, AddFeature(fkey, features); // Trilexical features. - if (FLAGS_use_trilexical_features) { + if (FLAGS_srl_use_trilexical_features) { // Triplet trilexical features. fkey = encoder_.CreateFKey_WWW(SemanticFeatureTemplateSibling::HW_MW_SW, flags, HWID, MWID, SWID); AddFeature(fkey, features); @@ -1316,8 +1316,8 @@ void SemanticFeatures::AddSecondOrderFeatures( second_predicate <= 0); #if 0 - if (FLAGS_use_pair_features_second_order) { - if (FLAGS_use_upper_dependencies) { + if (FLAGS_srl_use_pair_features_second_order) { + if (FLAGS_srl_use_upper_dependencies) { AddWordPairFeatures(sentence, SemanticFeatureTemplateParts::GRANDPAR_G_H, grandparent, head, true, true, features); } @@ -1466,7 +1466,7 @@ void SemanticFeatures::AddSecondOrderFeatures( fkey = encoder_.CreateFKey_WWP(SemanticFeatureTemplateGrandparent::GP_HW_MW, flags, HWID, MWID, GPID); AddFeature(fkey, features); - if (FLAGS_use_trilexical_features) { + if (FLAGS_srl_use_trilexical_features) { // Triplet trilexical features. fkey = encoder_.CreateFKey_WWW(SemanticFeatureTemplateGrandparent::GW_HW_MW, flags, GWID, HWID, MWID); AddFeature(fkey, features); @@ -1609,7 +1609,7 @@ void SemanticFeatures::AddGrandSiblingFeatures(SemanticInstanceNumeric* sentence fkey = encoder_.CreateFKey_WPPP(SemanticFeatureTemplateGrandSibl::GP_HP_MP_SW, flags, SWID, GPID, HPID, MPID); AddFeature(fkey, features); - if (FLAGS_use_pair_features_grandsibling_conjunctions) { + if (FLAGS_srl_use_pair_features_grandsibling_conjunctions) { if (modifier != head && sentence->IsCoordination(modifier) && sibling > 0 && sibling < sentence->size()) { AddWordPairFeatures(sentence, SemanticFeatureTemplateParts::GRANDSIBL_G_S, @@ -1973,7 +1973,7 @@ void SemanticFeatures::AddPredicateFeatures(SemanticInstanceNumeric* sentence, int sentence_length = sentence->size(); bool use_dependency_features = options->use_dependency_syntactic_features(); bool use_contextual_dependency_features = use_dependency_features; - bool use_contextual_features = FLAGS_use_contextual_features; + bool use_contextual_features = FLAGS_srl_use_contextual_features; // Only 4 bits are allowed in feature_type. //uint8_t feature_type = SemanticFeatureTemplateParts::PREDICATE; @@ -2084,15 +2084,15 @@ void SemanticFeatures::AddPredicateFeatures(SemanticInstanceNumeric* sentence, AddFeature(fkey, features); fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HW_bdHP, flags, HWID, bdHPID); AddFeature(fkey, features); - //fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HW_bdHR, flags, HWID, bdHRID); - fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HW_bdHR, flags, HWID, bdHPID); // Submitted results. + fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HW_bdHR, flags, HWID, bdHRID); + //fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HW_bdHR, flags, HWID, bdHPID); // Submitted results. AddFeature(fkey, features); fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HP_bdHW, flags, bdHWID, HPID); AddFeature(fkey, features); fkey = encoder_.CreateFKey_PP(SemanticFeatureTemplatePredicate::HP_bdHP, flags, HPID, bdHPID); AddFeature(fkey, features); - //fkey = encoder_.CreateFKey_PP(SemanticFeatureTemplatePredicate::HP_bdHR, flags, HPID, bdHRID); - fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HP_bdHR, flags, HWID, bdHRID); // Submitted results. + fkey = encoder_.CreateFKey_PP(SemanticFeatureTemplatePredicate::HP_bdHR, flags, HPID, bdHRID); + //fkey = encoder_.CreateFKey_WP(SemanticFeatureTemplatePredicate::HP_bdHR, flags, HWID, bdHRID); // Submitted results. AddFeature(fkey, features); } } diff --git a/src/semantic_parser/SemanticOptions.cpp b/src/semantic_parser/SemanticOptions.cpp index 863c407..74e940a 100644 --- a/src/semantic_parser/SemanticOptions.cpp +++ b/src/semantic_parser/SemanticOptions.cpp @@ -24,12 +24,12 @@ using namespace std; // TODO: Implement the text format. -DEFINE_string(file_format, "conll", +DEFINE_string(srl_file_format, "conll", "Format of the input file containing the data. Use ""conll"" for " "the format used in CONLL 2008, ""sdp"" for the format in " "SemEval 2014, and ""text"" for tokenized sentences" "(one per line, with tokens separated by white-spaces."); -DEFINE_string(model_type, "standard", +DEFINE_string(srl_model_type, "standard", "Model type. This a string formed by the one or several of the " "following pieces:" "af enables arc-factored parts (required), " @@ -46,80 +46,80 @@ DEFINE_string(model_type, "standard", "basic is af, " "standard is af+cs+gp, " "full is af+cs+gp+as+gs+ts."); -DEFINE_bool(use_dependency_syntactic_features, true, +DEFINE_bool(srl_use_dependency_syntactic_features, true, "True for using features from the dependency syntactic tree. " "This should be false for the closed track in SemEval 2014."); -DEFINE_bool(labeled, true, +DEFINE_bool(srl_labeled, true, "True for training a parser with labeled arcs (if false, the " "parser outputs just the backbone dependencies.)"); -DEFINE_bool(deterministic_labels, true, +DEFINE_bool(srl_deterministic_labels, true, "True for forcing a set of labels (found in the training set) to be " "deterministic (i.e. to not occur in more than one argument for the " "same predicate)."); -DEFINE_bool(allow_self_loops, true, +DEFINE_bool(srl_allow_self_loops, true, "True for allowing self-loops (a predicate being its own argument.)"); -DEFINE_bool(allow_root_predicate, false, +DEFINE_bool(srl_allow_root_predicate, false, "True for allowing the root to be a predicate (useful for handling " "top nodes).)"); -DEFINE_bool(allow_unseen_predicates, false, +DEFINE_bool(srl_allow_unseen_predicates, false, "True for allowing an unseen predicate to be have a predicate sense " "(assumes --use_predicate_senses=true.)"); -DEFINE_bool(use_predicate_senses, true, +DEFINE_bool(srl_use_predicate_senses, true, "True for using predicate senses (e.g. temperature.01). If false, " "any word can be a predicate and (eventual) sense information will " "be ignored."); -DEFINE_bool(prune_labels, true, +DEFINE_bool(srl_prune_labels, true, "True for pruning the set of possible labels taking into account " "the labels that have occured for each pair of POS tags in the " "training data."); -DEFINE_bool(prune_labels_with_relation_paths, false, //true, +DEFINE_bool(srl_prune_labels_with_relation_paths, false, //true, "True for pruning the set of possible labels taking into account " "the labels that have occured for syntactic dependency relation " "paths in the training data."); -DEFINE_bool(prune_distances, true, +DEFINE_bool(srl_prune_distances, true, "True for pruning the set of possible left/right distances taking " "into account the distances that have occured for each pair of POS " "tags in the training data."); -DEFINE_bool(prune_basic, true, +DEFINE_bool(srl_prune_basic, true, "True for using a basic pruner from a probabilistic first-order " "model."); -DEFINE_bool(use_pretrained_pruner, false, +DEFINE_bool(srl_use_pretrained_pruner, false, "True if using a pre-trained basic pruner. Must specify the file " "path through --file_pruner_model. If this flag is set to false " "and train=true and prune_basic=true, a pruner will be trained " "along with the parser."); -DEFINE_string(file_pruner_model, "", +DEFINE_string(srl_file_pruner_model, "", "Path to the file containing the pre-trained pruner model. Must " "activate the flag --use_pretrained_pruner"); -DEFINE_double(pruner_posterior_threshold, 0.0001, +DEFINE_double(srl_pruner_posterior_threshold, 0.0001, "Posterior probability threshold for an arc to be pruned, in basic " "pruning. For each word p, if " "P(p,a) < pruner_posterior_threshold * P(p,a'), " "where a' is the best scored argument, then (p,a) will be pruned out."); -DEFINE_int32(pruner_max_arguments, 20, +DEFINE_int32(srl_pruner_max_arguments, 20, "Maximum number of possible arguments for a given word, in basic " "pruning."); // Options for pruner training. // TODO: implement these options. -DEFINE_string(pruner_train_algorithm, "crf_mira", +DEFINE_string(srl_pruner_train_algorithm, "crf_mira", "Training algorithm for the pruner. Options are perceptron, mira, " "svm_mira, crf_mira, svm_sgd, crf_sgd."); -DEFINE_bool(pruner_only_supported_features, true, +DEFINE_bool(srl_pruner_only_supported_features, true, "True for the pruner to use supported features only (should be true" "for CRFs)."); -DEFINE_bool(pruner_use_averaging, true, +DEFINE_bool(srl_pruner_use_averaging, true, "True for the pruner to average the weight vector at the end of" "training."); -DEFINE_int32(pruner_train_epochs, 10, +DEFINE_int32(srl_pruner_train_epochs, 10, "Number of training epochs for the pruner."); -DEFINE_double(pruner_train_regularization_constant, 0.001, +DEFINE_double(srl_pruner_train_regularization_constant, 0.001, "Regularization parameter C for the pruner."); -DEFINE_bool(pruner_labeled, false, +DEFINE_bool(srl_pruner_labeled, false, "True if pruner is a labeled parser. Currently, must be set to false."); -DEFINE_double(pruner_train_initial_learning_rate, 0.01, +DEFINE_double(srl_pruner_train_initial_learning_rate, 0.01, "Initial learning rate of pruner (for SGD only)."); -DEFINE_string(pruner_train_learning_rate_schedule, "invsqrt", +DEFINE_string(srl_pruner_train_learning_rate_schedule, "invsqrt", "Learning rate annealing schedule of pruner (for SGD only). " "Options are fixed, lecun, invsqrt, inv."); @@ -164,97 +164,107 @@ void SemanticOptions::Load(FILE* fs) { Options::Load(fs); bool success; - success = ReadString(fs, &FLAGS_model_type); + success = ReadString(fs, &FLAGS_srl_model_type); CHECK(success); - LOG(INFO) << "Setting --model_type=" << FLAGS_model_type; - success = ReadBool(fs, &FLAGS_use_dependency_syntactic_features); + LOG(INFO) << "Setting --srl_model_type=" << FLAGS_srl_model_type; + success = ReadBool(fs, &FLAGS_srl_use_dependency_syntactic_features); CHECK(success); - LOG(INFO) << "Setting --use_dependency_syntactic_features=" - << FLAGS_use_dependency_syntactic_features; - success = ReadBool(fs, &FLAGS_labeled); + LOG(INFO) << "Setting --srl_use_dependency_syntactic_features=" + << FLAGS_srl_use_dependency_syntactic_features; + success = ReadBool(fs, &FLAGS_srl_labeled); CHECK(success); - LOG(INFO) << "Setting --labeled=" << FLAGS_labeled; - success = ReadBool(fs, &FLAGS_deterministic_labels); + LOG(INFO) << "Setting --srl_labeled=" << FLAGS_srl_labeled; + success = ReadBool(fs, &FLAGS_srl_deterministic_labels); CHECK(success); - LOG(INFO) << "Setting --deterministic_labels=" << FLAGS_deterministic_labels; - success = ReadBool(fs, &FLAGS_allow_self_loops); + LOG(INFO) << "Setting --srl_deterministic_labels=" + << FLAGS_srl_deterministic_labels; + success = ReadBool(fs, &FLAGS_srl_allow_self_loops); CHECK(success); - LOG(INFO) << "Setting --allow_self_loops=" << FLAGS_allow_self_loops; - success = ReadBool(fs, &FLAGS_allow_root_predicate); + LOG(INFO) << "Setting --srl_allow_self_loops=" << FLAGS_srl_allow_self_loops; + success = ReadBool(fs, &FLAGS_srl_allow_root_predicate); CHECK(success); - LOG(INFO) << "Setting --allow_root_predicate=" << FLAGS_allow_root_predicate; - success = ReadBool(fs, &FLAGS_allow_unseen_predicates); + LOG(INFO) << "Setting --srl_allow_root_predicate=" + << FLAGS_srl_allow_root_predicate; + success = ReadBool(fs, &FLAGS_srl_allow_unseen_predicates); CHECK(success); - LOG(INFO) << "Setting --allow_unseen_predicates=" - << FLAGS_allow_unseen_predicates; - success = ReadBool(fs, &FLAGS_use_predicate_senses); + LOG(INFO) << "Setting --srl_allow_unseen_predicates=" + << FLAGS_srl_allow_unseen_predicates; + success = ReadBool(fs, &FLAGS_srl_use_predicate_senses); CHECK(success); - LOG(INFO) << "Setting --use_predicate_senses=" << FLAGS_use_predicate_senses; - success = ReadBool(fs, &FLAGS_prune_labels); + LOG(INFO) << "Setting --srl_use_predicate_senses=" + << FLAGS_srl_use_predicate_senses; + success = ReadBool(fs, &FLAGS_srl_prune_labels); CHECK(success); - LOG(INFO) << "Setting --prune_labels=" << FLAGS_prune_labels; - success = ReadBool(fs, &FLAGS_prune_labels_with_relation_paths); + LOG(INFO) << "Setting --srl_prune_labels=" << FLAGS_srl_prune_labels; + success = ReadBool(fs, &FLAGS_srl_prune_labels_with_relation_paths); CHECK(success); - LOG(INFO) << "Setting --prune_labels_with_relation_paths=" - << FLAGS_prune_labels_with_relation_paths; - success = ReadBool(fs, &FLAGS_prune_distances); + LOG(INFO) << "Setting --srl_prune_labels_with_relation_paths=" + << FLAGS_srl_prune_labels_with_relation_paths; + success = ReadBool(fs, &FLAGS_srl_prune_distances); CHECK(success); - LOG(INFO) << "Setting --prune_distances=" << FLAGS_prune_distances; - success = ReadBool(fs, &FLAGS_prune_basic); + LOG(INFO) << "Setting --srl_prune_distances=" << FLAGS_srl_prune_distances; + success = ReadBool(fs, &FLAGS_srl_prune_basic); CHECK(success); - LOG(INFO) << "Setting --prune_basic=" << FLAGS_prune_basic; - success = ReadDouble(fs, &FLAGS_pruner_posterior_threshold); + LOG(INFO) << "Setting --srl_prune_basic=" << FLAGS_srl_prune_basic; + success = ReadDouble(fs, &FLAGS_srl_pruner_posterior_threshold); CHECK(success); - LOG(INFO) << "Setting --pruner_posterior_threshold=" - << FLAGS_pruner_posterior_threshold; - success = ReadInteger(fs, &FLAGS_pruner_max_arguments); + LOG(INFO) << "Setting --srl_pruner_posterior_threshold=" + << FLAGS_srl_pruner_posterior_threshold; + success = ReadInteger(fs, &FLAGS_srl_pruner_max_arguments); CHECK(success); - LOG(INFO) << "Setting --pruner_max_arguments=" << FLAGS_pruner_max_arguments; + LOG(INFO) << "Setting --srl_pruner_max_arguments=" + << FLAGS_srl_pruner_max_arguments; Initialize(); } void SemanticOptions::CopyPrunerFlags() { // Flags from base class Options. - FLAGS_train_algorithm = FLAGS_pruner_train_algorithm; - FLAGS_only_supported_features = FLAGS_pruner_only_supported_features; - FLAGS_use_averaging = FLAGS_pruner_use_averaging; - FLAGS_train_epochs = FLAGS_pruner_train_epochs; - FLAGS_train_regularization_constant = FLAGS_pruner_train_regularization_constant; - FLAGS_train_initial_learning_rate = FLAGS_pruner_train_initial_learning_rate; - FLAGS_train_learning_rate_schedule = FLAGS_pruner_train_learning_rate_schedule; + FLAGS_train_algorithm = FLAGS_srl_pruner_train_algorithm; + FLAGS_only_supported_features = FLAGS_srl_pruner_only_supported_features; + FLAGS_use_averaging = FLAGS_srl_pruner_use_averaging; + FLAGS_train_epochs = FLAGS_srl_pruner_train_epochs; + FLAGS_train_regularization_constant = + FLAGS_srl_pruner_train_regularization_constant; + FLAGS_train_initial_learning_rate = + FLAGS_srl_pruner_train_initial_learning_rate; + FLAGS_train_learning_rate_schedule = + FLAGS_srl_pruner_train_learning_rate_schedule; // Flags from SemanticOptions. - CHECK(!FLAGS_pruner_labeled) << "Currently, the flag --pruner_labeled must be false."; - FLAGS_labeled = FLAGS_pruner_labeled; + CHECK(!FLAGS_srl_pruner_labeled) + << "Currently, the flag --srl_pruner_labeled must be false."; + FLAGS_srl_labeled = FLAGS_srl_pruner_labeled; // General flags. - FLAGS_model_type = "af"; // A pruner is always a arc-factored model. - FLAGS_prune_basic = false; // A pruner has no inner basic pruner. + FLAGS_srl_model_type = "af"; // A pruner is always a arc-factored model. + FLAGS_srl_prune_basic = false; // A pruner has no inner basic pruner. // A pruner does not impose deterministic labels. - FLAGS_deterministic_labels = false; + FLAGS_srl_deterministic_labels = false; } void SemanticOptions::Initialize() { Options::Initialize(); - file_format_ = FLAGS_file_format; - model_type_ = FLAGS_model_type; - use_dependency_syntactic_features_ = FLAGS_use_dependency_syntactic_features; - labeled_ = FLAGS_labeled; - deterministic_labels_ = FLAGS_deterministic_labels; - allow_self_loops_ = FLAGS_allow_self_loops; - allow_root_predicate_ = FLAGS_allow_root_predicate; - allow_unseen_predicates_ = FLAGS_allow_unseen_predicates; - use_predicate_senses_ = FLAGS_use_predicate_senses; - prune_labels_ = FLAGS_prune_labels; - prune_labels_with_relation_paths_ = FLAGS_prune_labels_with_relation_paths; - prune_distances_ = FLAGS_prune_distances; - prune_basic_ = FLAGS_prune_basic; - use_pretrained_pruner_ = FLAGS_use_pretrained_pruner; - file_pruner_model_ = FLAGS_file_pruner_model; - pruner_posterior_threshold_ = FLAGS_pruner_posterior_threshold; - pruner_max_arguments_ = FLAGS_pruner_max_arguments; + file_format_ = FLAGS_srl_file_format; + model_type_ = FLAGS_srl_model_type; + use_dependency_syntactic_features_ = + FLAGS_srl_use_dependency_syntactic_features; + labeled_ = FLAGS_srl_labeled; + deterministic_labels_ = FLAGS_srl_deterministic_labels; + allow_self_loops_ = FLAGS_srl_allow_self_loops; + allow_root_predicate_ = FLAGS_srl_allow_root_predicate; + allow_unseen_predicates_ = FLAGS_srl_allow_unseen_predicates; + use_predicate_senses_ = FLAGS_srl_use_predicate_senses; + prune_labels_ = FLAGS_srl_prune_labels; + prune_labels_with_relation_paths_ = + FLAGS_srl_prune_labels_with_relation_paths; + prune_distances_ = FLAGS_srl_prune_distances; + prune_basic_ = FLAGS_srl_prune_basic; + use_pretrained_pruner_ = FLAGS_srl_use_pretrained_pruner; + file_pruner_model_ = FLAGS_srl_file_pruner_model; + pruner_posterior_threshold_ = FLAGS_srl_pruner_posterior_threshold; + pruner_max_arguments_ = FLAGS_srl_pruner_max_arguments; use_arbitrary_siblings_ = false; use_consecutive_siblings_ = false; @@ -265,7 +275,7 @@ void SemanticOptions::Initialize() { use_trisiblings_ = false; // Enable the parts corresponding to the model type. - string model_type = FLAGS_model_type; + string model_type = FLAGS_srl_model_type; if (model_type == "basic") { model_type = "af"; } else if (model_type == "standard") {