From be9757872b4d33bf0f1c582da5e7a4bfc0e21a29 Mon Sep 17 00:00:00 2001 From: Ko van der Sloot Date: Sat, 4 Apr 2020 11:16:06 +0200 Subject: [PATCH] replaces Lexicon (from ticcutils) by a 'normal' map --- include/mbt/Sentence.h | 3 +-- include/mbt/Tagger.h | 3 +-- src/GenerateTagger.cxx | 2 +- src/RunTagger.cxx | 2 +- src/Sentence.cxx | 10 +++++----- src/Tagger.cxx | 4 ++-- 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/include/mbt/Sentence.h b/include/mbt/Sentence.h index 7b458c4..0a9dc1a 100644 --- a/include/mbt/Sentence.h +++ b/include/mbt/Sentence.h @@ -30,7 +30,6 @@ #include "ticcutils/TreeHash.h" namespace Tagger { - using Hash::Lexicon; using Hash::StringHash; const std::string DOT = "=="; @@ -66,7 +65,7 @@ namespace Tagger { sentence( const PatTemplate&, const PatTemplate& ); ~sentence(); void clear(); - bool init_windowing( Lexicon&, StringHash& ); + bool init_windowing( std::map&, StringHash& ); bool nextpat( MatchAction&, std::vector&, StringHash& , StringHash&, unsigned int, int * = 0 ) const; int classify_hapax( const std::string&, StringHash& ) const; diff --git a/include/mbt/Tagger.h b/include/mbt/Tagger.h index 105d344..68c0708 100644 --- a/include/mbt/Tagger.h +++ b/include/mbt/Tagger.h @@ -212,8 +212,7 @@ namespace Tagger { PatTemplate Ktemplate; PatTemplate Utemplate; - Lexicon *MT_lexicon; - + std::map *MT_lexicon; std::string UnknownTreeBaseName; std::string KnownTreeBaseName; std::string LexFileBaseName; diff --git a/src/GenerateTagger.cxx b/src/GenerateTagger.cxx index 90249b0..eaeea5f 100644 --- a/src/GenerateTagger.cxx +++ b/src/GenerateTagger.cxx @@ -138,7 +138,7 @@ namespace Tagger { COUT << " Creating ambitag lexicon: " << MTLexFileName << endl; for ( const auto& tv : TagVect ){ out_file << tv->Word << " " << tv->stringRep() << endl; - MT_lexicon->Store( tv->Word, tv->stringRep() ); + MT_lexicon->insert( make_pair(tv->Word, tv->stringRep() ) ); } out_file.close(); } diff --git a/src/RunTagger.cxx b/src/RunTagger.cxx index e5b26fb..4d583c2 100644 --- a/src/RunTagger.cxx +++ b/src/RunTagger.cxx @@ -427,7 +427,7 @@ namespace Tagger { int no_words=0; ifstream lexfile( FileName, ios::in); while ( lexfile >> wordbuf >> valbuf ){ - MT_lexicon->Store( wordbuf, valbuf ); + MT_lexicon->insert(make_pair(wordbuf,valbuf)); no_words++; lexfile >> ws; } diff --git a/src/Sentence.cxx b/src/Sentence.cxx index b674d20..8c83901 100644 --- a/src/Sentence.cxx +++ b/src/Sentence.cxx @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -139,7 +140,7 @@ namespace Tagger { add(a_word, tmp, a_tag); } - bool sentence::init_windowing( Lexicon &lex, + bool sentence::init_windowing( map& lex, StringHash& TheLex ) { if ( UTAG == -1 ){ #pragma omp critical (hasher) @@ -159,12 +160,11 @@ namespace Tagger { } // look up ambiguous tag in the dictionary // - LexInfo *foundInfo = lex.Lookup( cur_word->the_word ); - if ( foundInfo != NULL ){ - // cerr << "MT Lookup(" << cur_word->the_word << ") gave " << *foundInfo << endl; + const auto it = lex.find( cur_word->the_word ); + if ( it != lex.end() ){ #pragma omp critical (hasher) { - cur_word->word_amb_tag = TheLex.Hash( foundInfo->Trans() ); + cur_word->word_amb_tag = TheLex.Hash( it->second ); } } else { diff --git a/src/Tagger.cxx b/src/Tagger.cxx index 72a621d..2969a98 100644 --- a/src/Tagger.cxx +++ b/src/Tagger.cxx @@ -95,7 +95,7 @@ namespace Tagger { initialized = false; Beam_Size = 1; Beam = NULL; - MT_lexicon = new Lexicon(); + MT_lexicon = new map; kwordlist = new StringHash(); uwordlist = new StringHash(); piped_input = true; @@ -155,7 +155,7 @@ namespace Tagger { Separators( in.Separators ), Ktemplate( in.Ktemplate ), Utemplate( in.Utemplate ), - MT_lexicon( in.MT_lexicon ), + MT_lexicon( in.MT_lexicon ), //!> is a pointer to avoid copies UnknownTreeBaseName( in.UnknownTreeBaseName ), KnownTreeBaseName( in.KnownTreeBaseName ), LexFileBaseName( in.LexFileBaseName ),