-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathbatch_tagger.h
121 lines (97 loc) · 3.29 KB
/
batch_tagger.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#ifndef __REFLECT_BATCH_TAGGER_HEADER__
#define __REFLECT_BATCH_TAGGER_HEADER__
#include "base_handlers.h"
#include "tagger.h"
#include <cstdio>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
using namespace std;
/**
 * Tagger that works on whole documents: it obtains the matches for a
 * document, passes them through `match_handler`, and then hands the document
 * together with its matches to a caller-supplied document handler.
 */
class DocumentTagger : public Tagger
{
public:
    // Assigned by load_names(entities_filename, names_filename) and passed to
    // the GroupMatchHandler created by load_groups(groups_filename).
    EntityTypeMap* entity_type_map;

    // Applied to every match list in process(). The constructor installs a
    // DisambiguationMatchHandler; load_groups() replaces it.
    IMatchHandler* match_handler;

    DocumentTagger();
    ~DocumentTagger();

    // Name loading: both overloads forward to the Tagger overload with the
    // same arguments; the two-filename overload also builds entity_type_map.
    void load_names(const char* entities_filename, const char* names_filename);
    void load_names(int type, const char* names_filename);

    // Group loading: replaces match_handler with a GroupMatchHandler.
    void load_groups(const char* groups_filename);
    void load_groups(int type, const char* groups_filename);

    // Tags a single document, or every document produced by a reader.
    void process(Document& document, const GetMatchesParams& params, IDocumentHandler* document_handler);
    void process(IDocumentReader* document_reader, const GetMatchesParams& params, IDocumentHandler* document_handler);
};
/**
 * DocumentTagger that drives a complete batch: it obtains a document handler
 * from an IBatchHandler and brackets the document loop with the
 * on_batch_begin / on_batch_end notifications.
 */
class BatchTagger : public DocumentTagger
{
public:
    BatchTagger();
    ~BatchTagger();

    // Tags every document produced by `document_reader` as one batch.
    void process(IDocumentReader* document_reader, const GetMatchesParams& params, IBatchHandler* batch_handler);
};
////////////////////////////////////////////////////////////////////////////////
/**
 * Constructs the tagger with a DisambiguationMatchHandler as the initial
 * match handler.
 *
 * `entity_type_map` starts out null so that it has a defined value until
 * load_names(entities_filename, names_filename) assigns it; previously it was
 * left uninitialised even though load_groups(groups_filename) reads it.
 */
DocumentTagger::DocumentTagger()
    : Tagger(true),
      entity_type_map(nullptr),
      match_handler(new DisambiguationMatchHandler())
{
}

/**
 * Releases the objects this class allocated with `new`.
 *
 * The match handler is destroyed first because a GroupMatchHandler is
 * constructed with the entity_type_map pointer and may still refer to it.
 * Deleting a null entity_type_map is a no-op.
 *
 * NOTE(review): this assumes the two public pointers are owned exclusively by
 * this object (load_groups() already deletes match_handler on that
 * assumption). Instances must not be copied, since a copy would share the
 * same pointers.
 */
DocumentTagger::~DocumentTagger()
{
    delete this->match_handler;
    delete this->entity_type_map;
}
void DocumentTagger::load_names(const char* entities_filename, const char* names_filename) {
Tagger::load_names(entities_filename, names_filename);
this->entity_type_map = new EntityTypeMap(entities_filename);
}
/**
 * Loads names for a single entity type by forwarding to the Tagger overload
 * with the same arguments. entity_type_map is not touched by this overload.
 *
 * @param type            forwarded unchanged to Tagger::load_names
 * @param names_filename  forwarded unchanged to Tagger::load_names
 */
void DocumentTagger::load_names(int type, const char* names_filename)
{
    Tagger::load_names(type, names_filename);
}
void DocumentTagger::load_groups(const char* groups_filename)
{
delete this->match_handler;
this->match_handler = new GroupMatchHandler(this->entity_type_map, groups_filename);
}
void DocumentTagger::load_groups(int type, const char* groups_filename)
{
delete this->match_handler;
this->match_handler = new GroupMatchHandler(type, groups_filename);
}
/**
 * Tags one document.
 *
 * The document key is formatted as the document id, the matches for the
 * document text are fetched from Tagger::get_matches, run through
 * `match_handler`, and then delivered to `document_handler` together with the
 * document. The match objects are deleted before returning, so the handler
 * must not keep pointers to them.
 *
 * @param document          document to tag; its `key` and `text` are read
 * @param params            forwarded to Tagger::get_matches
 * @param document_handler  receives the document and its processed matches
 */
void DocumentTagger::process(Document& document, const GetMatchesParams& params, IDocumentHandler* document_handler)
{
    char document_id[16];
    // Bounded write: the id can never run past the end of the stack buffer.
    snprintf(document_id, sizeof(document_id), "%d", document.key);

    Matches matches = Tagger::get_matches(document.text, document_id, params);
    this->match_handler->process(matches);
    document_handler->process(document, matches);

    // The container holds pointers; release each pointed-to match here.
    for (Matches::iterator it = matches.begin(); it != matches.end(); ++it) {
        delete *it;
    }
}
/**
 * Tags every document the reader yields.
 *
 * Documents are pulled one at a time until read_document() returns a null
 * pointer; each one is tagged through the single-document overload and then
 * deleted.
 *
 * @param document_reader   source of documents
 * @param params            forwarded to the single-document overload
 * @param document_handler  forwarded to the single-document overload
 */
void DocumentTagger::process(IDocumentReader* document_reader, const GetMatchesParams& params, IDocumentHandler* document_handler)
{
    for (Document* current = document_reader->read_document();
         current;
         current = document_reader->read_document())
    {
        process(*current, params, document_handler);
        delete current;
    }
}
////////////////////////////////////////////////////////////////////////////////
/** Default-constructs the DocumentTagger base; BatchTagger adds no state of its own. */
BatchTagger::BatchTagger() : DocumentTagger() {}
/** Nothing to release at this level. */
BatchTagger::~BatchTagger() {}
/**
 * Runs one complete batch.
 *
 * A document handler is obtained from the batch handler, both are notified
 * that the batch begins (batch handler first), all documents are tagged via
 * DocumentTagger::process, both are notified that the batch ends (document
 * handler first), and finally the document handler is deleted.
 *
 * @param document_reader  source of documents
 * @param params           forwarded to DocumentTagger::process
 * @param batch_handler    creates the per-batch document handler and receives
 *                         the batch begin/end notifications
 */
void BatchTagger::process(IDocumentReader* document_reader, const GetMatchesParams& params, IBatchHandler* batch_handler)
{
    IDocumentHandler* per_batch_handler = batch_handler->create_document_handler();

    // Begin notifications: outer (batch) handler, then inner (document) handler.
    batch_handler->on_batch_begin();
    per_batch_handler->on_batch_begin();

    DocumentTagger::process(document_reader, params, per_batch_handler);

    // End notifications in the reverse order of the begin notifications.
    per_batch_handler->on_batch_end();
    batch_handler->on_batch_end();

    delete per_batch_handler;
}
#endif