-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrun_relation_extraction.py
274 lines (229 loc) · 9.15 KB
/
run_relation_extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
"""
Date: 2018/12/26
Version: 0
Last update: 2019/1/14
Author: Moju Wu
"""
from std import *
from Runner import Runner
import jpype
import glob
from collections import OrderedDict
from SSQA import Docxer, Dotter, Plotter, Scanner
from aceAnnotationStructure.ACEEntity import *
from aceAnnotationStructure.ACERelation import *
from aceAnnotationStructure.ACEEvent import *
# Module-level logger, named after this module.
_log = Logger(__name__)

# =================================== Global =======================================#
# Running tallies, one triple per annotation kind:
#   *_MATCH_COUNT - matched (system, benchmark) mention pairs
#   *_COUNT       - mentions produced by the system under evaluation
#   *_BENCH_COUNT - mentions present in the benchmark
ENTITY_MATCH_COUNT = 0
ENTITY_COUNT = 0
ENTITY_BENCH_COUNT = 0

RELATION_MATCH_COUNT = 0
RELATION_COUNT = 0
RELATION_BENCH_COUNT = 0

EVENT_MATCH_COUNT = 0
EVENT_COUNT = 0
EVENT_BENCH_COUNT = 0

# Data directories (absolute paths on the original author's machine).
PREPRO_DIR = '/media/moju/data/work/ace05_ERE/data/prepro/'
ENTITY_DIR = '/media/moju/data/work/ace05_ERE/data/entity/'
RELATION_DIR = '/media/moju/data/work/ace05_ERE/data/relation/'
def _init_java(args):
    """Start a JVM through JPype with a class path assembled from *args*.

    Each item of *args* is either a plain class-path entry, used as is, or a
    ``dir::jar1::jar2...`` specification. In the latter form every jar name is
    joined to ``dir`` with ``/``; a joined path containing ``*`` is expanded
    with ``glob.glob`` and contributes whatever files it matches.
    """
    class_path_entries = []
    for spec in args:
        lprint(spec)
        if '::' not in spec:
            class_path_entries.append(spec)
            continue
        base_dir, *jar_names = spec.split('::')
        for jar_name in jar_names:
            candidate = '{}/{}'.format(base_dir, jar_name)
            if '*' in candidate:
                # Wildcard entry: add every file the pattern matches.
                class_path_entries.extend(glob.glob(candidate))
            else:
                class_path_entries.append(candidate)

    class_path = os.pathsep.join(class_path_entries)
    jpype.startJVM(
        jpype.getDefaultJVMPath(),
        "-ea",
        "-Dfile.encoding=UTF8",
        "-Djava.class.path=%s" % class_path,
    )
def _read_src(fp):
doc = json.load(fp)
out = [doc["docID"], doc["documentID"], doc["sentences"]]
return out
def _read_bench(fp):
    """Read a benchmark document from the JSON stream *fp* and build mention objects.

    The document's ``docID`` is passed to ``lprint``. Every entry of
    ``entityList``, ``relationList`` and ``eventList`` is then flattened into
    one mention object per item of its mention list, with the parent's
    identifiers and type information copied onto each mention.

    Returns a tuple ``(entity_mentions, relation_mentions, event_mentions)``
    of lists holding ``EntityMention``, ``RelationMention`` and
    ``EventMention`` objects respectively.
    """
    bench_doc = json.load(fp)
    lprint(bench_doc['docID'])

    # ---- entities
    entity_mentions = []
    for entity in bench_doc['entityList']:
        for raw in entity['entityMentionList']:
            mention = EntityMention(raw['extent'], raw['position'])
            mention.set(raw['id'], entity['entityID'], entity['entityType'], entity['entitySubType'])
            entity_mentions.append(mention)

    # ---- relations
    relation_mentions = []
    for relation in bench_doc['relationList']:
        for raw in relation['relationMentionList']:
            arg1, arg2 = raw['mentionArg1'], raw['mentionArg2']
            mention = RelationMention(arg1['extent'], arg2['extent'], raw['position'])
            mention.mentionArg1.setID(arg1['id'])
            mention.mentionArg2.setID(arg2['id'])
            mention.set(
                raw['id'],
                raw['extent'],
                relation['relationID'],
                relation['relationArg1'],
                relation['relationArg2'],
                relation['relationType'],
                relation['relationSubType'],
            )
            relation_mentions.append(mention)

    # ---- events
    event_mentions = []
    for event in bench_doc['eventList']:
        for raw in event['eventMentionList']:
            mention = EventMention(raw['extent'], raw['position'], raw['anchor'], raw['eventMentionArgList'])
            mention.setID(raw['id'], event['eventID'])
            mention.setType(event['eventType'])
            mention.setSubType(event['eventSubType'])
            mention.setArgs(event['eventArgList'])
            event_mentions.append(mention)

    return entity_mentions, relation_mentions, event_mentions
def _read_ere(fp):
doc = json.loads(fp)
out = [doc["entityList"], doc["relationList"], doc["eventList"]]
return out
def _fmeasure(matchedCount, ieLen, benchLen):
p = matchedCount / ieLen
r = matchedCount / benchLen
fmeasure = 2 * p * r / (p + r)
return fmeasure
def _count_matches(mentions, bench_mentions, same):
    """Count (mention, benchmark mention) pairs with equal position for which same(...) is true."""
    return sum(
        1
        for mention in mentions
        for bench_mention in bench_mentions
        if mention.position == bench_mention.position and same(mention, bench_mention)
    )


def _evaluate(bench, ere):
    """Compare system mentions against benchmark mentions and update the global tallies.

    Parameters:
        bench: triple ``(entity mentions, relation mentions, event mentions)``
            from the benchmark.
        ere: triple in the same order from the system under evaluation.

    Both sides are compared pairwise. A pair matches when the positions are
    equal and, additionally:
        * entities  - ``extent`` and ``type`` are equal;
        * relations - ``mentionArg1``, ``mentionArg2`` and ``type`` are equal;
        * events    - ``type`` is equal.

    Every matching pair is counted, so a system mention that matches several
    benchmark mentions contributes more than once. The ``*_MATCH_COUNT``,
    ``*_COUNT`` and ``*_BENCH_COUNT`` module globals are incremented; nothing
    is returned.
    """
    global ENTITY_MATCH_COUNT, ENTITY_COUNT, ENTITY_BENCH_COUNT
    global RELATION_MATCH_COUNT, RELATION_COUNT, RELATION_BENCH_COUNT
    global EVENT_MATCH_COUNT, EVENT_COUNT, EVENT_BENCH_COUNT

    entityMentions_bench, relationMentions_bench, eventMentions_bench = bench
    entityMentions, relationMentions, eventMentions = ere

    # evaluate entity
    ENTITY_MATCH_COUNT += _count_matches(
        entityMentions, entityMentions_bench,
        lambda m, b: m.extent == b.extent and m.type == b.type,
    )
    ENTITY_COUNT += len(entityMentions)
    ENTITY_BENCH_COUNT += len(entityMentions_bench)

    # evaluate relation
    RELATION_MATCH_COUNT += _count_matches(
        relationMentions, relationMentions_bench,
        lambda m, b: (m.mentionArg1 == b.mentionArg1
                      and m.mentionArg2 == b.mentionArg2
                      and m.type == b.type),
    )
    RELATION_COUNT += len(relationMentions)
    RELATION_BENCH_COUNT += len(relationMentions_bench)

    # evaluate event
    EVENT_MATCH_COUNT += _count_matches(
        eventMentions, eventMentions_bench,
        lambda m, b: m.type == b.type,
    )
    EVENT_COUNT += len(eventMentions)
    EVENT_BENCH_COUNT += len(eventMentions_bench)
class _Runner(Runner):
    """Runner whose per-item action is the ``cmd__<name>`` method chosen by ``arg.cmd``."""

    def __init__(self, arg):
        """Initialise the base Runner and bind ``self.cmd`` to the selected command.

        Raises ``AttributeError`` if no ``cmd__<arg.cmd>`` method exists.
        """
        super().__init__(arg)
        self.cmd = getattr(self, 'cmd__{}'.format(self.arg.cmd))

    def run_one(self, fs, plg=False, elg=False):
        """Invoke the selected command on *fs*.

        The ``plg``/``elg`` flags are forwarded positionally only when at
        least one of them is truthy; otherwise the command receives *fs* alone.
        """
        if plg or elg:
            return self.cmd(fs, plg, elg)
        else:
            return self.cmd(fs)

    @staticmethod
    def cmd__test_read(fs, plg=False, elg=False):
        """Read one benchmark file and print the type of its first entity mention.

        *fs* is a one-element sequence holding the benchmark file handle.
        ``plg``/``elg`` are accepted because ``run_one`` passes them whenever
        either is set; this command has no per-file input in those calls, so
        it returns immediately.
        """
        if plg or elg:
            return
        bfh, = fs  # r:benchmark
        entityMentions, _, _ = _read_bench(bfh)
        lprint(entityMentions[0].type)

    def cmd__test_eval(self, fhs, plg=False, elg=False):
        """Evaluate one system output against its benchmark and update the global tallies.

        *fhs* is a pair ``(benchmark handle, system-output handle)``. The
        ``plg`` call does nothing. With ``elg`` set, the entity F-measure
        computed from the tallies gathered so far is printed before this
        call's own evaluation.
        """
        if plg:
            return
        # ----
        bfh, erefh = fhs
        bench, ere = _read_bench(bfh), _read_ere(erefh)
        # ----
        if elg:
            lprint(_fmeasure(ENTITY_MATCH_COUNT, ENTITY_COUNT, ENTITY_BENCH_COUNT))
        # NOTE(review): the elg call also unpacks fhs and evaluates; confirm
        # what the base Runner passes as fhs in that call.
        _evaluate(bench, ere)
def get_runner(arg):
    """Build the runner object selected by the dotted ``arg.cmd`` string.

    ``arg.cmd`` is split on ``.`` and interpreted by the number of parts:

    * one part (``cmd``): this module's ``_Runner`` is used;
    * more than two parts (``a.b.cmd``): the module ``SSQA.a.b.Sys`` is
      imported and its ``Sys`` class is instantiated;
    * two parts (``system.cmd``): ``system`` is looked up among the known
      system names below.

    In the last two cases ``arg.cmd`` is rewritten in place to the trailing
    command name before the runner is constructed.

    Raises ``ValueError`` naming the rejected system when a two-part command
    uses an unknown system name.
    """
    full_cmd = arg.cmd
    myCmds = full_cmd.split('.')

    if len(myCmds) == 1:
        return _Runner(arg)

    if len(myCmds) > 2:
        arg.cmd = myCmds.pop()
        import importlib
        module = importlib.import_module('SSQA.{}.Sys'.format('.'.join(myCmds)))
        return getattr(module, 'Sys')(arg)

    mySys, arg.cmd = myCmds
    if mySys == 'Docxer':
        return Docxer(arg)
    if mySys == 'Dotter':
        return Dotter(arg)
    if mySys == 'Plotter':
        return Plotter(arg)
    if mySys == 'Scanner':
        return Scanner(arg)
    if mySys in ['SysCyutB1', 'SysCyutB1e']:
        from SSQA.Sys.Cyut.Sys import SysB1 as Sys
        # A trailing 'e' in the system name turns allow_null_evid off.
        return Sys(arg, allow_null_evid=(mySys[-1] != 'e'))
    if mySys in ['SysCyutB2', 'SysCyutB2S', 'SysCyutB2e']:
        from SSQA.Sys.Cyut.Sys import SysB2 as Sys
        return Sys(arg, simplified=(mySys == 'SysCyutB2S'), allow_null_evid=(mySys[-1] != 'e'))
    if mySys == 'SysV0':
        from SSQA.Sys.V0.Sys import Sys
        return Sys(arg)
    if mySys == 'SysV1R1':
        from SSQA.Sys.V1.R1.Sys import Sys
        return Sys(arg)
    if mySys.startswith('SysV1R2'):
        from SSQA.Sys.V1.R2.Sys import Sys
        return Sys(arg)
    raise ValueError('unknown system {!r} in command {!r}'.format(mySys, full_cmd))
def main(arg):
    """Start the JVM if ``arg.jvm`` is set, then build the runner and run everything."""
    jvm_specs = arg.jvm
    if jvm_specs:
        _init_java(jvm_specs)
    get_runner(arg).run_all()
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    # ---- external tools and resources
    parser.add_argument('-gvb', default='S:', help='Graphviz bin dir')
    parser.add_argument('-occ', help='OpenCC home dir')
    parser.add_argument('-cns', help='CoreNlp Server URL (http://localhost:9000)')
    parser.add_argument('-cwn', help='Chinese WordNet dir')
    parser.add_argument('-jvm', nargs='*', help='Run Java Virtual Machine via JPype')
    # ---- runner options
    parser.add_argument('-rfs', nargs='*', help='Runner: File-Spec')
    parser.add_argument('-rpe', nargs='*', help='Runner: Prologue and Epilogue')
    parser.add_argument('-reo', help='Runner: Epilogue Output File')
    parser.add_argument('-rno', action='store_true', help='Runner: Not-Open')
    parser.add_argument('-rlc', nargs='*', help='Runner: Looping-Char')
    parser.add_argument('-rip', nargs='*', help='Runner: Including Pattern')
    parser.add_argument('-rxp', nargs='*', help='Runner: eXcluding Pattern')
    parser.add_argument('-rpg', nargs='*', help='Runner: Parameter Grid')
    parser.add_argument('-rsa', nargs='*', help='Runner: Skipper Arguments')
    # Format: worker-number[:time-out[:poll-gap[:listen]]]
    parser.add_argument('-rpa', help='Runner: Pool Arguments')
    parser.add_argument('-rja', nargs='*', help='Runner: Job-Arguments')
    # ---- command selection
    parser.add_argument('-cmd', required=True, help='Command')
    parser.add_argument('-echo', action='store_true', help='Echo command only')
    # ---- output and logging
    parser.add_argument('-force', action='store_true', help='Overwrite existing output files')
    parser.add_argument('-log', help='Output log file')
    parser.add_argument('-llv', default='50', help='Logging level')
    parser.add_argument('-v', type=int, default=0, help='Verbose level')
    myArg = parser.parse_args()

    # With -log, records go through the handler built by log_w() and carry a
    # timestamp; without it they carry the emitting file name and line number.
    if myArg.log:
        logging.basicConfig(
            handlers=[log_w(myArg.log)],
            level=str2llv(myArg.llv),
            format='%(asctime)s: %(message)s',
        )
    else:
        logging.basicConfig(
            level=str2llv(myArg.llv),
            format='%(filename)s(%(lineno)d): %(message)s',
        )
    # Record the full command line in either configuration.
    _log.log(100, ' '.join(sys.argv))
    main(myArg)