-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathevaluate.py
115 lines (100 loc) · 4.5 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import math
import os.path
import re
import shutil
import subprocess
import tempfile

import trees
class FScore(object):
    """Plain holder for the scores of one evaluation run.

    Stores recall, precision, F-score, complete-match rate and tagging
    accuracy exactly as given; no validation or conversion is performed.
    ``tagging_accuracy`` defaults to 100.
    """

    def __init__(self, recall, precision, fscore, complete_match, tagging_accuracy=100):
        self.recall, self.precision = recall, precision
        self.fscore, self.complete_match = fscore, complete_match
        self.tagging_accuracy = tagging_accuracy

    def __str__(self):
        """Format every score with two decimals.

        The tagging accuracy is appended only when it is below 100.
        """
        scores = (self.recall, self.precision, self.fscore, self.complete_match)
        # Written as "not <" (rather than ">=") so a NaN tagging accuracy also
        # selects the short form.
        if not self.tagging_accuracy < 100:
            return "(Recall={:.2f}, Precision={:.2f}, FScore={:.2f}, CompleteMatch={:.2f})".format(
                *scores)
        scores_with_tagging = scores + (self.tagging_accuracy,)
        return "(Recall={:.2f}, Precision={:.2f}, FScore={:.2f}, CompleteMatch={:.2f}, TaggingAccuracy={:.2f})".format(
            *scores_with_tagging)
# Summary lines looked for in the EVALB output, as
# (FScore attribute name, compiled line pattern) pairs.
_EVALB_SUMMARY_PATTERNS = (
    ("recall", re.compile(r"Bracketing Recall\s+=\s+(\d+\.\d+)")),
    ("precision", re.compile(r"Bracketing Precision\s+=\s+(\d+\.\d+)")),
    ("fscore", re.compile(r"Bracketing FMeasure\s+=\s+(\d+\.\d+)")),
    ("complete_match", re.compile(r"Complete match\s+=\s+(\d+\.\d+)")),
    ("tagging_accuracy", re.compile(r"Tagging accuracy\s+=\s+(\d+\.\d+)")),
)


def _write_linearized_trees(path, tree_list):
    """Write ``tree.linearize()`` for every tree to ``path``, one per line."""
    with open(path, "w") as outfile:
        for tree in tree_list:
            outfile.write("{}\n".format(tree.linearize()))


def _read_evalb_summary(output_path):
    """Parse the summary lines of an EVALB output file into an ``FScore``.

    Scores whose line is missing (or whose value is not a decimal number,
    e.g. ``nan``) are left as NaN; the tagging accuracy keeps the ``FScore``
    default in that case.
    """
    fscore = FScore(math.nan, math.nan, math.nan, math.nan)
    with open(output_path) as infile:
        for line in infile:
            matched_attribute = None
            for attribute, pattern in _EVALB_SUMMARY_PATTERNS:
                match = pattern.match(line)
                if match:
                    setattr(fscore, attribute, float(match.group(1)))
                    matched_attribute = attribute
                    break
            # Stop at the first tagging-accuracy line so that only the first
            # summary section is read. NOTE(review): EVALB presumably prints
            # further summaries (e.g. for short sentences) afterwards - confirm.
            if matched_attribute == "tagging_accuracy":
                break
    return fscore


def evalb(evalb_dir, gold_trees, predicted_trees, ref_gold_path=None):
    """Score predicted trees against gold trees with the external EVALB program.

    Args:
        evalb_dir: Directory containing either an ``evalb`` executable with
            ``nk.prm`` or an ``evalb_spmrl`` executable with ``spmrl_nk.prm``.
        gold_trees: Sequence of ``trees.TreebankNode`` gold trees.
        predicted_trees: Sequence of ``trees.TreebankNode`` predictions, the
            same length as ``gold_trees`` and with identical leaf words.
        ref_gold_path: Optional path to a gold file whose contents are passed
            to EVALB verbatim instead of the linearized ``gold_trees``.

    Returns:
        An ``FScore`` with the values parsed from the EVALB output. Scores
        that could not be parsed are NaN.

    Raises:
        AssertionError: If the EVALB files are missing or the gold and
            predicted trees do not line up.

    When no usable result can be read, the paths of the temporary gold,
    predicted and output files are printed and the files are kept on disk for
    inspection; otherwise the temporary directory is removed.
    """
    assert os.path.exists(evalb_dir)
    evalb_program_path = os.path.join(evalb_dir, "evalb")
    evalb_spmrl_program_path = os.path.join(evalb_dir, "evalb_spmrl")
    assert os.path.exists(evalb_program_path) or os.path.exists(evalb_spmrl_program_path)

    if os.path.exists(evalb_program_path):
        evalb_param_path = os.path.join(evalb_dir, "nk.prm")
    else:
        evalb_program_path = evalb_spmrl_program_path
        # NOTE: spmrl_nk.prm is used here rather than spmrl.prm.
        evalb_param_path = os.path.join(evalb_dir, "spmrl_nk.prm")

    assert os.path.exists(evalb_program_path)
    assert os.path.exists(evalb_param_path)

    assert len(gold_trees) == len(predicted_trees)
    for gold_tree, predicted_tree in zip(gold_trees, predicted_trees):
        assert isinstance(gold_tree, trees.TreebankNode)
        assert isinstance(predicted_tree, trees.TreebankNode)
        gold_leaves = list(gold_tree.leaves())
        predicted_leaves = list(predicted_tree.leaves())
        assert len(gold_leaves) == len(predicted_leaves)
        assert all(
            gold_leaf.word == predicted_leaf.word
            for gold_leaf, predicted_leaf in zip(gold_leaves, predicted_leaves))

    # mkdtemp (not TemporaryDirectory): a TemporaryDirectory object is removed
    # from disk as soon as it is garbage-collected, which would delete the
    # files whose paths are printed for debugging on failure.
    temp_dir = tempfile.mkdtemp(prefix="evalb-")
    keep_temp_dir = False
    try:
        gold_path = os.path.join(temp_dir, "gold.txt")
        predicted_path = os.path.join(temp_dir, "predicted.txt")
        output_path = os.path.join(temp_dir, "output.txt")

        if ref_gold_path is None:
            _write_linearized_trees(gold_path, gold_trees)
        else:
            # For the SPMRL dataset our data loader performs some modifications
            # (like stripping morphological features), so we compare to the
            # raw gold file to be certain that we haven't spoiled the evaluation
            # in some way.
            with open(ref_gold_path) as goldfile, open(gold_path, "w") as outfile:
                outfile.write(goldfile.read())

        _write_linearized_trees(predicted_path, predicted_trees)

        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through unmodified.
        command = [
            evalb_program_path,
            "-p",
            evalb_param_path,
            gold_path,
            predicted_path,
        ]
        with open(output_path, "w") as output_file:
            try:
                subprocess.run(command, stdout=output_file)
            except OSError as error:
                # A failed launch is treated like any other unusable run: the
                # empty output yields NaN scores and the diagnostics below.
                print("Error running EVALB: {}".format(error))

        fscore = _read_evalb_summary(output_path)

        # A NaN F-score is still accepted as a valid result when recall or
        # precision was parsed as exactly zero.
        success = (
            not math.isnan(fscore.fscore) or
            fscore.recall == 0.0 or
            fscore.precision == 0.0)

        if not success:
            keep_temp_dir = True
            print("Error reading EVALB results.")
            print("Gold path: {}".format(gold_path))
            print("Predicted path: {}".format(predicted_path))
            print("Output path: {}".format(output_path))
    finally:
        # Removed on success and on unexpected exceptions; kept only when the
        # results could not be read, so the printed paths remain valid.
        if not keep_temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)

    return fscore