forked from cgpotts/cs224u
-
Notifications
You must be signed in to change notification settings - Fork 0
/
compgen.py
117 lines (95 loc) · 3.58 KB
/
compgen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from collections import defaultdict
from itertools import product
import re
__author__ = "Christopher Potts and Zhengxuan Wu"
__version__ = "CS224u, Stanford, Spring 2023"
def recogs_exact_match(gold, pred, flag="000000"):
gold = normalize_formula(gold)
pred = normalize_formula(pred)
gold_conj_set = get_conj_set(gold)
# Loop over all viable mappings from pred_vars to gold_vars:
for this_map in _candidate_variable_maps(gold, pred):
phi = pred
for sourcevar, targetvar in this_map.items():
# The flag makes sure we don't accidentally do a chain
# of replacements via successive changes in situations
# where the domain and range of `this_map` share vars.
phi = variable_change(phi, sourcevar, targetvar, flag=flag)
phi = phi.replace(flag, "")
phi_conj_set = get_conj_set(phi)
# This step assumes that we have no conjuncts that are
# tautologies, contradictions, or equality predications. If
# such are introduced, they need to be identified ahead of
# time and treated separately -- tautologies would be removed,
# contradictions would reduce to comparisons of only those
# conjuncts, and equality statements would call for special
# handling related to variables mapping.
if phi_conj_set == gold_conj_set:
return True
return False
def normalize_formula(phi):
return phi.replace(" ", "").replace("AND" , " AND ")
binary_pred_re = re.compile(r"""
(\w+)
\s*
\(
\s*
(\d+)
\s*
,
\s*
(\d+)
\s*
\)""", re.VERBOSE)
unary_pred_re = re.compile(r"""
(\w+)
\s*
\(
\s*
(\d+)
\s*
\)""", re.VERBOSE)
def _candidate_variable_maps(gold, pred):
# This creates a mapping from tuples of predicates into their
# associated variables. These serve as equivalence classes over
# variables that could possibly be translations of each other.
gold_map = _map_get_preds_to_vars(gold)
pred_map = _map_get_preds_to_vars(pred)
# For each prediction variable, get the set of potential
# translations for it:
pred2gold = defaultdict(list)
for preds, pvars in pred_map.items():
gvars = gold_map[preds]
for pvar in pvars:
pred2gold[pvar] = gold_map[preds]
# Variable sets:
gold_vars = set(get_variables(gold))
pred_vars = set(get_variables(pred))
# Now generate potentially viable mappings:
for vals in list(product(*list(pred2gold.values()))):
d = dict(zip(pred2gold.keys(), vals))
if set(d.keys()) == pred_vars and set(d.values()) == gold_vars:
yield d
def _map_get_preds_to_vars(phi):
var2pred = defaultdict(list)
for pred, var in unary_pred_re.findall(phi):
var2pred[var].append(pred)
# We could do somewhat less search by specializing to first and
# second position for these predicates, but I think it's fine
# as-is.
for pred, var1, var2 in binary_pred_re.findall(phi):
var2pred[var1].append(pred)
var2pred[var2].append(pred)
pred2var = defaultdict(list)
for var, preds in var2pred.items():
pred2var[tuple(sorted(preds))].append(var)
return pred2var
def get_variables(phi):
variable_re = re.compile(r"(\d+)")
return variable_re.findall(phi)
def get_conj_set(phi):
conj_splitter_re = re.compile(r"\s*(?:AND|;)\s*")
return set(conj_splitter_re.split(phi))
def variable_change(phi, sourcevar, targetvar, flag="000000"):
replace_re = re.compile(rf"\b{sourcevar}\b")
return replace_re.sub(f"{flag}{targetvar}", phi)