Skip to content

Commit d56d22b

Browse files
committed
Bring in Dan's modifications for n-best easyccg2jigg conversion
1 parent f45b40a commit d56d22b

File tree

1 file changed

+24
-6
lines changed

1 file changed

+24
-6
lines changed

en/easyccg2jigg.py

+24-6
Original file line number | Diff line number | Diff line change
@@ -138,10 +138,10 @@ def get_attributes(node_label):
138138
attributes = get_attributes_from_node(node_label)
139139
return attributes
140140

141-
def make_ccg_node(tree, sentence_id):
141+
def make_ccg_node(tree, sentence_id, ccg_count=0):
142142
ccg_node = etree.Element('ccg')
143143
ccg_node.set('root', 's{0}_sp{1}'.format(sentence_id, 0))
144-
ccg_node.set('id', 's{0}_ccg{1}'.format(sentence_id, 0))
144+
ccg_node.set('id', 's{0}_ccg{1}'.format(sentence_id, ccg_count))
145145
nodes = [tree[p] for p in tree.treepositions() \
146146
if isinstance(tree[p], Tree)]
147147
child_inds = defaultdict(list)
@@ -192,24 +192,42 @@ def make_jigg_sentence(line, sentence_id):
192192
sentence_node.append(ccg_node)
193193
return sentence_node
194194

195+
def add_ccg_nodes(line, sentence_node, sentence_id, ccg_count):
    """Append one more <ccg> element to an existing sentence node.

    ``line`` is parsed with ``make_tree`` and the result is converted by
    ``make_ccg_node``; the new element is appended to ``sentence_node``
    in place. Nothing is returned.

    ``sentence_id`` is lowered by one before being handed to
    ``make_ccg_node`` (presumably the input IDs are 1-based while the
    generated ids are 0-based -- TODO confirm against make_jigg_sentence).
    ``ccg_count`` is forwarded unchanged and ends up in the element's
    ``id`` attribute.
    """
    parsed = make_tree(line)
    sentence_node.append(make_ccg_node(parsed, sentence_id - 1, ccg_count))
200+
201+
195202
# Build the XML skeleton: <root><document><sentences/></document></root>.
root_node = etree.Element('root')
document_node = etree.Element('document')
root_node.append(document_node)
sentences_node = etree.Element('sentences')
document_node.append(sentences_node)

# Parser state while scanning the input file.
sentence_id = 0          # ID read from the most recent 'ID=' line
ccg_count = 0            # index of the current parse within the sentence
is_new_sentence = False  # True until the first parse of a new ID is consumed
sentence_tree = None     # sentence element that further parses attach to

# NOTE(review): sentence_id starts at 0, so a leading 'ID=0' line is not
# treated as a new sentence and its parses are skipped -- confirm that the
# input IDs never start at 0.
# The file is opened in a ``with`` block so the handle is closed even if a
# line fails to convert.
with codecs.open(args.infile, 'r', 'utf-8') as parse_file:
    for line in parse_file:
        if line.startswith('ID='):
            current_sentence_id = int(line.split('=')[-1])
            if sentence_id != current_sentence_id:
                # A different ID starts a fresh sentence.
                is_new_sentence = True
                sentence_id = current_sentence_id
                ccg_count = 0
            else:
                # Same ID again: presumably the next n-best parse of the
                # same sentence.
                ccg_count += 1
        elif is_new_sentence:
            # First parse for this ID: create the sentence element.
            is_new_sentence = False
            sentence_tree = make_jigg_sentence(line, sentence_id)
            sentences_node.append(sentence_tree)
        elif sentence_tree is not None:
            # Further parse for the current sentence: add another ccg node.
            add_ccg_nodes(line, sentence_tree, sentence_id, ccg_count)
213231

214232
def serialize_tree(tree):
215233
tree_str = etree.tostring(

0 commit comments

Comments
 (0)