@@ -138,10 +138,10 @@ def get_attributes(node_label):
138
138
attributes = get_attributes_from_node (node_label )
139
139
return attributes
140
140
141
- def make_ccg_node (tree , sentence_id ):
141
+ def make_ccg_node (tree , sentence_id , ccg_count = 0 ):
142
142
ccg_node = etree .Element ('ccg' )
143
143
ccg_node .set ('root' , 's{0}_sp{1}' .format (sentence_id , 0 ))
144
- ccg_node .set ('id' , 's{0}_ccg{1}' .format (sentence_id , 0 ))
144
+ ccg_node .set ('id' , 's{0}_ccg{1}' .format (sentence_id , ccg_count ))
145
145
nodes = [tree [p ] for p in tree .treepositions () \
146
146
if isinstance (tree [p ], Tree )]
147
147
child_inds = defaultdict (list )
@@ -192,24 +192,42 @@ def make_jigg_sentence(line, sentence_id):
192
192
sentence_node .append (ccg_node )
193
193
return sentence_node
194
194
195
def add_ccg_nodes(line, sentence_node, sentence_id, ccg_count):
    """Parse *line* into a tree and attach one more <ccg> node to a sentence.

    Args:
        line: Text of a single parse, handed unchanged to ``make_tree``.
        sentence_node: Existing sentence element; the new ccg element is
            appended to it in place.
        sentence_id: Sentence number as read from the input. It is lowered
            by one before being passed on for id generation.
        ccg_count: Index forwarded to ``make_ccg_node`` for the new node.

    Returns:
        None. ``sentence_node`` is mutated.
    """
    shifted_id = sentence_id - 1
    parsed = make_tree(line)
    sentence_node.append(make_ccg_node(parsed, shifted_id, ccg_count))
200
+
201
+
195
202
# Build the output skeleton: <root><document><sentences/></document></root>.
root_node = etree.Element('root')
document_node = etree.Element('document')
root_node.append(document_node)
sentences_node = etree.Element('sentences')
document_node.append(sentences_node)

# Loop state.
sentence_id = 0          # id taken from the most recent 'ID=' line
ccg_count = 0            # consecutive repeats of the current id seen so far
is_new_sentence = False  # True between a fresh 'ID=' line and its first parse
sentence_tree = None     # sentence element that extra ccg nodes attach to

# Open the input in a ``with`` block so the handle is closed even when
# parsing a line raises; the bare ``codecs.open`` in the loop header
# previously left the file open.
with codecs.open(args.infile, 'r', 'utf-8') as infile:
    for line in infile:
        if line.startswith('ID='):
            # Header line: the integer after the last '=' is the sentence id.
            current_sentence_id = int(line.split('=')[-1])
            if sentence_id != current_sentence_id:
                is_new_sentence = True
                sentence_id = current_sentence_id
                ccg_count = 0
            else:
                # Same id again -- presumably another parse of the same
                # sentence (TODO confirm against the parser's output format).
                ccg_count += 1
        elif is_new_sentence:
            # First non-header line after a new id: create the sentence
            # element and register it in the document.
            is_new_sentence = False
            sentence_tree = make_jigg_sentence(line, sentence_id)
            sentences_node.append(sentence_tree)
        elif sentence_tree is not None:
            # Any later non-header line adds a ccg node to the current
            # sentence. Lines seen before any sentence exists are ignored.
            add_ccg_nodes(line, sentence_tree, sentence_id, ccg_count)
213
231
214
232
def serialize_tree (tree ):
215
233
tree_str = etree .tostring (
0 commit comments