-
Notifications
You must be signed in to change notification settings - Fork 3
/
decorate_coref.py
59 lines (55 loc) · 2.29 KB
/
decorate_coref.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import sys
import os
import re
from span import *
import nlp_utils
import pdb
def _decorate_sentence(words, coref_info, canonical_ids, colornames):
    """Colour the words of one sentence according to their coreference spans.

    Args:
        words: the word forms of the sentence, in order.
        coref_info: the per-word coreference annotations (one per word),
            passed unchanged to
            ``nlp_utils.construct_coreference_spans_from_text``.
        canonical_ids: dict mapping a raw coreference id to its rank of
            first appearance in the current document. Updated in place so
            the numbering carries over from one sentence to the next.
        colornames: palette to pick colours from; ids wrap around it.

    Returns:
        A list of ``(word, span_names, color)`` tuples, one per word.
        ``span_names`` joins the names of every span covering the word with
        ``'; '``; ``color`` is ``'black'`` when no span covers the word.
    """
    spans = nlp_utils.construct_coreference_spans_from_text(coref_info)
    decorated_sentence = []
    for i, word in enumerate(words):
        # Span boundaries are treated as inclusive on both ends.
        word_spans = [span for span in spans if span.start <= i <= span.end]
        if word_spans:
            # Only the first covering span decides the colour.
            coref_id = int(word_spans[0].name)
            # First-seen ids get consecutive numbers so that colours are
            # assigned in order of appearance rather than by raw id.
            canonical_id = canonical_ids.setdefault(coref_id,
                                                    len(canonical_ids))
            color = colornames[canonical_id % len(colornames)]
        else:
            color = 'black'
        span_names = '; '.join([span.name for span in word_spans])
        decorated_sentence.append((word, span_names, color))
    return decorated_sentence


def decorate_coref(coref_document):
    """Turn a coreference-annotated document into colour-tagged sentences.

    The input is split on newlines. Each non-empty, non-header line is split
    on tabs; the fourth field is taken as the word and the last field as its
    coreference annotation (this looks like a CoNLL-style layout -- confirm
    against the producer). An empty line ends the current sentence.

    Lines starting with ``#begin document`` or ``#end document`` are emitted
    as a single ``'<h1>...</h1>\\n'`` string element and reset the colour
    numbering, so every document starts again from the first colour.

    Args:
        coref_document: the whole annotated text as one string.

    Returns:
        A list whose elements are, in input order, either a sentence (a list
        of ``(word, span_names, color)`` tuples) or a header string as
        described above. An empty line with no pending words yields an empty
        sentence.

    Raises:
        IndexError: if a token line has fewer than four tab-separated fields.
    """
    colornames = ['aqua', 'blue', 'fuchsia', 'green', 'lime', 'maroon',
                  'navy', 'olive', 'orange', 'purple', 'red', 'teal']
    decorated_coref_document = []
    canonical_coref_ids = {}
    words = []
    coref_info = []
    for line in coref_document.split('\n'):
        # split('\n') already removed the newline; drop a stray carriage
        # return so CRLF input is handled like LF input.
        line = line.rstrip('\r')
        if line == '':
            decorated_coref_document.append(
                _decorate_sentence(words, coref_info, canonical_coref_ids,
                                   colornames))
            words = []
            coref_info = []
            continue
        if line.startswith(('#begin document', '#end document')):
            # Append the header as ONE element: using += with a string on a
            # list would add it character by character.
            decorated_coref_document.append('<h1>%s</h1>\n' % line)
            canonical_coref_ids = {}
            continue
        fields = line.split('\t')
        words.append(fields[3])
        coref_info.append(fields[-1])
    if words:
        # The text ended without a blank line after the last sentence;
        # flush it instead of silently dropping it.
        decorated_coref_document.append(
            _decorate_sentence(words, coref_info, canonical_coref_ids,
                               colornames))
    return decorated_coref_document