-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtag_senses_manual.py
executable file
·67 lines (56 loc) · 1.53 KB
/
tag_senses_manual.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/python
import sys
import wsd
import nltk
from nltk.corpus.reader.plaintext import PlaintextCorpusReader
from nltk.text import *
import csv
import random
from collections import Counter
# Interactive word-sense tagging script.
#
# Usage: tag_senses_manual.py word sense1 sense2
#
# For every file of the plaintext corpus under 'outcorpus/' (visited in random
# order) that contains `word`, one randomly chosen occurrence is shown and the
# user is asked which of the two senses applies.  Each answer is recorded as a
# (file id, token offset, sense) row, and all rows are written to
# 'senses_<word>.csv' when the corpus is exhausted or the user quits.
#
# The code is written so that it behaves the same on Python 2 and also runs
# on Python 3 (single-argument print calls, input shim, csv file mode).

if len(sys.argv) != 4:
    print("Usage: %s word sense1 sense2" % sys.argv[0])
    # sys.exit instead of the bare `exit` helper: the latter is only injected
    # by the `site` module and is not guaranteed to exist.
    sys.exit(-1)

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

# Unique marker returned by _ask_for_sense when tagging should stop.
_QUIT = object()


def _ask_for_sense(senses):
    """Prompt until the user picks a sense, skips, or quits.

    Args:
        senses: list of sense labels; the user selects one by its 1-based
            position in this list.

    Returns:
        The selected sense label, ``None`` if the user entered "s" (skip),
        or ``_QUIT`` if the user entered "q" or the input stream ended.
        Any other input re-prompts.
    """
    while True:
        try:
            key = _read_line("> ")
        except EOFError:
            # End of input (e.g. Ctrl-D): stop like "q" so the tags collected
            # so far are still saved instead of being lost to a traceback.
            return _QUIT
        if key == "s":
            return None
        if key == "q":
            return _QUIT
        if key.isdigit():
            try:
                number = int(key)
            except ValueError:
                # Python 3's str.isdigit() accepts characters (for example
                # superscript digits) that int() rejects; treat as invalid.
                continue
            if 0 < number <= len(senses):
                return senses[number - 1]


def _open_csv_for_writing(path):
    """Open `path` for csv.writer the way the csv docs prescribe.

    Python 3 wants text mode with newline=""; Python 2 wants binary mode.
    Either way the csv module's own line terminator is written unchanged.
    """
    if sys.version_info[0] >= 3:
        return open(path, "w", newline="")
    return open(path, "wb")


focal_word = sys.argv[1]
senses = [sys.argv[2], sys.argv[3]]
word_sense_list = []

corpus = PlaintextCorpusReader('outcorpus/', '.*')
corpus_ids = corpus.fileids()
random.shuffle(corpus_ids)

for infile in corpus_ids:
    text = Text(corpus.words(infile))
    offsets = nltk.ConcordanceIndex(text.tokens).offsets(focal_word)
    if not offsets:
        # The focal word does not occur in this file.
        continue

    # Only one randomly chosen occurrence per file is presented.
    offset = random.choice(offsets)
    print(infile)
    # NOTE(review): wsd.print_context is a project helper; presumably it
    # displays the text around `offset` - confirm against wsd.
    wsd.print_context(text, offset)
    for number, sense in enumerate(senses, 1):
        print("%d: %s" % (number, sense))
    print("s: skip")
    print("q: quit")

    chosen = _ask_for_sense(senses)
    if chosen is _QUIT:
        break
    if chosen is None:
        # Skipped: move on to the next file without recording anything.
        continue

    word_sense_list.append((infile, offset, chosen))
    print("%s %s %s" % (infile, offset, chosen))
    # Running tally of how often each sense has been assigned so far.
    sense_freq = Counter(elem[2] for elem in word_sense_list)
    print(sense_freq)

print(word_sense_list)
outfile = "senses_" + focal_word + ".csv"
print("Saving to %s" % outfile)
with _open_csv_for_writing(outfile) as f:
    writer = csv.writer(f)
    writer.writerows(word_sense_list)