-
Notifications
You must be signed in to change notification settings - Fork 1
/
ud_process.py
55 lines (53 loc) · 1.89 KB
/
ud_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import sys
import utils
import os
# Copy selected treebank files into a single directory, keeping only the
# sentences that pass a length filter.
#
# Command line: <dir_path> <target_path> <length>
#   dir_path    -- directory whose sub-directories contain the input files
#   target_path -- directory that receives the filtered files (it is not
#                  created here, so it must already exist)
#   length      -- integer threshold used by the sentence filter below
#
# Output files are named after the input file only, so two selected files
# with the same name in different sub-directories overwrite each other.
dir_path = sys.argv[1]
target_path = sys.argv[2]
length = int(sys.argv[3])

for sub_dir in os.listdir(dir_path):
    sub_dir_path = os.path.join(dir_path, sub_dir)
    # A regular file sitting next to the sub-directories would make
    # os.listdir() raise, so anything that is not a directory is skipped.
    if not os.path.isdir(sub_dir_path):
        continue

    for file_name in os.listdir(sub_dir_path):
        # The selection looks at fixed offsets up to 18 characters from the
        # end of the name; shorter names are skipped outright.
        if len(file_name) < 18:
            continue

        # Select names that have 'ev' or 'est' ending 15 characters before
        # the end of the name, or a '5' eight characters before the end.
        # NOTE(review): presumably this picks dev/test files plus one
        # specific training file -- confirm against the real file names.
        has_ev = file_name[-17:-15] == 'ev'
        has_est = file_name[-18:-15] == 'est'
        has_five = file_name[-8] == '5'
        if not (has_ev or has_est or has_five):
            continue

        _, _, sentences = utils.read_data(
            os.path.join(sub_dir_path, file_name), False)
        # For integer sizes this keeps sentences with s.size - 1 <= length.
        filtered_sentences = [
            s for s in sentences if s.size - 1 < length + 1]

        # `with` closes the output file even if writing a sentence fails.
        with open(os.path.join(target_path, file_name), "w") as fw:
            print('writing for ' + file_name)
            for s in filtered_sentences:
                for t in s.entries:
                    # Entries with id 0 are never written
                    # (presumably an artificial root entry -- TODO confirm).
                    if t.id == 0:
                        continue
                    # One token per line, ten tab-separated fields.
                    fw.write('\t'.join([
                        str(t.id),
                        t.form,
                        t.lemma,
                        t.pos,
                        t.cpos,
                        t.feats,
                        str(t.parent_id),
                        t.relation,
                        t.deps,
                        t.misc,
                    ]))
                    fw.write('\n')
                # A blank line terminates each sentence.
                fw.write('\n')
        print('writing for ' + file_name + ' completed')

print("writing completed")