""" "
Run unit tests for the ZPar dependency parser without wordnet access.
:author: Nitin Madnani ([email protected])
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import glob
import os

from io import open
from itertools import product
from os.path import abspath, dirname, join

from nose.tools import assert_equal

from zpar import ZPar

_my_dir = abspath(dirname(__file__))

z = None
depparser = None

def setUp():
    """
    set up things we need for the tests
    """
    global z, depparser
    assert 'ZPAR_MODEL_DIR' in os.environ
    model_dir = os.environ['ZPAR_MODEL_DIR']
    z = ZPar(model_dir)
    depparser = z.get_depparser()


def tearDown():
    """
    Clean up after the tests
    """
    global z, depparser
    if z:
        z.close()
        del depparser
        del z

    # delete all the files we may have created
    data_dir = abspath(join(_my_dir, '..', 'examples'))
    for f in glob.glob(join(data_dir, 'test*.dep')):
        os.unlink(f)
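
# NOTE: a minimal sketch of how this module is typically run, assuming nose
# is installed and ZPAR_MODEL_DIR points at a local ZPar model directory
# (the paths below are hypothetical):
#
#     ZPAR_MODEL_DIR=/path/to/english-models nosetests tests/test_depparser_no_wordnet.py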

def check_dep_parse_sentence_no_wordnet(tokenize=False,
                                        with_lemmas=False,
                                        tagged=False):
    """
    Check the dep_parse_sentence method with and without tokenization,
    with and without lemmas, and with and without pre-tagged input,
    all under the condition that there is no wordnet corpus
    accessible to nltk.
    """
    global depparser

    if tagged:
        sentence = "I/PRP 'm/VBP going/VBG to/TO the/DT market/NN ./."
    else:
        if tokenize:
            sentence = "I'm going to the market."
        else:
            sentence = "I 'm going to the market ."

    correct_output = ("I\tPRP\t1\tSUB\n'm\tVBP\t-1\tROOT\n"
                      "going\tVBG\t1\tVC\nto\tTO\t2\tVMOD\n"
                      "the\tDT\t5\tNMOD\nmarket\tNN\t3\tPMOD\n"
                      ".\t.\t1\tP\n")

    if not tagged:
        parsed_sentence = depparser.dep_parse_sentence(sentence,
                                                       tokenize=tokenize,
                                                       with_lemmas=with_lemmas)
    else:
        parsed_sentence = depparser.dep_parse_tagged_sentence(sentence,
                                                              with_lemmas=with_lemmas)

    assert_equal(parsed_sentence, correct_output)
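
# For reference, each non-empty line of the parser output checked above has
# four tab-separated fields: the word, its POS tag, the 0-indexed position
# of its head word (-1 for the root), and the dependency label.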

def test_dep_parse_sentence_no_wordnet():
    for (tokenize, with_lemmas, tagged) in product([True, False],
                                                   [True, False],
                                                   [True, False]):
        yield (check_dep_parse_sentence_no_wordnet,
               tokenize,
               with_lemmas,
               tagged)
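
# nose runs each yielded (function, *args) tuple above as a separate test
# case, so the three-way product() expands into 2 x 2 x 2 = 8 combinations.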

def check_dep_parse_file_no_wordnet(tokenize=False,
                                    with_lemmas=False,
                                    tagged=False):
    """
    Check the dep_parse_file method with and without tokenization,
    with and without lemmas, and with and without pre-tagged input,
    all under the condition that there is no wordnet corpus
    accessible to nltk.
    """
    global depparser

    if tagged:
        prefix = 'test_tagged'
    else:
        if tokenize:
            prefix = 'test'
        else:
            prefix = 'test_tokenized'

    correct_output = ['I\tPRP\t1\tSUB', 'am\tVBP\t-1\tROOT',
                      'going\tVBG\t1\tVC', 'to\tTO\t2\tVMOD',
                      'the\tDT\t5\tNMOD', 'market\tNN\t3\tPMOD',
                      '.\t.\t1\tP', '', 'Are\tVBP\t-1\tROOT',
                      'you\tPRP\t0\tSUB', 'going\tVBG\t0\tVMOD',
                      'to\tTO\t4\tVMOD', 'come\tVB\t2\tVMOD',
                      'with\tIN\t4\tVMOD', 'me\tPRP\t5\tPMOD',
                      '?\t.\t0\tP', '']

    input_file = abspath(join(_my_dir, '..', 'examples',
                              '{}.txt'.format(prefix)))
    output_file = abspath(join(_my_dir, '..', 'examples',
                               '{}.dep'.format(prefix)))

    # dependency parse the file
    if not tagged:
        depparser.dep_parse_file(input_file,
                                 output_file,
                                 tokenize=tokenize,
                                 with_lemmas=with_lemmas)
    else:
        depparser.dep_parse_tagged_file(input_file,
                                        output_file,
                                        with_lemmas=with_lemmas)

    # read the output file and make sure we have the expected output
    with open(output_file, 'r') as outf:
        output = [l.strip() for l in outf.readlines()]

    assert_equal(output, correct_output)
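
# The empty strings in correct_output above correspond to the blank lines
# that separate one parsed sentence from the next in the .dep output file.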

def test_dep_parse_file_no_wordnet():
    for (tokenize, with_lemmas, tagged) in product([True, False],
                                                   [True, False],
                                                   [True, False]):
        yield (check_dep_parse_file_no_wordnet,
               tokenize,
               with_lemmas,
               tagged)
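
# A minimal sketch of the API usage these tests exercise, mirroring setUp()
# above; the model path is hypothetical:
#
#     from zpar import ZPar
#     z = ZPar('/path/to/english-models')  # hypothetical model path
#     depparser = z.get_depparser()
#     print(depparser.dep_parse_sentence("I 'm going to the market ."))
#     z.close()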