-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
68 lines (48 loc) · 1.93 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""test.py
Create LIF files from the Science Parse output. Testing the code on a directory
with Science Parse output files.
Usage:
$ python3 test.py JSON_DIR LIF_DIR TXT_DIR
The first directory is the one with JSON files created by science parse, the
second the target for LIF files and the third the target for text files. All
directories are assumed to exist.
"""
import os
import sys
import json
from converter import Converter
from lif import Container
def create_lif_files(science_parse_dir, lif_dir, txt_dir, test=False):
    """Convert every JSON file in science_parse_dir to a LIF and a text file.

    For each entry NAME.json in science_parse_dir, create_lif_file() is called
    to write NAME.lif into lif_dir and NAME.txt into txt_dir. Entries whose
    extension is not '.json' (compared case-insensitively) are skipped, so
    stray files in the input directory are neither processed nor used to
    derive a mangled output name.

    Args:
        science_parse_dir: directory that holds the input JSON files.
        lif_dir: existing directory that receives the .lif files.
        txt_dir: existing directory that receives the .txt files.
        test: passed through unchanged to create_lif_file().
    """
    # os.listdir() returns entries in arbitrary order; sort them so that the
    # processing order (and the progress output) is reproducible.
    for fname in sorted(os.listdir(science_parse_dir)):
        base, extension = os.path.splitext(fname)
        if extension.lower() != '.json':
            continue
        create_lif_file(os.path.join(science_parse_dir, fname),
                        os.path.join(lif_dir, base + '.lif'),
                        os.path.join(txt_dir, base + '.txt'),
                        test)
def create_lif_file(json_file, lif_file, txt_file, test=False):
    """Convert one JSON file into a LIF file and a plain text file.

    The content of json_file is handed to Converter. When the converter
    reports no error, its container (as a JSON string) is written to lif_file
    and its text value to txt_file. When it does report an error, the error is
    printed and the function returns; both output files have been opened for
    writing by then, so they exist but are empty.

    Args:
        json_file: path of the input file, read as UTF-8.
        lif_file: path of the LIF file to write, as UTF-8.
        txt_file: path of the text file to write, as UTF-8.
        test: when true, run test_lif_file() on the LIF file after a
            successful conversion.
    """
    print(f"Processing {os.path.basename(json_file)}")
    with open(json_file, encoding='utf8') as fh_in, \
         open(lif_file, 'w', encoding='utf8') as fh_out_lif, \
         open(txt_file, 'w', encoding='utf8') as fh_out_txt:
        c = Converter(fh_in.read())
        if c.error is not None:
            print(repr(c.error))
            # Nothing was written, so there is no LIF content to check;
            # returning here avoids loading an empty file in test_lif_file().
            return
        fh_out_lif.write(c.get_container_as_json_string())
        fh_out_txt.write(c.get_text_value())
    # Run the check only after the with-block has closed (and flushed) the
    # output files, so the LIF file is complete when it is read back.
    if test:
        test_lif_file(lif_file)
def test_lif_file(lif_file):
    """Print the text span of each header annotation in a LIF file.

    The LIF file is loaded into a Container and, for every annotation in the
    first view whose type ends in 'Header', the slice of the document text
    between the annotation's start and end offsets is printed in square
    brackets. Eyeballing that output shows whether the offsets line up. An
    empty line is printed at the end.
    """
    payload = Container(json_file=lif_file).payload
    document_text = payload.text.value
    first_view = payload.views[0]
    # Generator, not a list: annotations are still examined one at a time, in
    # order, while printing.
    headers = (annotation for annotation in first_view.annotations
               if annotation.type.endswith('Header'))
    for header in headers:
        span = document_text[header.start:header.end]
        print("[{}]".format(span))
    print('')
if __name__ == '__main__':
    # Script entry point: expects the JSON input directory, the LIF output
    # directory and the text output directory, in that order. Exit with a
    # usage message instead of an IndexError when arguments are missing;
    # any extra arguments are ignored.
    if len(sys.argv) < 4:
        sys.exit("Usage: python3 test.py JSON_DIR LIF_DIR TXT_DIR")
    science_parse_dir, lif_dir, txt_dir = sys.argv[1:4]
    create_lif_files(science_parse_dir, lif_dir, txt_dir, test=False)