-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload_dictionary.py
29 lines (24 loc) · 1.04 KB
/
load_dictionary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds
import time
import os
import logging
# Logging is deliberately left unconfigured at import time. To see the INFO
# messages emitted while loading, configure logging in the caller, e.g.:
# logging.basicConfig(level="INFO")

# Directory that holds the generated subword vocabularies.
output_dir = "nmt"

# Paths of the English and Chinese vocabularies inside ``output_dir``.
en_vocab_file, zh_vocab_file = (
    os.path.join(output_dir, vocab_name)
    for vocab_name in ("en_vocab", "zh_vocab")
)
def load_dictionary():
    """Load the English and Chinese subword encoders from their vocabulary files.

    The paths come from the module-level ``en_vocab_file`` and
    ``zh_vocab_file``. For each encoder the loaded path and its
    ``vocab_size`` are logged through the root logger at INFO level, followed
    by the CPU time (``time.process_time``, not wall-clock time) spent
    loading both.

    Returns:
        tuple: ``(subword_encoder_en, subword_encoder_zh)``, the English and
        Chinese ``SubwordTextEncoder`` instances, in that order.

    Raises:
        Whatever ``SubwordTextEncoder.load_from_file`` raises (for example
        when a vocabulary file cannot be read) propagates unchanged.
    """
    start = time.process_time()

    # ``tfds.features.text`` is the encoder's original home, but newer
    # tensorflow_datasets releases dropped it and expose the same class under
    # ``tfds.deprecated.text``. Prefer the original location when it exists so
    # older installs behave exactly as before, and fall back otherwise.
    legacy_text_api = getattr(tfds.features, "text", None)
    encoder_cls = getattr(legacy_text_api, "SubwordTextEncoder", None)
    if encoder_cls is None:
        encoder_cls = tfds.deprecated.text.SubwordTextEncoder

    subword_encoder_en = encoder_cls.load_from_file(en_vocab_file)
    logging.info(f"loaded the English dictionary: {en_vocab_file}")
    logging.info(f"the size:{subword_encoder_en.vocab_size}")

    subword_encoder_zh = encoder_cls.load_from_file(zh_vocab_file)
    logging.info(f"loaded the Chinese dictionary: {zh_vocab_file}")
    logging.info(f"the size: {subword_encoder_zh.vocab_size}")

    end = time.process_time()
    logging.info(f"process time: {end - start}")
    return subword_encoder_en, subword_encoder_zh
def test():
    """Print a fixed marker line to standard output."""
    marker = "testing"
    print(marker)