|
25 | 25 |
|
26 | 26 | from gflanguages import languages_public_pb2
|
27 | 27 | from google.protobuf import text_format
|
28 |
| -from pkg_resources import resource_filename |
| 28 | +from importlib_resources import files |
29 | 29 |
|
30 | 30 | try:
|
31 | 31 | from ._version import version as __version__ # type: ignore
|
32 | 32 | except ImportError:
|
33 | 33 | __version__ = "0.0.0+unknown"
|
34 | 34 |
|
35 |
| -DATA_DIR = resource_filename("gflanguages", "data") |
36 | 35 |
|
| 36 | +def _load_thing(thing_type, proto_class, base_dir=None): |
| 37 | + things = {} |
37 | 38 |
|
38 |
| -def LoadLanguages(base_dir=DATA_DIR): |
39 |
| - if base_dir is None: |
40 |
| - base_dir = DATA_DIR |
| 39 | + def read_a_thing(contents): |
| 40 | + proto = proto_class() |
| 41 | + thing = text_format.Parse(contents, proto) |
| 42 | + assert thing.id not in things, f"Duplicate {thing_type} id: {thing.id}" |
| 43 | + things[thing.id] = thing |
41 | 44 |
|
42 |
| - languages_dir = os.path.join(base_dir, "languages") |
43 |
| - langs = {} |
44 |
| - for textproto_file in glob.iglob(os.path.join(languages_dir, "*.textproto")): |
45 |
| - with open(textproto_file, "r", encoding="utf-8") as f: |
46 |
| - language = text_format.Parse(f.read(), languages_public_pb2.LanguageProto()) |
47 |
| - assert language.id not in langs, f"Duplicate language id: {language.id}" |
48 |
| - langs[language.id] = language |
49 |
| - return langs |
| 45 | + if base_dir is not None: |
| 46 | + thing_dir = os.path.join(base_dir, thing_type) |
| 47 | + for textproto_file in glob.iglob(os.path.join(thing_dir, "*.textproto")): |
| 48 | + with open(textproto_file, "r", encoding="utf-8") as f: |
| 49 | + read_a_thing(f.read()) |
| 50 | + else: |
| 51 | + for textproto_file in files("gflanguages.data").joinpath(thing_type).iterdir(): |
| 52 | + if not textproto_file.name.endswith(".textproto"): |
| 53 | + continue |
| 54 | + read_a_thing(textproto_file.read_text(encoding="utf-8")) |
| 55 | + return things |
50 | 56 |
|
51 | 57 |
|
52 |
| -def LoadScripts(base_dir=DATA_DIR): |
53 |
| - if base_dir is None: |
54 |
| - base_dir = DATA_DIR |
| 58 | +def LoadLanguages(base_dir=None): |
| 59 | + return _load_thing("languages", languages_public_pb2.LanguageProto, base_dir) |
55 | 60 |
|
56 |
| - scripts_dir = os.path.join(base_dir, "scripts") |
57 |
| - scripts = {} |
58 |
| - for textproto_file in glob.iglob(os.path.join(scripts_dir, "*.textproto")): |
59 |
| - with open(textproto_file, "r", encoding="utf-8") as f: |
60 |
| - script = text_format.Parse(f.read(), languages_public_pb2.ScriptProto()) |
61 |
| - assert script.id not in scripts, f"Duplicate script id: {script.id}" |
62 |
| - scripts[script.id] = script |
63 |
| - return scripts |
64 | 61 |
|
| 62 | +def LoadScripts(base_dir=None): |
| 63 | + return _load_thing("scripts", languages_public_pb2.ScriptProto, base_dir) |
65 | 64 |
|
66 |
| -def LoadRegions(base_dir=DATA_DIR): |
67 |
| - if base_dir is None: |
68 |
| - base_dir = DATA_DIR |
69 | 65 |
|
70 |
| - regions_dir = os.path.join(base_dir, "regions") |
71 |
| - regions = {} |
72 |
| - for textproto_file in glob.iglob(os.path.join(regions_dir, "*.textproto")): |
73 |
| - with open(textproto_file, "r", encoding="utf-8") as f: |
74 |
| - region = text_format.Parse(f.read(), languages_public_pb2.RegionProto()) |
75 |
| - assert region.id not in regions, f"Duplicate region id: {region.id}" |
76 |
| - regions[region.id] = region |
77 |
| - return regions |
| 66 | +def LoadRegions(base_dir=None): |
| 67 | + return _load_thing("regions", languages_public_pb2.RegionProto, base_dir) |
78 | 68 |
|
79 | 69 |
|
80 | 70 | def parse(exemplars: str):
|
|
0 commit comments