-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathhubconf.py
111 lines (87 loc) · 3.87 KB
/
hubconf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Package names this hub configuration needs at load time.
# NOTE(review): presumably the torch.hub `dependencies` convention for hubconf.py
# (packages torch.hub checks are importable before loading entry points) — confirm.
dependencies = ["pos"]
import tarfile
from logging import getLogger
from pathlib import Path
import torch
# Module-level logger named after this module.
log = getLogger(__name__)
# NOTE(review): not referenced anywhere in the visible part of this file;
# `_get_model_location` tests against the literal "http" prefix instead.
CLARIN_URL = "http"
def _get_model_location(model_dir_or_url: str, model_name: str, force_download: bool) -> Path:
    """Return the local directory that holds the requested model.

    When ``model_dir_or_url`` starts with ``"http"`` it is treated as the URL of
    a gzipped tarball: the archive is stored as ``<hub dir>/<model_name>.tar.gz``
    and unpacked into ``<hub dir>/<model_name>``, where ``<hub dir>`` is
    ``torch.hub.get_dir()``. Any other value is treated as the path of an
    already existing local directory.

    Args:
        model_dir_or_url: URL of a ``.tar.gz`` model archive, or a local path.
        model_name: Name used for the cached archive and the extraction directory.
        force_download: If True, download and extract again even when a cached
            archive is already present.

    Returns:
        A Path to the model.

    Raises:
        FileNotFoundError: If a local path is given and it does not exist.
    """
    # Local directory: nothing to fetch, just validate and return.
    if not model_dir_or_url.startswith("http"):
        model_dir = Path(model_dir_or_url)
        if not model_dir.exists():
            raise FileNotFoundError(f"{model_dir} does not exist")
        return model_dir

    cache_dir = Path(torch.hub.get_dir())
    # get_dir() only returns a path; make sure it exists before writing into it.
    cache_dir.mkdir(parents=True, exist_ok=True)

    download_location = cache_dir / f"{model_name}.tar.gz"
    need_extraction = False
    if force_download or not download_location.exists():
        log.info(f"Downloading model from {model_dir_or_url}")
        torch.hub.download_url_to_file(model_dir_or_url, str(download_location))
        # A fresh archive must replace whatever was extracted before.
        need_extraction = True

    model_dir = cache_dir / model_name
    if need_extraction or not model_dir.exists():
        model_dir.mkdir(parents=True, exist_ok=True)
        # The archive comes from the network: where the running Python supports
        # extraction filters, use the "data" filter so members cannot escape
        # model_dir (absolute paths, "..", links pointing outside).
        extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(download_location, "r:gz") as tar:
            log.debug("Extracting model")
            tar.extractall(path=model_dir, **extract_kwargs)
            log.debug("Done extracting model")
    return model_dir
def lemma(
    model_dir_or_url="https://repository.clarin.is/repository/xmlui/bitstream/handle/20.500.12537/134/lemmatizer.tar.gz",
    device="cpu",
    force_download=False,
    *args,
    **kwargs,
):
    """
    Lemmatizer for Icelandic.

    model_dir_or_url (str): Location of the model. Either a URL (starting with "http") of a model archive
        or a local folder which contains the necessary files for loading a model.
        Defaults to the lemmatizer archive on repository.clarin.is.
    device: Forwarded unchanged to `_load_model`. Defaults to "cpu".
    force_download (bool): Set to True if the model should be re-downloaded.
    *args, **kwargs: Forwarded unchanged to `_load_model`.

    Returns whatever `_load_model` returns for the model name "lemma".
    """
    model_name = "lemma"
    return _load_model(
        model_dir_or_url,
        model_name,
        device,
        force_download,
        *args,
        **kwargs,
    )
def tag(
    model_dir_or_url="https://repository.clarin.is/repository/xmlui/bitstream/handle/20.500.12537/115/pos.tar.gz",
    device="cpu",
    force_download=False,
    *args,
    **kwargs,
):
    """
    Part-of-Speech tagger for Icelandic.

    model_dir_or_url (str): Location of the model. Either a URL (starting with "http") of a model archive
        or a local folder which contains the necessary files for loading a model.
        Defaults to the tagger archive on repository.clarin.is.
    device: Forwarded unchanged to `_load_model`. Defaults to "cpu".
    force_download (bool): Set to True if the model should be re-downloaded.
    *args, **kwargs: Forwarded unchanged to `_load_model`.

    Returns whatever `_load_model` returns for the model name "pos".
    """
    model_name = "pos"
    return _load_model(
        model_dir_or_url,
        model_name,
        device,
        force_download,
        *args,
        **kwargs,
    )
def tag_large(
    model_dir_or_url="https://repository.clarin.is/repository/xmlui/bitstream/handle/20.500.12537/115/pos-large.tar.gz",
    device="cpu",
    force_download=False,
    *args,
    **kwargs,
):
    """
    A large Part-of-Speech tagger for Icelandic.

    model_dir_or_url (str): Location of the model. Either a URL (starting with "http") of a model archive
        or a local folder which contains the necessary files for loading a model.
        Defaults to the large tagger archive on repository.clarin.is.
    device: Forwarded unchanged to `_load_model`. Defaults to "cpu".
    force_download (bool): Set to True if the model should be re-downloaded.
    *args, **kwargs: Forwarded unchanged to `_load_model`.

    Returns whatever `_load_model` returns for the model name "pos-large".
    """
    model_name = "pos-large"
    return _load_model(
        model_dir_or_url,
        model_name,
        device,
        force_download,
        *args,
        **kwargs,
    )
def _load_model(
    model_dir_or_url="http://localhost:8000/pos.tar.gz",
    model_name="",
    device="cpu",
    force_download=False,
    *args,
    **kwargs,
):
    """
    Resolve the model files on disk and build a `pos.Tagger` from them.

    model_dir_or_url (str): Location of the model. Either a URL (starting with "http") of a model archive
        or a local folder which contains the necessary files for loading a model.
    model_name (str): Name handed to `_get_model_location` together with the location.
    device: Passed to `Tagger` as its `device` keyword argument. Defaults to "cpu".
    force_download (bool): Set to True if the model should be re-downloaded.
    *args, **kwargs: Accepted but not used in this function.
        NOTE(review): they are not forwarded to `Tagger` — confirm this is intended.

    Returns the constructed `Tagger`.
    """
    # `pos` is imported here, inside the function, rather than at module import time.
    from pos import Tagger

    location = _get_model_location(model_dir_or_url, model_name, force_download)
    return Tagger(str(location), device=device)