-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbuild_postcodes.py
executable file
·80 lines (59 loc) · 2.21 KB
/
build_postcodes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python3
"""
Fetch list of postcodes from the Icelandic postal service,
create and print dict mapping postcode to placename and
other related information. Also adds placenames in the
nominative case (nefnifall) since the source data only
includes placenames in dative (þágufall).
https://www.postur.is/gogn/Gotuskra/postnumer.txt
"""
import requests
import pprint
import logging
import csv
from io import StringIO
from reynir import NounPhrase
from iceaddr.postcodes import POSTCODES
POSTCODES_REMOTE_URL = "https://www.postur.is/gogn/Gotuskra/postnumer.txt"
def _clean_name(name: str) -> str:
return name.split(" - ")[0].strip()
def main() -> None:
    """Fetch the remote postcode list and report differences from bundled data.

    Downloads the semicolon-delimited file at POSTCODES_REMOTE_URL and, for
    each row, compares the place name in dative (as given by the source), the
    place name in nominative (derived via NounPhrase) and the type against a
    copy of POSTCODES. Every differing field is printed as "old --> new" and
    updated in the copy. Postcodes absent from the bundled data are added.

    If anything changed, the complete updated mapping is pretty-printed;
    otherwise a short "no change" notice is printed.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server responds with an error status.
    """
    # Copy each per-postcode record too, so that updating a field below does
    # not modify the imported POSTCODES table in place.
    pc = {code: dict(info) for code, info in dict(POSTCODES).items()}

    # Use a timeout so a dead connection cannot hang the script, and fail
    # loudly on an HTTP error instead of parsing an error page as CSV.
    req = requests.get(POSTCODES_REMOTE_URL, allow_redirects=True, timeout=30)
    req.raise_for_status()

    changed = False

    reader = csv.DictReader(StringIO(req.text), delimiter=";")
    for r in reader:
        postcode = int(r["Póstnúmer"])
        if postcode not in pc:
            logging.warning(f"Postcode '{postcode}' did not already exist in data.")
            # Start an empty record so the field updates below populate it
            # rather than failing with KeyError.
            pc[postcode] = {}
            changed = True
        entry = pc[postcode]

        tp = r["Tegund"]
        # CSV file from postur.is only contains postcode placenames in
        # the dative form (þgf.). Try to lemmatise to nominative (nf.)
        # using Reynir, falling back to the dative form on failure.
        p_dat = _clean_name(r["Staður"])
        p_nom = NounPhrase(p_dat).nominative
        if not p_nom:
            logging.warning(f"Unable to decline placename '{p_dat}'")
            p_nom = p_dat

        fresh = {"stadur_nf": p_nom, "stadur_tgf": p_dat, "tegund": tp}
        for key, new_value in fresh.items():
            old_value = entry.get(key)
            if old_value != new_value:
                # Report before overwriting so the old value is still shown.
                print(f"{old_value} --> {new_value}")
                entry[key] = new_value
                changed = True

    if not changed:
        print("No change since last update")
    else:
        pprint.PrettyPrinter(indent=4).pprint(pc)
if __name__ == "__main__":
    # Command line invocation.
    main()