forked from doolin/Python-Scripts
-
Notifications
You must be signed in to change notification settings - Fork 1
/
build_gns.py
executable file
·156 lines (139 loc) · 6.89 KB
/
build_gns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# International Country Files Geographic Names (GNS)
# http://earth-info.nga.mil/gns/html/gis_countryfiles.htm
# http://earth-info.nga.mil/gns/html/geonames_dd_dms_date_20120402.zip
import datetime, csv, os, re, sqlite3, unicodedata
from locFunc import uniasc, cityctry
cc_iso = {'BD': 'BM', 'BF': 'BS', 'BG': 'BD', 'BA': 'BH', 'WA': 'NA', 'BC': 'BW', 'BL': 'BO', 'BN': 'BJ', 'BO': 'BY', 'BH': 'BZ', 'WI': 'EH', 'BK': 'BA', 'BU': 'BG', 'BP': 'SB', 'TP': 'ST', 'BX': 'BN', 'BY': 'BI', 'RP': 'PH', 'RS': 'RU', 'TL': 'TK', 'RM': 'MH', 'RI': 'RS', 'TK': 'TC', 'GV': 'GN', 'GG': 'GE', 'GB': 'GA', 'GA': 'GM', 'GM': 'DE', 'GK': 'GG', 'GJ': 'GD', 'SV': 'SJ', 'HO': 'HN', 'HA': 'HT', 'PS': 'PW', 'PP': 'PG', 'PU': 'GW', 'JA': 'JP', 'PC': 'PN', 'PA': 'PY', 'PO': 'PT', 'PM': 'PA', 'EN': 'EE', 'EI': 'IE', 'ZI': 'ZW', 'EK': 'GQ', 'ZA': 'ZM', 'EZ': 'CZ', 'AN': 'AD', 'ES': 'SV', 'UP': 'UA', 'MG': 'MN', 'MF': 'YT', 'MA': 'MG', 'MC': 'MO', 'MB': 'MQ', 'MO': 'MA', 'MN': 'MC', 'MI': 'MW', 'MH': 'MS', 'MJ': 'ME', 'MU': 'OM', 'MP': 'MU', 'UK': 'GB', 'VT': 'VA', 'FP': 'PF', 'FS': 'TF', 'FG': 'GF', 'NH': 'VU', 'NI': 'NG', 'NE': 'NU', 'NG': 'NE', 'NS': 'SR', 'NT': 'AN', 'NU': 'NI', 'CK': 'CC', 'CJ': 'KY', 'CI': 'CL', 'CH': 'CN', 'CN': 'KM', 'CB': 'KH', 'CG': 'CD', 'CF': 'CG', 'CE': 'LK', 'CD': 'TD', 'CS': 'CR', 'CW': 'CK', 'CT': 'CF', 'SZ': 'CH', 'SX': 'GS', 'SP': 'ES', 'SW': 'SE', 'KN': 'KP', 'SU': 'SD', 'ST': 'LC', 'KS': 'KR', 'KR': 'KI', 'SN': 'SG', 'KU': 'KW', 'KT': 'CX', 'SC': 'KN', 'SB': 'PM', 'SG': 'SN', 'SF': 'ZA', 'SE': 'SC', 'DO': 'DM', 'UV': 'BF', 'YM': 'YE', 'DA': 'DK', 'DR': 'DO', 'LG': 'LV', 'LE': 'LB', 'TX': 'TM', 'LO': 'SK', 'TT': 'TL', 'TU': 'TR', 'TS': 'TN', 'LH': 'LT', 'LI': 'LR', 'TN': 'TO', 'TO': 'TG', 'LT': 'LS', 'LS': 'LI', 'TI': 'TJ', 'TD': 'TT', 'AA': 'AW', 'AC': 'AG', 'IZ': 'IQ', 'AG': 'DZ', 'VI': 'VG', 'IS': 'IL', 'AJ': 'AZ', 'WZ': 'SZ', 'VM': 'VN', 'IV': 'CI', 'AS': 'AU', 'AU': 'AT', 'AV': 'AI', 'IC': 'IS'}
flder = "test/GNS"
fname = sorted([x for x in os.listdir(flder)])[-1]
conn = sqlite3.connect("loctbl.sqlite3")
c = conn.cursor()
#http://www.sqlite.org/faq.html#q7
#tbls = [x[0] for x in c.execute("select name from sqlite_master where type='table'")]
def create_location_table_orig():
c.executescript("""
CREATE TABLE IF NOT EXISTS gnsloc (
RC INTEGER,
UFI INTEGER, UNI INTEGER,
LAT FLOAT, LONG FLOAT,
DMS_LAT INTEGER, DMS_LONG INTEGER,
MGRS VARCHAR(15), JOG VARCHAR(7),
FC VARCHAR(1), DSG VARCHAR(5),
PC INTEGER, CC1 VARCHAR(2),
ADM1 VARCHAR(2), ADM2 VARCHAR(2),
POP INTEGER, ELEV INTEGER,
CC2 VARCHAR(2), NT VACRHAR(1),
LC VARCHAR(3), SHORT_FORM VARCHAR(10),
GENERIC VARCHAR(15),SORT_NAME VARCHAR(15),
FULL_NAME VARCHAR(15),
FULL_NAME_ND VARCHAR(15),
MODIFY_DATE VARCHAR(10),
SORT_NAME3 VARCHAR(3),
SORT_NAME4R VARCHAR(3),
UNIQUE(RC, CC1, ADM1, ADM2, CC2, SORT_NAME));
CREATE INDEX IF NOT EXISTS idx_all ON gnsloc (RC, CC1, ADM1, ADM2, CC2, SORT_NAME);
""")
#create_location_table_orig()
def create_location_table():
c.executescript("""
CREATE TABLE IF NOT EXISTS gnsloc (
RC INTEGER,
UFI INTEGER,
UNI INTEGER,
LAT FLOAT,
LONG FLOAT,
DMS_LAT INTEGER,
DMS_LONG INTEGER,
MGRS VARCHAR(15),
JOG VARCHAR(7),
FC VARCHAR(1),
DSG VARCHAR(5),
PC INTEGER,
CC1 VARCHAR(2),
ADM1 VARCHAR(2),
POP INTEGER,
ELEV INTEGER,
CC2 VARCHAR(2),
NT VACRHAR(1),
LC VARCHAR(3),
SHORT_FORM VARCHAR(10),
GENERIC VARCHAR(15),
SORT_NAME VARCHAR(15),
FULL_NAME VARCHAR(15),
FULL_NAME_ND VARCHAR(15),
SORT_NAME_RG VARCHAR(15),
FULL_NAME_RG VARCHAR(15),
FULL_NAME_ND_RG VARCHAR(15),
NOTE VARCHAR(4000),
MODIFY_DATE VARCHAR(10),
UNIQUE(RC, CC1, ADM1, CC2, SORT_NAME));
CREATE INDEX IF NOT EXISTS idx_all ON gnsloc (RC, CC1, ADM1, CC2, SORT_NAME);
""")
create_location_table()
f = open(flder+"/"+fname, "r")
print f.readline()
j = 0
for i,x in enumerate(f):
#print x
x = x.rstrip('\r\n')
rec = x.split("\t")
## if rec[9]=="P" and rec[11]!="": #if Unknown, super small right?
## if rec[9]=="P":
#properly adjust country code
if rec[12] in cc_iso:
rec[12] = cc_iso[rec[12]]
rec[22] = uniasc(unicode(rec[22], "latin-1")).upper()
rec[23] = uniasc(unicode(rec[23], "latin-1")).upper()
rec[24] = uniasc(unicode(rec[24], "latin-1")).upper()
rec[25] = uniasc(unicode(rec[25], "latin-1")).upper()
rec[25] = rec[25][:-1]
#rec.extend([rec[22].upper()[:3], rec[22].upper()[::-1][:4]])
#c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*28), rec)
c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*29), rec)
if i%100000==0:
conn.commit()
print i, datetime.datetime.now()
j = 0
## if i==50000:
## break
conn.commit()
def gns_basic_indexing_orig():
print "INDEXING - BASIC"
c.executescript("""
CREATE INDEX IF NOT EXISTS idx_rc ON gnsloc (RC);
CREATE INDEX IF NOT EXISTS idx_cc1 ON gnsloc (CC1);
CREATE INDEX IF NOT EXISTS idx_cc2 ON gnsloc (CC2);
CREATE INDEX IF NOT EXISTS idx_adm1 ON gnsloc (ADM1);
CREATE INDEX IF NOT EXISTS idx_adm2 ON gnsloc (ADM2);
CREATE INDEX IF NOT EXISTS idx_fc ON gnsloc (FC);
""")
def gns_combo_indexing_orig():
print "INDEXING - COMBO"
c.executescript("""
CREATE INDEX IF NOT EXISTS idx_ctc0 ON gnsloc (SORT_NAME, CC1);
CREATE INDEX IF NOT EXISTS idx_ctc1 ON gnsloc (FULL_NAME, CC1);
CREATE INDEX IF NOT EXISTS idx_ctc2 ON gnsloc (FULL_NAME_ND, CC1);
CREATE INDEX IF NOT EXISTS idx_ct3 ON gnsloc (SORT_NAME3, CC1);
CREATE INDEX IF NOT EXISTS idx_ct4r ON gnsloc (SORT_NAME4R, CC1);
CREATE INDEX IF NOT EXISTS idx_ctsf ON gnsloc (SHORT_FORM, CC1);
""")
def gns_basic_indexing():
print "INDEXING - BASIC"
c.executescript("""
CREATE INDEX IF NOT EXISTS idx_rc ON gnsloc (RC);
CREATE INDEX IF NOT EXISTS idx_cc1 ON gnsloc (CC1);
CREATE INDEX IF NOT EXISTS idx_cc2 ON gnsloc (CC2);
CREATE INDEX IF NOT EXISTS idx_adm1 ON gnsloc (ADM1);
CREATE INDEX IF NOT EXISTS idx_fc ON gnsloc (FC);
""")
def gns_combo_indexing():
print "INDEXING - COMBO"
c.executescript("""
CREATE INDEX IF NOT EXISTS idx_ctc0 ON gnsloc (SORT_NAME, CC1);
CREATE INDEX IF NOT EXISTS idx_ctc1 ON gnsloc (FULL_NAME, CC1);
CREATE INDEX IF NOT EXISTS idx_ctc2 ON gnsloc (FULL_NAME_ND, CC1);
CREATE INDEX IF NOT EXISTS idx_ct3 ON gnsloc (SORT_NAME, CC1);
CREATE INDEX IF NOT EXISTS idx_ctsf ON gnsloc (SHORT_FORM, CC1);
""")
gns_basic_indexing()
gns_combo_indexing()