forked from migurski/Extractotron
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build-index.py
298 lines (242 loc) · 13.2 KB
/
build-index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
from urllib import urlopen
from urlparse import urljoin, urlparse
from httplib import HTTPConnection
from re import compile
from csv import DictReader
from sys import argv, stderr
from dateutil import parser
from ModestMaps import mapByExtent
from ModestMaps.Core import Point
from ModestMaps.Geo import Location
from ModestMaps.OpenStreetMap import Provider
# Pixel size and tile provider handed to mapByExtent() when a map is fitted
# to a city's bounding box.
dimensions = Point(960, 600)
provider = Provider()

# The extract log is fetched from here; file links are resolved against it.
base_url = 'http://osm-metro-extracts.s3.amazonaws.com/log.txt'

# Log lines have the form "<file name> <size>". Per-city archives:
#   <slug>.osm.bz2 / <slug>.osm.pbf
extract_pat = compile(r'^((\S+)\.osm\.(bz2|pbf))\s+(\d+)$')
#   <slug>.coastline.zip
coastshape_pat = compile(r'^((\S+)\.coastline\.zip)\s+(\d+)$')
#   <slug>.imposm-shapefiles.zip
shp_imposm_pat = compile(r'^((\S+)\.imposm-shapefiles\.zip)\s+(\d+)$')
#   <slug>.<anything>shapefiles.zip -- loose enough to match imposm archive
#   names as well, so it is only meaningful when tried after shp_imposm_pat.
shp_osm2pgsql_pat = compile(r'^((\S+)\..*\bshapefiles\.zip)\s+(\d+)$')

# Worldwide coastline archives: <slug>-merc.tar.bz2 / <slug>-latlon.tar.bz2
coastline_pat = compile(r'^((\w+)-(latlon|merc)\.tar\.bz2)\s+(\d+)$')

# Month abbreviations indexed by month number; slot 0 is a placeholder so
# that months[1] is 'Jan'.
months = ['-', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
def nice_size(size):
    """Format a byte count as a short human-readable string.

    The count is scaled by powers of 1024 to the largest unit it reaches
    (KB, MB, GB or TB). Scaled values below 10 keep one decimal place
    ('1.5 KB'); larger ones are truncated to a whole number ('10 MB').
    Counts below 1 KB are reported as whole bytes ('5 B').

    size -- number of bytes (int or float).
    Returns the formatted string.
    """
    KB = 1024.
    MB = 1024. * KB
    GB = 1024. * MB
    TB = 1024. * GB

    if size < KB:
        # Bytes are not scaled, so a decimal place would only ever read
        # as a spurious '.0'.
        return '%d B' % size

    if size < MB:
        scaled, suffix = size / KB, 'KB'
    elif size < GB:
        scaled, suffix = size / MB, 'MB'
    elif size < TB:
        scaled, suffix = size / GB, 'GB'
    else:
        scaled, suffix = size / TB, 'TB'

    if scaled < 10:
        return '%.1f %s' % (scaled, suffix)

    return '%d %s' % (scaled, suffix)
def nice_time(time):
    """Describe a duration in seconds as a rough phrase such as '3 hours'.

    Anything under 15 seconds is 'moments'. Otherwise the duration is
    expressed in the largest unit (seconds, minutes, hours, days, weeks,
    30-day months) of which it spans at least one and a half, truncated
    to a whole count. The unit name is singular for a count of one
    ('1 hour') and plural otherwise.

    time -- duration in seconds (int or float).
    Returns the descriptive string.
    """
    if time < 15:
        return 'moments'

    minute, hour, day, week, month = 60, 3600, 86400, 604800, 2592000

    # (exclusive upper bound in seconds, seconds per unit, singular name)
    scales = (
        (90, 1, 'second'),
        (1.5 * hour, minute, 'minute'),
        (1.5 * day, hour, 'hour'),
        (1.5 * week, day, 'day'),
        (1.5 * month, week, 'week'),
    )

    # Months are the catch-all for anything beyond the last bound.
    unit_seconds, unit = month, 'month'

    for limit, seconds, name in scales:
        if time < limit:
            unit_seconds, unit = seconds, name
            break

    count = int(time / float(unit_seconds))

    # Each unit starts at one and a half of itself, which truncates to 1,
    # so the singular form is needed at the bottom of every range.
    if count != 1:
        unit += 's'

    return '%d %s' % (count, unit)
# Coastline archives that the "Coastline Shapefiles" section links to, each
# needed in both the 'merc' and 'latlon' projections.
_COAST_SLUGS = ('processed_p', 'coastline-good', 'processed_i',
                'coastline_p', 'post_errors', 'post_missing')


def _parse_log(log):
    """Sort the lines of the extract log into city files and coastline files.

    Lines that match none of the module's log patterns are skipped.

    Returns a (files, coast) pair of nested dictionaries:
      files[slug][key] = (file name, size, URL), with key one of 'bz2',
        'pbf', 'coastline', 'imposm shapefiles', 'osm2pgsql shapefiles';
      coast[slug][projection] = (file name, size, URL), with projection
        'merc' or 'latlon'.
    """
    # Order matters: the osm2pgsql pattern also matches imposm archive
    # names, so it has to be tried after the imposm pattern.
    shapefile_patterns = (
        (coastshape_pat, 'coastline'),
        (shp_imposm_pat, 'imposm shapefiles'),
        (shp_osm2pgsql_pat, 'osm2pgsql shapefiles'),
    )

    files = dict()
    coast = dict()

    for line in log:
        match = coastline_pat.match(line)

        if match:
            name, slug, prj, size = match.group(1, 2, 3, 4)
            entry = (name, int(size), urljoin(base_url, name))
            coast.setdefault(slug, dict())[prj] = entry
            continue

        match = extract_pat.match(line)

        if match:
            # The file extension (bz2 or pbf) doubles as the key.
            name, slug, key, size = match.group(1, 2, 3, 4)
        else:
            for pattern, key in shapefile_patterns:
                match = pattern.match(line)
                if match:
                    name, slug, size = match.group(1, 2, 3)
                    break
            else:
                # Not a line describing any known kind of file.
                continue

        entry = (name, int(size), urljoin(base_url, name))
        files.setdefault(slug, dict())[key] = entry

    return files, coast


def _coastline_good_files():
    """Look up size and modification date of the 'coastline-good' archives.

    These archives are not read from the log; each one is queried with an
    HTTP HEAD request instead.

    Returns a dictionary mapping projection ('merc', 'latlon') to
    (file name, size, URL, formatted date). A projection whose response
    lacks a content-length or last-modified header is reported on stderr
    and left out.
    """
    good = dict()

    for prj in ('merc', 'latlon'):
        name = 'coastline-good-%s.tar.bz2' % prj
        href = urljoin(base_url, name)
        url = urlparse(href)

        conn = HTTPConnection(url.netloc)
        try:
            conn.request('HEAD', url.path)
            resp = conn.getresponse()
            size = resp.getheader('content-length')
            modified = resp.getheader('last-modified')
            status = resp.status
        finally:
            conn.close()

        if size is None or modified is None:
            print >> stderr, 'No size or date for %s (HTTP %s)' % (href, status)
            continue

        date = parser.parse(modified)
        date = '%s %d, %s' % (months[date.month], date.day, date.year)
        good[prj] = (name, int(size), href, date)

    return good


def _coastline_complete(coast):
    """Tell whether every archive the coastline section links to is known."""
    for slug in _COAST_SLUGS:
        for prj in ('merc', 'latlon'):
            if prj not in coast.get(slug, ()):
                return False

    return True


def _coast_links(entry):
    """Return the five template values for one coastline archive.

    The values are: Mercator URL (headline link), Mercator URL, Mercator
    size, unprojected URL, unprojected size.
    """
    merc, latlon = entry['merc'], entry['latlon']

    return (merc[2], merc[2], nice_size(merc[1]),
            latlon[2], nice_size(latlon[1]))


def _city_file_items(city_files):
    """Build the <li> download links for one city's files.

    city_files maps a file key to (file name, size, URL). A kind of file
    that is absent is left out of the result.
    """
    items = []

    for key, label in (('bz2', 'bzip’ed XML'), ('pbf', 'binary PBF')):
        if key in city_files:
            _, size, href = city_files[key]
            items.append('<li class="file"><a href="%s">%s %s OSM data</a></li>'
                         % (href, nice_size(size), label))

    templates = (
        ('coastline', '<li class="file"><a href="%s">%s coastline shapefile</a></li>'),
        ('osm2pgsql shapefiles', '<li class="file"><a href="%s">%s osm2pgsql shapefiles</a></li>'),
        ('imposm shapefiles', '<li class="file"><a href="%s">%s imposm shapefiles</a></li>'),
    )

    for key, template in templates:
        if key in city_files:
            _, size, href = city_files[key]
            items.append(template % (href, nice_size(size)))

    return ''.join(items)


def _write_index(index, start, files, coast, cities):
    """Write the complete HTML index page to the open file `index`.

    start  -- formatted date of the Planet file the extracts came from.
    files  -- per-city files, as returned by _parse_log().
    coast  -- coastline archives, including the 'coastline-good' entry.
    cities -- list of row dictionaries read from cities.txt; it is sorted
              in place.
    """
    print >> index, """<!DOCTYPE html>
<html lang="en">
<head>
<title>Metro Extracts</title>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<link rel="stylesheet" href="style.css" type="text/css" media="all">
</head>
<body>
<h1>Metro Extracts</h1>
<p>
Parts of the <a href="http://www.openstreetmap.org/">OpenStreetMap database</a>
for major world cities and their surrounding areas. The goal of these
extracts is to make it easy to make maps for major world cities, even if
they cross state or national boundaries.
</p>
<p>
Each city includes:
</p>
<ol>
<li>Bzip’ed OpenStreetMap <a href="http://wiki.openstreetmap.org/wiki/.osm">XML data</a> in an <tt>.osm.bz2</tt> file.</li>
<li>Compressed OpenStreetMap <a href="http://wiki.openstreetmap.org/wiki/PBF">binary PBF data</a> in an <tt>.osm.pbf</tt> file.</li>
<li><a href="#coastline">Coastline shapefile</a> extracts of the immediate area in a <tt>.zip</tt> file.</li>
<li>Point, line and polygon shapefiles from <a href="http://wiki.openstreetmap.org/wiki/Osm2pgsql">Osm2pgsql</a> in a <tt>.zip</tt> file.</li>
</ol>
<p>
Provided by <a href="http://mike.teczno.com">Michal Migurski</a> on an expected
monthly basis <a href="https://github.com/migurski/Extractotron/">via extractotron</a>.
Contact me <a href="https://github.com/migurski">via Github</a> to request new cities,
or add them directly to
<a href="https://github.com/migurski/Extractotron/blob/master/cities.txt">cities.txt</a>
with a <a href="http://help.github.com/fork-a-repo/">fork</a>-and-<a href="http://help.github.com/send-pull-requests/">pull-request</a>.
</p>
<h2>Updated From <a href="http://planet.openstreetmap.org/">Planet</a> %(start)s</h2>
<p id="archive-note">
An archived copy of this collection of extracts from the
<a href="http://archive.org/download/metro.teczno.com/planet-120314.osm.bz2">March 14th 2012 Planet file</a>
(just before the <a href="http://lists.openstreetmap.org/pipermail/talk/2012-January/061800.html">April, 2012</a>
<a href="http://www.osmfoundation.org/wiki/License/We_Are_Changing_The_License">license changeover</a>)
is available at <a href="http://archive.org/download/metro.teczno.com">archive.org</a>.
Extracts here will continue to be updated into the future.
</p>
<ul class="links">""" % {'start': start}

    # Quick links, grouped: a heading is emitted whenever the group changes.
    cities.sort(key=lambda city: (city['group'], city['name']))
    last_group = None

    for city in cities:
        if city['slug'] in files:
            if city['group'] != last_group:
                print >> index, '<li class="group">%(group)s:</li>' % city
                last_group = city['group']
            print >> index, '<li class="link"><a href="#%(slug)s">%(name)s</a></li>' % city

    print >> index, """</ul>"""
    print >> index, """<ul>"""

    # Full listing, alphabetical by name.
    cities.sort(key=lambda city: city['name'])

    for city in cities:
        slug = city['slug']
        name = city['name']

        # Bounds are checked for every city, listed or not, so that a bad
        # row in cities.txt is reported instead of passing unnoticed.
        try:
            ul = Location(float(city['top']), float(city['left']))
            lr = Location(float(city['bottom']), float(city['right']))
        except ValueError:
            print >> stderr, 'Failed on %(name)s (%(slug)s)' % city
            raise
        else:
            mmap = mapByExtent(provider, ul, lr, dimensions)

        if slug not in files:
            continue

        # Link the preview image to openstreetmap.org at the centre and
        # zoom level of the map fitted to the city's bounds.
        center = mmap.pointLocation(Point(dimensions.x/2, dimensions.y/2))
        zoom = mmap.coordinate.zoom
        href = 'http://www.openstreetmap.org/?lat=%.3f&lon=%.3f&zoom=%d&layers=M' % (center.lat, center.lon, zoom)

        print >> index, """
<li class="city">
<a name="%(slug)s" href="%(href)s"><img src="previews/%(slug)s.jpg"></a>
<h3>%(name)s</h3>
<ul>%(list)s</ul>
</li>""" % {'slug': slug, 'href': href, 'name': name,
            'list': _city_file_items(files[slug])}

    print >> index, """</ul>"""

    if _coastline_complete(coast):
        values = (
            _coast_links(coast['processed_p']) + (start, )
            + _coast_links(coast['coastline-good'])
            + (coast['coastline-good']['merc'][3], )
            + _coast_links(coast['processed_i'])
            + _coast_links(coast['coastline_p'])
            + _coast_links(coast['post_errors'])
            + _coast_links(coast['post_missing'])
        )

        print >> index, """<h2><a name="coastline">Coastline Shapefiles</a></h2>
<p>
<a href="http://wiki.openstreetmap.org/wiki/Coastline">Coastline</a> objects
in OpenStreetMap are not directly usable for rendering. They must first be
joined into continent-sized polygons by the
<a href="http://wiki.openstreetmap.org/wiki/Coastline_error_checker">coastline error checker</a>
and converted to shapefiles. The files available below are up-to-date,
error-corrected versions of the worldwide coastline generated using the code available from
<a href="http://svn.openstreetmap.org/applications/utils/coastcheck/">Subversion</a>.
</p>
<ul class="coast">
<li><a href="%s">Coastline polygons</a>: automatically generated areas, divided into 100km squares.<br><a href="%s">Mercator</a> (%s) and <a href="%s">unprojected</a> (%s) shapefiles.<br>Updated from <a href="http://planet.openstreetmap.org/">Planet</a> %s.</li>
<li><a href="%s">Good coastline polygons</a>: coastline polygons chosen to fill gaps in new data with old data.<br><a href="%s">Mercator</a> (%s) and <a href="%s">unprojected</a> (%s) shapefiles.<br>Last manually selected %s.</li>
</ul>
<p>
The coastline usually has errors in it. These files help show where
those errors might be lurking, so that you can fix OpenStreetMap for
the next time the coastline polygons are rendered:
</p>
<ul class="coast">
<li><a href="%s">Incomplete lines</a>: incomplete coastlines, joined into linestrings.<br><a href="%s">Mercator</a> (%s) and <a href="%s">unprojected</a> (%s) shapefiles.</li>
<li><a href="%s">Error points</a>: points where the coastline checker found errors.<br><a href="%s">Mercator</a> (%s) and <a href="%s">unprojected</a> (%s) shapefiles.</li>
<li><a href="%s">PostGIS error points</a>: points where PostGIS found topology errors.<br><a href="%s">Mercator</a> (%s) and <a href="%s">unprojected</a> (%s) shapefiles.</li>
<li><a href="%s">PostGIS missing tiles</a>: areas where PostGIS was unable to parse a geometry.<br><a href="%s">Mercator</a> (%s) and <a href="%s">unprojected</a> (%s) shapefiles.</li>
</ul>""" % values

    elif 'processed_p' in coast:
        # Some coastline data exists, but not everything the section links
        # to; leave the section out rather than abort the whole page.
        print >> stderr, 'Skipping coastline section: some coastline archives are missing'

    print >> index, """</body></html>"""


if __name__ == '__main__':
    (index_path, ) = argv[1:]

    # Gather every input before opening the output file, so that a failed
    # download does not leave a truncated index behind.
    log = list(urlopen(base_url))

    # The first log line is expected to read "# begin, <date>".
    start = parser.parse(log[0][len('# begin, '):])
    start = '%s %d, %s' % (months[start.month], start.day, start.year)

    files, coast = _parse_log(log)
    coast['coastline-good'] = _coastline_good_files()

    cities_file = open('cities.txt')
    try:
        cities = list(DictReader(cities_file, dialect='excel-tab'))
    finally:
        cities_file.close()

    index = open(index_path, 'w')
    try:
        _write_index(index, start, files, coast, cities)
    finally:
        index.close()