forked from danvk/sfhistory
-
Notifications
You must be signed in to change notification settings - Fork 0
/
geocoder.py
executable file
·134 lines (109 loc) · 4.07 KB
/
geocoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/python
#
# Run addresses or cross-streets through the Google Maps geocoder.
#
# Maintains a cache of previously-geocoded locations and throttles traffic to the Geocoder.
import base64
import re
import sys
import time
import json
import urllib
from coders.locatable import GetAverageLatLon
# Geocoding endpoint; the single %s slot takes the URL-quoted address.
GeocodeUrlTemplate = (
    'https://maps.googleapis.com/maps/api/geocode/json'
    '?sensor=false&address=%s'
)

# Directory holding one cached geocoder response per location.
CacheDir = 'geocache'

# Set to True to log every cache lookup to stderr.
CacheDebug = False

# Canned response used for "@lat,lon" requests, which never need to reach the
# geocoder. The two %s slots are filled with the latitude and the longitude.
FakeResponse = (
    '\n'
    '{ "results" : [ {\n'
    '"geometry" : { "location" : { "lat" : %s, "lng" : %s } },\n'
    '"types" : [ "point_of_interest" ]\n'
    '} ], "status" : "OK" }\n'
)
def _cache_file(loc):
    """Returns the path of the cache file used for the location string.

    The file name is the base64 encoding of the location with its last two
    characters removed, every '/' replaced by '-', cut to 255 characters.
    """
    # Text (unicode) locations are encoded as UTF-8 first so that non-ASCII
    # addresses do not make b64encode raise. Byte strings are used as-is, so
    # keys for existing cache entries are unchanged.
    if not isinstance(loc, bytes):
        loc = loc.encode('utf-8')

    # NOTE(review): the last two characters are dropped unconditionally. They
    # are pure '==' padding only when len(loc) % 3 == 1; otherwise real
    # characters are lost and locations differing only at the very end can
    # share a file. Kept as-is so that existing cache files stay reachable.
    key = base64.b64encode(loc)[:-2]
    if not isinstance(key, str):
        # b64encode returned a bytes object distinct from str; make it text
        # so the string operations below apply.
        key = key.decode('ascii')

    key = key.replace('/', '-')  # '/' is bad in a file name.
    key = key[:255]  # cap the file name length
    return "%s/%s" % (CacheDir, key)
class Geocoder:
    """Geocodes addresses via the Google Maps geocoder, with a file cache.

    Responses are stored one file per address (see _cache_file) and network
    fetches are spaced at least `wait_time` seconds apart.
    """

    def __init__(self, network_allowed, wait_time):
        """Creates a geocoder.

        Args:
          network_allowed: when false, Locate() returns None instead of
              fetching from the network.
          wait_time: minimum number of seconds between two network fetches.
        """
        self._network_allowed = network_allowed
        self._wait_time = wait_time
        self._last_fetch = 0

    def _check_cache(self, loc):
        """Returns cached results for the location or None if not available."""
        cache_file = _cache_file(loc)
        if CacheDebug:
            sys.stderr.write('Checking %s\n' % cache_file)
        try:
            with open(cache_file) as f:
                return f.read()
        except (IOError, OSError):
            # A missing or unreadable file is simply a cache miss. Anything
            # else (e.g. KeyboardInterrupt) is allowed to propagate.
            return None

    def _cache_result(self, loc, result):
        """Writes the raw response text for the location to its cache file."""
        with open(_cache_file(loc), "w") as f:
            f.write(result)

    def _fetch(self, url):
        """Attempts to fetch the URL. Does rate throttling.

        Sleeps as needed so that consecutive fetches are at least
        `wait_time` seconds apart, then returns the raw response body.
        """
        now = time.time()
        diff = now - self._last_fetch
        sys.stderr.write("now=%f, then=%f, diff=%f vs. %f\n" % (
            now, self._last_fetch, diff, self._wait_time))
        if diff < self._wait_time:
            time.sleep(self._wait_time - diff)
        self._last_fetch = time.time()

        sys.stderr.write("Fetching %s\n" % url)
        f = urllib.URLopener().open(url)
        try:
            return f.read()
        finally:
            f.close()

    def _check_for_lat_lon(self, address):
        """For addresses of the form "@(lat),(lon)", skip the geocoder.

        Returns a canned response string for such addresses, else None.
        """
        m = re.match(r'@([-0-9.]+),([-0-9.]+)$', address)
        if m:
            return FakeResponse % (m.group(1), m.group(2))
        return None

    def _first_location(self, address, check_cache=True):
        """Returns the location dict of the first result, or None."""
        response = self.Locate(address, check_cache)
        if not response:
            return None
        results = response.get('results')
        if not results:
            # e.g. a ZERO_RESULTS response.
            return None
        return results[0]['geometry']['location']

    def LocateAverage(self, first_interesction, second_interesction, check_cache=True):
        """Returns the average lat/lon of two intersections, or None.

        Helpful for addresses that are like "A between B and C". None is
        returned when either intersection cannot be located.
        """
        first_lat_lon = self._first_location(first_interesction, check_cache)
        second_lat_lon = self._first_location(second_interesction, check_cache)
        if first_lat_lon and second_lat_lon:
            return GetAverageLatLon([
                [first_lat_lon['lat'], first_lat_lon['lng']],
                [second_lat_lon['lat'], second_lat_lon['lng']],
            ])
        return None

    def Locate(self, address, check_cache=True):
        """Returns a maps API JSON response for the address or None.

        Address should be a fully-qualified address, e.g.
        '111 8th Avenue, New York, NY'.

        The cache is consulted first (unless check_cache is false), then the
        "@lat,lon" shortcut, then the network if it is allowed. Responses
        that did not come from the cache are written to it.

        Raises:
          Exception: if the geocoder reports OVER_QUERY_LIMIT.
        """
        data = None
        if check_cache:
            data = self._check_cache(address)
        # An empty cache file counts as a miss, so the fresh result below
        # gets written back over it.
        from_cache = bool(data)

        if not data:
            data = self._check_for_lat_lon(address)

        if not data:
            if not self._network_allowed:
                return None
            # The URL is only needed (and only built) for a network fetch.
            url = GeocodeUrlTemplate % urllib.quote(address)
            data = self._fetch(url)

        if not data:
            return None

        response = json.loads(data)
        status = response['status']
        if status not in ['OK', 'ZERO_RESULTS']:
            sys.stderr.write('Error status %s %s\n' % (status, json.dumps(response)))
            if status == 'OVER_QUERY_LIMIT':
                raise Exception('Over your quota for the day!')
            return None

        if not from_cache and response:
            self._cache_result(address, data)

        return response

    def InCache(self, loc):
        """Returns True if there is a cached response for the location."""
        data = self._check_cache(loc)
        return data is not None

    def LocateFromCache(self, loc):
        """Like Locate, but never goes to the network to get a location."""
        data = self._check_cache(loc)
        if not data:
            return None
        return json.loads(data)
if __name__ == '__main__':
    # Debugging aid: show which cache file each command-line address maps to.
    for address in sys.argv[1:]:
        print('%s --> %s' % (address, _cache_file(address)))