# stackweb.py - Core classes for web-request stuff
from __future__ import print_function
from stackexchange.core import StackExchangeError
from six.moves import urllib
import datetime, io, gzip, time

try:
    import json
except ImportError:
    import simplejson as json

class TooManyRequestsError(Exception):
    def __str__(self):
        return "More than 30 requests have been made in the last five seconds."


class WebRequest(object):
    data = ''
    info = None

    def __init__(self, data, info):
        self.data = data
        self.info = info

    def __str__(self):
        return str(self.data)

class WebRequestManager(object):
    debug = False
    cache = {}

    # The time at which the current throttle window began
    window = datetime.datetime.now()
    # Number of requests made since the throttle window began
    num_requests = 0

    def __init__(self, impose_throttling = False, throttle_stop = True, cache = True, cache_age = 1800):
        # Whether to monitor requests for overuse of the API
        self.impose_throttling = impose_throttling
        # Whether to throw an error (when True) if the limit is reached, or to wait
        # until another request can be made (when False)
        self.throttle_stop = throttle_stop
        # Whether to use request caching
        self.do_cache = cache
        # The time, in seconds, for which to cache a response
        self.cache_age = cache_age
        # The time at which we should resume making requests after receiving a
        # 'backoff', keyed by method
        self.backoff_expires = {}

    def debug_print(self, *p):
        if WebRequestManager.debug:
            print(' '.join([x if isinstance(x, str) else repr(x) for x in p]))

    def canon_method_name(self, url):
        # Take the URL relative to the domain, without the initial / or parameters
        parsed = urllib.parse.urlparse(url)
        return '/'.join(parsed.path.split('/')[1:])
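
    # Illustrative example (not in the original file): for a URL such as
    #   'https://api.stackexchange.com/2.2/users/1?site=stackoverflow'
    # canon_method_name() returns '2.2/users/1', the key used for per-method
    # backoff tracking in request() and json_request() below.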

    def request(self, url, params):
        now = datetime.datetime.now()

        # Quote URL fields (mostly for 'c#'), but not the : in 'http://'
        components = url.split('/')
        url = components[0] + '/' + ('/'.join(urllib.parse.quote(path) for path in components[1:]))

        done = False
        for k, v in params.items():
            if not done:
                url += '?'
                done = True
            else:
                url += '&'
            url += '%s=%s' % (k, urllib.parse.quote(str(v).encode('utf-8')))
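
        # For example (illustrative, not in the original): a call like
        #   request('http://api.stackexchange.com/2.2/search', {'site': 'stackoverflow', 'tagged': 'c#'})
        # yields '.../2.2/search?site=stackoverflow&tagged=c%23' -- quote() escapes
        # the '#', which would otherwise begin a URL fragment.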

        # Now we have the 'proper' URL, we can check the cache
        if self.do_cache and url in self.cache:
            timestamp, data = self.cache[url]
            self.debug_print('C>', url, '@', timestamp)

            if (now - timestamp).total_seconds() <= self.cache_age:
                self.debug_print('Hit>', url)
                return data

        # Before we do the actual request, are we going to be throttled?
        def halt(wait_time):
            if self.throttle_stop:
                raise TooManyRequestsError()
            else:
                # Wait the required time, plus a bit of extra padding time
                time.sleep(wait_time + 0.1)

        if self.impose_throttling:
            # We need to check whether we've been told to back off
            method = self.canon_method_name(url)
            backoff_time = self.backoff_expires.get(method, None)

            if backoff_time is not None and backoff_time >= now:
                self.debug_print('backoff: %s until %s' % (method, backoff_time))
                halt((backoff_time - now).total_seconds())

            if (now - WebRequestManager.window).total_seconds() >= 5:
                WebRequestManager.window = now
                WebRequestManager.num_requests = 0

            WebRequestManager.num_requests += 1
            if WebRequestManager.num_requests > 30:
                halt(5 - (now - WebRequestManager.window).total_seconds())
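
            # Worked example (not in the original): if this is the 31st request in a
            # window that began 3 seconds ago, halt() is asked to wait the remaining
            # 5 - 3 = 2 seconds (plus padding) before the request proceeds.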

        # We definitely do need to go out to the internet, so make the real request
        self.debug_print('R>', url)
        request = urllib.request.Request(url)
        request.add_header('Accept-Encoding', 'gzip')
        req_open = urllib.request.build_opener()

        try:
            conn = req_open.open(request)
            info = conn.info()
            req_data = conn.read()
            error_code = 200
        except urllib.error.HTTPError as e:
            # We'll handle the error response later
            error_code = e.code
            # A hack ('headers' is an undocumented property), but there's no
            # sensible way to get the headers otherwise
            info = getattr(e, 'headers', {})
            req_data = e.read()

        # Handle compressed responses.
        # (Stack Exchange's API sends its responses compressed, but intermediary
        # proxies may send them to us decompressed.)
        if info.get('Content-Encoding') == 'gzip':
            data_stream = io.BytesIO(req_data)
            gzip_stream = gzip.GzipFile(fileobj = data_stream)
            actual_data = gzip_stream.read()
        else:
            actual_data = req_data

        # Check for errors
        if error_code != 200:
            try:
                error_ob = json.loads(actual_data.decode('utf8'))
            except ValueError:
                raise StackExchangeError()
            else:
                raise StackExchangeError(error_ob.get('error_id', StackExchangeError.UNKNOWN), error_ob.get('error_name'), error_ob.get('error_message'))

        conn.close()

        req_object = WebRequest(actual_data, info)

        # Store the response in the cache
        if self.do_cache:
            self.cache[url] = (now, req_object)
            self.debug_print('Store>', url)

        return req_object
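
    # Note (illustrative, not in the original): cache entries are stored as
    # (timestamp, WebRequest) tuples keyed by the fully-built URL. Stale entries
    # are never evicted; they are simply overwritten the next time the same URL
    # is fetched.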

    def json_request(self, to, params):
        req = self.request(to, params)
        parsed_result = json.loads(req.data.decode('utf8'))

        # In API v2.x we now need to respect the 'backoff' warning
        if 'backoff' in parsed_result:
            method = self.canon_method_name(to)
            self.backoff_expires[method] = datetime.datetime.now() + datetime.timedelta(seconds = parsed_result['backoff'])

        return (parsed_result, req.info)
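

# A minimal usage sketch (not part of the original module). It assumes network
# access and uses the public Stack Exchange API v2.2 'info' method; the
# parameters shown are illustrative.
if __name__ == '__main__':
    WebRequestManager.debug = True
    manager = WebRequestManager(impose_throttling = True, throttle_stop = False)
    result, info = manager.json_request('http://api.stackexchange.com/2.2/info', {'site': 'stackoverflow'})
    print(result.get('items'))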