diff --git a/requirements.txt b/requirements.txt index f4cbc4286..e0aaa4e0c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ pyjwt==1.4.0 python-dateutil==2.5.3 pytz==2017.2 sentry-sdk==0.14.4 -redis==3.3.4 +redis==3.3.8 setuptools==37.0.0 stevedore==1.2.0 tornado==6.0.3 diff --git a/tests/core/test_exceptions.py b/tests/core/test_exceptions.py index 84b381260..fba5293d5 100644 --- a/tests/core/test_exceptions.py +++ b/tests/core/test_exceptions.py @@ -38,6 +38,8 @@ class TestExceptionSerialization: exceptions.UninitializedRepositoryError, exceptions.UnexportableFileTypeError, exceptions.InvalidProviderConfigError, + exceptions.WaterButlerRedisError, + exceptions.TooManyRequests, ]) def test_tolerate_dumb_signature(self, exception_class): """In order for WaterButlerError-inheriting exception classes to survive diff --git a/waterbutler/core/exceptions.py b/waterbutler/core/exceptions.py index fb8484943..35abca10b 100644 --- a/waterbutler/core/exceptions.py +++ b/waterbutler/core/exceptions.py @@ -67,21 +67,23 @@ class TooManyRequests(WaterButlerError): * X-Waterbutler-RateLimiting-Reset """ def __init__(self, data): - - message = { - 'error': 'API rate-limiting active due to too many requests', - 'headers': { - 'Retry-After': data['retry_after'], - 'X-Waterbutler-RateLimiting-Window': settings.RATE_LIMITING_FIXED_WINDOW_SIZE, - 'X-Waterbutler-RateLimiting-Limit': settings.RATE_LIMITING_FIXED_WINDOW_LIMIT, - 'X-Waterbutler-RateLimiting-Remaining': data['remaining'], - 'X-Waterbutler-RateLimiting-Reset': data['reset'], - }, - } + if type(data) != dict: + message = 'This is a thing: {}'.format(data) + else: + message = { + 'error': 'API rate-limiting active due to too many requests', + 'headers': { + 'Retry-After': data['retry_after'], + 'X-Waterbutler-RateLimiting-Window': settings.RATE_LIMITING_FIXED_WINDOW_SIZE, + 'X-Waterbutler-RateLimiting-Limit': settings.RATE_LIMITING_FIXED_WINDOW_LIMIT, + 'X-Waterbutler-RateLimiting-Remaining': data['remaining'], + 'X-Waterbutler-RateLimiting-Reset': data['reset'], + }, + } super().__init__(message, code=HTTPStatus.TOO_MANY_REQUESTS, is_user_error=True) -class WaterbutlerRedisError(WaterButlerError): +class WaterButlerRedisError(WaterButlerError): """Indicates the Redis server has returned an error. Returns HTTP 503 Service Unavailable. """ def __init__(self, redis_command): diff --git a/waterbutler/server/api/v1/core.py b/waterbutler/server/api/v1/core.py index 13322f5e8..19928f5d9 100644 --- a/waterbutler/server/api/v1/core.py +++ b/waterbutler/server/api/v1/core.py @@ -29,6 +29,11 @@ def write_error(self, status_code, exc_info): scope.level = 'info' self.set_status(int(exc.code)) + + # If the exception has a `data` property then we need to handle that with care. + # The expectation is that we need to return a structured response. For now, assume + # that involves setting the response headers to the value of the `headers` + # attribute of the `data`, while also serializing the entire `data` data structure. if exc.data: self.set_header('Content-Type', 'application/json') headers = exc.data.get('headers', None) diff --git a/waterbutler/server/api/v1/provider/ratelimiting.py b/waterbutler/server/api/v1/provider/ratelimiting.py index 90c058ee7..0bb40e25c 100644 --- a/waterbutler/server/api/v1/provider/ratelimiting.py +++ b/waterbutler/server/api/v1/provider/ratelimiting.py @@ -6,7 +6,7 @@ from redis.exceptions import RedisError from waterbutler.server import settings -from waterbutler.core.exceptions import WaterbutlerRedisError +from waterbutler.core.exceptions import WaterButlerRedisError logger = logging.getLogger(__name__) @@ -23,8 +23,10 @@ def __init__(self): self.redis_conn = Redis(host=settings.REDIS_DOMAIN, port=settings.REDIS_PORT) def rate_limit(self): - """ Check with the WB Redis server on whether to rate-limit a request. Return True if the - limit is reached; return False otherwise. + """ Check with the WB Redis server on whether to rate-limit a request. Returns a tuple. + First value is `True` if the limit is reached, `False` otherwise. Second value is the + rate-limiting metadata (nbr of requests remaining, time to reset, etc.) if the request was + rate-limited. """ limit_check, redis_key = self.get_auth_naive() @@ -35,13 +37,14 @@ def rate_limit(self): try: counter = self.redis_conn.incr(redis_key) except RedisError: - raise WaterbutlerRedisError('INCR {}'.format(redis_key)) + raise WaterButlerRedisError('INCR {}'.format(redis_key)) + if counter > self.WINDOW_LIMIT: # The key exists and the limit has been reached. try: retry_after = self.redis_conn.ttl(redis_key) except RedisError: - raise WaterbutlerRedisError('TTL {}'.format(redis_key)) + raise WaterButlerRedisError('TTL {}'.format(redis_key)) logger.info('>>> RATE LIMITING >>> FAIL >>> key={} ' 'counter={} url={}'.format(redis_key, counter, self.request.full_url())) data = { @@ -55,13 +58,14 @@ def rate_limit(self): try: self.redis_conn.expire(redis_key, self.WINDOW_SIZE) except RedisError: - raise WaterbutlerRedisError('EXPIRE {} {}'.format(redis_key, self.WINDOW_SIZE)) + raise WaterButlerRedisError('EXPIRE {} {}'.format(redis_key, self.WINDOW_SIZE)) logger.info('>>> RATE LIMITING >>> NEW >>> key={} ' 'counter={} url={}'.format(redis_key, counter, self.request.full_url())) else: # The key exists and the limit has not been reached. logger.info('>>> RATE LIMITING >>> PASS >>> key={} ' 'counter={} url={}'.format(redis_key, counter, self.request.full_url())) + return False, None def get_auth_naive(self): @@ -72,49 +76,45 @@ def get_auth_naive(self): Refer to ``tornado.httputil.HTTPServerRequest`` for more info on tornado's request object: https://www.tornadoweb.org/en/stable/httputil.html#tornado.httputil.HTTPServerRequest - This is a NAIVE implementation in which Waterbutler rate-limiter only checks the existence + This is a NAIVE implementation in which WaterButler rate-limiter only checks the existence of auth creds in the requests without further verifying them with the OSF. Invalid creds will fail the next OSF auth part anyway even if it passes the rate-limiter. - There are totally four types of auth: 1) OSF cookie, 2) OAuth access token, 3) basic auth w/ - base64-encoded username/password and 4) w/o any of the three aforementioned auth. The naive - implementation allows cookie auth to bypass the rate-limiter while limiting the rest three. - - Please note that the above exception for cookie auth only applies when it is the only the - auth option provided by the request. The priority order is: token > basic > cookie. + There are four types of auth: 1) OAuth access token, 2) basic auth w/ base64-encoded + username/password, 3) OSF cookie, and 4) no auth. The naive implementation checks each + method in this order. Only cookie-based auth is permitted to bypass the rate-limiter. + This order does not care about the validity of the auth mechanism. An invalid Basic auth + header + an OSF cookie will be rate-limited according to the Basic auth header. TODO: check with OSF API auth to see how it deals with multiple auth options. """ - osf_cookie = bearer_token = basic_creds = None - - # CASE 1: Requests with OSF cookies - cookies = self.request.cookies or None - if cookies and cookies.get('osf'): - osf_cookie = cookies.get('osf').value - auth_hdrs = self.request.headers.get('Authorization', None) - if auth_hdrs: - # CASE 2: Requests with OAuth bearer token - bearer_token = auth_hdrs.split(' ')[1] if auth_hdrs.startswith('Bearer ') else None - # CASE 3: Requests with basic auth using username and password - basic_creds = auth_hdrs.split(' ')[1] if auth_hdrs.startswith('Basic ') else None - # TODO: Work with DevOps to make sure that the `remote_ip` is the real IP instead of our - # load balancers. In addition, check relevatn HTTP headers as well. - # CASE 4: Requests without any expected auth (case 1, 2 or 3 above). - remote_ip = self.request.remote_ip or 'NOI.PNO.IPN.OIP' - - if bearer_token: + # CASE 1: Requests with a bearer token (PAT or OAuth) + if auth_hdrs and auth_hdrs.startswith('Bearer '): # Bearer token + bearer_token = auth_hdrs.split(' ')[1] if auth_hdrs.startswith('Bearer ') else None logger.info('>>> RATE LIMITING >>> AUTH:TOKEN >>> {}'.format(bearer_token)) return True, 'TOKEN__{}'.format(self._obfuscate_creds(bearer_token)) - if basic_creds: + + # CASE 2: Requests with basic auth using username and password + if auth_hdrs and auth_hdrs.startswith('Basic '): # Basic auth + basic_creds = auth_hdrs.split(' ')[1] if auth_hdrs.startswith('Basic ') else None logger.info('>>> RATE LIMITING >>> AUTH:BASIC >>> {}'.format(basic_creds)) return True, 'BASIC__{}'.format(self._obfuscate_creds(basic_creds)) + + # CASE 3: Requests with OSF cookies # SECURITY WARNING: Must check cookie last since it can only be allowed when used alone! - if osf_cookie: + cookies = self.request.cookies or None + if cookies and cookies.get('osf'): + osf_cookie = cookies.get('osf').value logger.info('>>> RATE LIMITING >>> AUTH:COOKIE >>> {}'.format(osf_cookie)) return False, 'COOKIE_{}'.format(self._obfuscate_creds(osf_cookie)) + + # TODO: Work with DevOps to make sure that the `remote_ip` is the real IP instead of our + # load balancers. In addition, check relevatn HTTP headers as well. + # CASE 4: Requests without any expected auth (case 1, 2 or 3 above). + remote_ip = self.request.remote_ip or 'NOI.PNO.IPN.OIP' logger.info('>>> RATE LIMITING >>> AUTH:NONE >>> {}'.format(remote_ip)) return True, 'NOAUTH_{}'.format(self._obfuscate_creds(remote_ip)) diff --git a/waterbutler/server/settings.py b/waterbutler/server/settings.py index 19d074b49..d682f214d 100644 --- a/waterbutler/server/settings.py +++ b/waterbutler/server/settings.py @@ -33,8 +33,12 @@ # Configs for WB API Rate-limiting with Redis -ENABLE_RATE_LIMITING = config.get('ENABLE_RATE_LIMITING', False) -REDIS_DOMAIN = config.get('WB_REDIS_DOMAIN', '192.168.168.167') -REDIS_PORT = config.get('WB_REDIS_PORT', '6379') -RATE_LIMITING_FIXED_WINDOW_SIZE = int(config.get('RATE_LIMITING_FIXED_WINDOW_SIZE', 3600)) -RATE_LIMITING_FIXED_WINDOW_LIMIT = int(config.get('RATE_LIMITING_FIXED_WINDOW_LIMIT', 3600)) +ENABLE_RATE_LIMITING = config.get_bool('ENABLE_RATE_LIMITING', False) +REDIS_DOMAIN = config.get('REDIS_DOMAIN', '192.168.168.167') +REDIS_PORT = config.get('REDIS_PORT', '6379') + +# Number of seconds until the redis key expires +RATE_LIMITING_FIXED_WINDOW_SIZE = int(config.get('RATE_LIMITING_FIXED_WINDOW_SIZE', 360)) + +# number of reqests permitted while the redis key is active +RATE_LIMITING_FIXED_WINDOW_LIMIT = int(config.get('RATE_LIMITING_FIXED_WINDOW_LIMIT', 36))