diff --git a/lambdas/service/app.py b/lambdas/service/app.py index 5bd42e97e7..824f1350a0 100644 --- a/lambdas/service/app.py +++ b/lambdas/service/app.py @@ -29,6 +29,7 @@ Response, UnauthorizedError, ) +import chevron from furl import ( furl, ) @@ -56,6 +57,9 @@ from azul.collections import ( OrderedSet, ) +from azul.csp import ( + CSP, +) from azul.drs import ( AccessMethod, ) @@ -487,10 +491,19 @@ def manifest_url(self, } ) def oauth2_redirect(): - oauth2_redirect_html = app.load_static_resource('swagger', 'oauth2-redirect.html') + file_name = 'oauth2-redirect.html.template.mustache' + template = app.load_static_resource('swagger', file_name) + nonce = CSP.new_nonce() + html = chevron.render(template, { + 'CSP_NONCE': json.dumps(nonce) + }) + csp = CSP.for_azul(nonce) return Response(status_code=200, - headers={"Content-Type": "text/html"}, - body=oauth2_redirect_html) + headers={ + 'Content-Type': 'text/html', + 'Content-Security-Policy': str(csp) + }, + body=html) def validate_repository_search(entity_type: EntityType, diff --git a/scripts/convert_environment.py b/scripts/convert_environment.py index 1e330cd27b..0fb8863444 100644 --- a/scripts/convert_environment.py +++ b/scripts/convert_environment.py @@ -13,13 +13,6 @@ Optional, ) -""" -Convert an old-style `environment` Bash script to a new-style `environment.py`. - -This is by no means a complete parser for shell scripts. It recognizes only the -script statements typically used in `environment` and `environment.local` files. -""" - from azul.files import ( write_file_atomically, ) @@ -27,6 +20,13 @@ single_quote as sq, ) +""" +Convert an old-style `environment` Bash script to a new-style `environment.py`. + +This is by no means a complete parser for shell scripts. It recognizes only the +script statements typically used in `environment` and `environment.local` files. +""" + class Variable(NamedTuple): name: str diff --git a/src/azul/chalice.py b/src/azul/chalice.py index 5a310df26c..f5d1e4876e 100644 --- a/src/azul/chalice.py +++ b/src/azul/chalice.py @@ -58,6 +58,9 @@ from azul.collections import ( deep_dict_merge, ) +from azul.csp import ( + CSP, +) from azul.enums import ( auto, ) @@ -73,7 +76,6 @@ ) from azul.strings import ( join_words as jw, - single_quote as sq, ) from azul.types import ( JSON, @@ -205,33 +207,35 @@ def _api_gateway_context_middleware(self, event, get_response): finally: config.lambda_is_handling_api_gateway_request = False - hsts_max_age = 60 * 60 * 24 * 365 * 2 - - # Headers added to every response from the app, as well as canned 4XX and - # 5XX responses from API Gateway. Use of these headers addresses known - # security vulnerabilities. - # - security_headers = { - 'Content-Security-Policy': jw('default-src', sq('self')), - 'Referrer-Policy': 'strict-origin-when-cross-origin', - 'Strict-Transport-Security': jw(f'max-age={hsts_max_age};', - 'includeSubDomains;', - 'preload'), - 'X-Content-Type-Options': 'nosniff', - 'X-Frame-Options': 'DENY', - 'X-XSS-Protection': '1; mode=block' - } + @classmethod + def security_headers(cls) -> dict[str, str]: + """ + Default values for headers added to every response from the app, as well + as canned 4XX and 5XX responses from API Gateway. Use of these headers + addresses known security vulnerabilities. + """ + hsts_max_age = 60 * 60 * 24 * 365 * 2 + csp = CSP.for_azul() + return { + 'Content-Security-Policy': str(csp), + 'Referrer-Policy': 'strict-origin-when-cross-origin', + 'Strict-Transport-Security': jw(f'max-age={hsts_max_age};', + 'includeSubDomains;', + 'preload'), + 'X-Content-Type-Options': 'nosniff', + 'X-Frame-Options': 'DENY', + 'X-XSS-Protection': '1; mode=block' + } def _security_headers_middleware(self, event, get_response): """ Add headers to the response """ response = get_response(event) - response.headers.update(self.security_headers) - # FIXME: Add a CSP header with a nonce value to text/html responses - # https://github.com/DataBiosphere/azul-private/issues/6 - if response.headers.get('Content-Type') == 'text/html': - del response.headers['Content-Security-Policy'] + # Add security headers to the response without overwriting any headers + # that might have been added already (e.g. Content-Security-Policy) + for k, v in self.security_headers().items(): + response.headers.setdefault(k, v) view_function = self.routes[event.path][event.method].view_function cache_control = getattr(view_function, 'cache_control') # Caching defeats the automatic reloading of application source code by @@ -527,11 +531,14 @@ def _controller(self, controller_cls: type[C], **kwargs) -> C: return controller_cls(app=self, **kwargs) def swagger_ui(self) -> Response: - swagger_ui_template = self.load_static_resource('swagger', 'swagger-ui.html.template.mustache') + file_name = 'swagger-ui.html.template.mustache' + template = self.load_static_resource('swagger', file_name) base_url = self.base_url redirect_url = furl(base_url).add(path='oauth2_redirect') deployment_url = furl(base_url).add(path='openapi') - swagger_ui_html = chevron.render(swagger_ui_template, { + nonce = CSP.new_nonce() + html = chevron.render(template, { + 'CSP_NONCE': json.dumps(nonce), 'DEPLOYMENT_PATH': json.dumps(str(deployment_url.path)), 'OAUTH2_CLIENT_ID': json.dumps(config.google_oauth2_client_id), 'OAUTH2_REDIRECT_URL': json.dumps(str(redirect_url)), @@ -540,20 +547,24 @@ def swagger_ui(self) -> Response: for path, method in self.non_interactive_routes ]) }) + csp = CSP.for_azul(nonce) return Response(status_code=200, - headers={'Content-Type': 'text/html'}, - body=swagger_ui_html) - - def swagger_resource(self, file) -> Response: - if os.sep in file: - raise BadRequestError(file) + headers={ + 'Content-Type': 'text/html', + 'Content-Security-Policy': str(csp) + }, + body=html) + + def swagger_resource(self, file_name: str) -> Response: + if os.sep in file_name: + raise BadRequestError(file_name) else: try: - body = self.load_static_resource('swagger', file) + body = self.load_static_resource('swagger', file_name) except FileNotFoundError: - raise NotFoundError(file) + raise NotFoundError(file_name) else: - path = pathlib.Path(file) + path = pathlib.Path(file_name) content_type = mimetypes.types_map[path.suffix] return Response(status_code=200, headers={'Content-Type': content_type}, diff --git a/src/azul/csp.py b/src/azul/csp.py new file mode 100644 index 0000000000..7177679535 --- /dev/null +++ b/src/azul/csp.py @@ -0,0 +1,214 @@ +import base64 +from collections import ( + defaultdict, +) +import logging +import re +import secrets +from typing import ( + Self, +) + +import attrs +from more_itertools import ( + only, + prepend, +) + +from azul import ( + require, +) +from azul.strings import ( + single_quote as sq, +) + +log = logging.getLogger(__name__) + + +@attrs.frozen +class CSP: + directives: dict[str, list[str]] + + @classmethod + def for_azul(cls, nonce: str | None = None) -> Self: + self, none, data = sq('self'), sq('none'), 'data:' + nonce = [] if nonce is None else [sq('nonce-' + nonce)] + return cls({ + 'default-src': [self], + 'img-src': [self, data], + 'script-src': [self, *nonce], + 'style-src': [self, *nonce], + 'frame-ancestors': [none] + }) + + @classmethod + def new_nonce(cls) -> str: + """ + A random nonce for use in a CSP. + """ + return base64.b64encode(secrets.token_bytes(32)).decode('ascii').rstrip('=') + + @classmethod + def parse(cls, csp: str) -> Self: + """( + + Parse the given CSP or raise RequirementError if it is not syntactically + valid against the specification at https://www.w3.org/TR/CSP2. + + >>> from azul.doctests import ( + ... assert_json, + ... ) + + >>> def parse(s): return CSP.parse(s).directives + + A valid CSP: + + >>> valid_csp = "img-src 'self' data:;frame-ancestors 'none'" + >>> assert_json(parse(valid_csp)) + { + "img-src": [ + "'self'", + "data:" + ], + "frame-ancestors": [ + "'none'" + ] + } + + Insignificant whitespace is removed: + + >>> fluffy_csp = " \timg-src\t'self' data:\t;\tframe-ancestors\t 'none' \t" + >>> parse(valid_csp) == parse(fluffy_csp) + True + + Multiple multiple directives of the same name are consolidated: + + >>> assert_json(parse("img-src data:;img-src 'self':")) + { + "img-src": [ + "data:", + "'self':" + ] + } + + Invalid CSPs: + + >>> parse(";") + Traceback (most recent call last): + ... + azul.RequirementError: ('Invalid directive', '') + + >>> parse('img_src;') + Traceback (most recent call last): + ... + azul.RequirementError: ('Invalid directive', 'img_src') + + >>> parse('img-src a,b') + Traceback (most recent call last): + ... + azul.RequirementError: ('Invalid directive', 'img-src a,b') + """ + # https://www.w3.org/TR/CSP2/#policy-syntax + directive_re = re.compile(r'[ \t]*([a-zA-Z0-9-]+)' + # Space, tab and any visible character + # (0x21-0xFE) except for comma (0x2C) or + # semicolon (0x3B). + r'(?:[ \t]([ \t\x21-\x2B\x2D-\x3A\x3C-\xFE]*))?') + wsp_re = re.compile(r'[ \t]+') + directives = defaultdict(list) + for directive in csp.split(';'): + match = directive_re.fullmatch(directive) + require(match is not None, 'Invalid directive', directive) + name, values = match.groups() + values = [] if values is None else filter(None, wsp_re.split(values)) + directives[name].extend(values) + return cls(directives) + + # Matches only Azul nonces, specifically + nonce_re = re.compile(sq(r'nonce-([a-zA-Z0-9+/]{43})')) + + def validate(self): + """ + Validate the directive values against a subset of the Source List + grammar from the specification. Of that grammar, only the productions + used in CSPs for Azul are supported. + + >>> def validate(s): return CSP.parse(s).validate() + + >>> valid = ('0a+/' * 11)[:43] + >>> validate(f"script-src 'self' 'nonce-{valid}'") + + Disallowed characters in nonce: + + >>> invalid = valid.replace('+','*') + >>> validate(f"script-src 'self' 'nonce-{invalid}'") + Traceback (most recent call last): + ... + azul.RequirementError: ('Invalid value', "'nonce-0a*/0a*/0a*/0a*/0a*/0a*/0a*/0a*/0a*/0a*/0a*'") + + Nonce is too short: + + >>> invalid = valid[:-1] + >>> validate(f"script-src 'self' 'nonce-{invalid}'") + Traceback (most recent call last): + ... + azul.RequirementError: ('Invalid value', "'nonce-0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a'") + + Nonce is too long: + + >>> invalid = valid + '/' + >>> validate(f"script-src 'self' 'nonce-{invalid}'") + Traceback (most recent call last): + ... + azul.RequirementError: ('Invalid value', "'nonce-0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/0a+/'") + + Other invalid combinations: + + >>> validate("frame-ancestors 'none' 'none'") + Traceback (most recent call last): + ... + azul.RequirementError: ("'none' can only appear alone", ["'none'", "'none'"]) + + >>> validate("frame-ancestors 'self' 'none'") + Traceback (most recent call last): + ... + azul.RequirementError: ("'none' can only appear alone", ["'self'", "'none'"]) + + >>> validate("img-src 'self' data: 'self'") + Traceback (most recent call last): + ... + azul.RequirementError: ('Duplicated value', ["'self'", 'data:', "'self'"]) + """ + self_, none, data = sq('self'), sq('none'), 'data:' + value_res = prepend(self.nonce_re.pattern, map(re.escape, [self_, none, data])) + value_re = re.compile('|'.join(value_res)) + for name, values in self.directives.items(): + for value in values: + match = value_re.fullmatch(value) + require(match is not None, 'Invalid value', value) + require(values == [none] or none not in values, + f'{none} can only appear alone', values) + require(len(values) == len(set(values)), 'Duplicated value', values) + + def nonce(self) -> str | None: + """ + Extract the Azul nonce from this CSP, if present. If there are multiple + occurrances of a nonce, they must all be equal. + """ + return only(set( + value + for name, values in self.directives.items() + for value in values + if self.nonce_re.fullmatch(value) is not None + )) + + def __str__(self) -> str: + """ + >>> s = "img-src 'self' data:;frame-ancestors 'none'" + >>> s == str(CSP.parse(s)) + True + """ + return ';'.join( + ' '.join(value for value in prepend(name, values)) + for name, values in self.directives.items() + ) diff --git a/src/azul/terraform.py b/src/azul/terraform.py index 4166c885a6..b90db83b17 100644 --- a/src/azul/terraform.py +++ b/src/azul/terraform.py @@ -824,7 +824,7 @@ def tf_config(self, app_name): # security_headers = { f'gatewayresponse.header.{k}': f"'{v}'" - for k, v in AzulChaliceApp.security_headers.items() + for k, v in AzulChaliceApp.security_headers().items() } assert 'aws_api_gateway_gateway_response' not in resources, resources openapi_spec['x-amazon-apigateway-gateway-responses'] = ( diff --git a/src/azul/threads.py b/src/azul/threads.py index 0e81e21921..0cef7b9915 100644 --- a/src/azul/threads.py +++ b/src/azul/threads.py @@ -62,13 +62,13 @@ class Latch: >>> with ThreadPoolExecutor(max_workers=n) as tpe: ... l = Latch(n) ... fs = [tpe.submit(l.decrement, 1) for i in range(n)] - >>> [f.result() for f in fs] + >>> list(map(Future.result, fs)) [None, None] >>> with ThreadPoolExecutor(max_workers=n) as tpe: ... l = Latch(n+1) ... fs = [tpe.submit(l.decrement, 1, timeout=1) for i in range(n)] - >>> [f.result() for f in fs] + >>> list(map(Future.result, fs)) Traceback (most recent call last): ... TimeoutError @@ -79,7 +79,7 @@ def __init__(self, value): self.value = value self.condition = threading.Condition() - def decrement(self, value, timeout=None): + def decrement(self, value, *, timeout=None): require(isinstance(value, int)) self.condition.acquire() try: @@ -107,6 +107,7 @@ class DeferredTaskExecutor(metaclass=ABCMeta): ... ... def __init__(self) -> None: ... super().__init__(num_workers=2) + ... self.delta = None ... self.a, self.b, self.c, self.d = None, None, None, None ... ... def _run(self): @@ -128,10 +129,16 @@ class DeferredTaskExecutor(metaclass=ABCMeta): ... def never(self): ... self.d = 1 - >>> e = MyExecutor() - >>> e.run() # err() raises an exception + >>> from logging import Logger + >>> import unittest.mock + >>> with unittest.mock.patch.object(Logger, 'warning') as mock_warning: + ... e = MyExecutor() + ... e.run() # err() raises an exception, and emits a warning log [ValueError(123)] + >>> mock_warning.mock_calls + [call('Exception in deferred callable', exc_info=True)] + >>> 1.23 <= e.delta < 2 # set() runs after the given delay, but not much later True diff --git a/swagger/oauth2-redirect.html b/swagger/oauth2-redirect.html.template.mustache similarity index 85% rename from swagger/oauth2-redirect.html rename to swagger/oauth2-redirect.html.template.mustache index 0c0b5da508..e6266a1816 100644 --- a/swagger/oauth2-redirect.html +++ b/swagger/oauth2-redirect.html.template.mustache @@ -2,10 +2,10 @@ Swagger UI: OAuth2 Redirect - + - diff --git a/swagger/swagger-ui.html.template.mustache b/swagger/swagger-ui.html.template.mustache index 33889fca8c..ffa004c548 100644 --- a/swagger/swagger-ui.html.template.mustache +++ b/swagger/swagger-ui.html.template.mustache @@ -5,7 +5,7 @@ Swagger UI -