From f805c16a6f174a5020f0c31924216dbc7a408de1 Mon Sep 17 00:00:00 2001 From: Tim McCormack Date: Fri, 3 May 2024 00:35:23 +0000 Subject: [PATCH] feat: Start conversion of StaticContentServer from middleware into view See https://github.com/openedx/edx-platform/issues/34702 This necessarily involves switching from calling `StaticContent.is_versioned_asset_path` to determine whether to handle the request to having a hardcoded urlpattern. I've made the choice to hardcode the other two patterns similarly rather than using imported constants. The mapping of URL patterns to database records should be explicit (even though we don't expect those constants to change out from under us.) I've renamed the middleware rather than choosing a new name for the implementation because there are other references in tests and other code. This was the smaller change. A note on HTTP methods: The middleware currently completely ignores the request's HTTP method, so I wanted to confirm that only GETs were being used in practice. This query reveals that 99.8% of requests that this middleware handles are GET, with just a smattering of PROPFIND and OPTIONS and a tiny number of HEAD and POST: ``` from Transaction select count(*) facet request.method where name = 'WebTransaction/Function/openedx.core.djangoapps.contentserver.middleware:StaticContentServer' since 4 weeks ago ``` --- cms/envs/common.py | 2 +- cms/urls.py | 3 + lms/envs/common.py | 2 +- lms/urls.py | 3 + .../djangoapps/contentserver/middleware.py | 32 +++++++++- openedx/core/djangoapps/contentserver/urls.py | 16 +++++ .../core/djangoapps/contentserver/views.py | 58 +++++++++++++++++++ 7 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 openedx/core/djangoapps/contentserver/urls.py create mode 100644 openedx/core/djangoapps/contentserver/views.py diff --git a/cms/envs/common.py b/cms/envs/common.py index ebc101801fe2..b1bd3cadb6a3 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -947,7 +947,7 @@ 'openedx.core.djangoapps.cache_toolbox.middleware.CacheBackedAuthenticationMiddleware', 'common.djangoapps.student.middleware.UserStandingMiddleware', - 'openedx.core.djangoapps.contentserver.middleware.StaticContentServer', + 'openedx.core.djangoapps.contentserver.middleware.StaticContentServerMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'common.djangoapps.track.middleware.TrackMiddleware', diff --git a/cms/urls.py b/cms/urls.py index 9828e9d0fbf0..e21d07083eea 100644 --- a/cms/urls.py +++ b/cms/urls.py @@ -74,6 +74,9 @@ path('heartbeat', include('openedx.core.djangoapps.heartbeat.urls')), path('i18n/', include('django.conf.urls.i18n')), + # Course assets + path('', include('openedx.core.djangoapps.contentserver.urls')), + # User API endpoints path('api/user/', include('openedx.core.djangoapps.user_api.urls')), diff --git a/lms/envs/common.py b/lms/envs/common.py index da2bfed626e0..4f70df63ede1 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -2286,7 +2286,7 @@ def _make_locale_paths(settings): # pylint: disable=missing-function-docstring 'openedx.core.djangoapps.safe_sessions.middleware.EmailChangeMiddleware', 'common.djangoapps.student.middleware.UserStandingMiddleware', - 'openedx.core.djangoapps.contentserver.middleware.StaticContentServer', + 'openedx.core.djangoapps.contentserver.middleware.StaticContentServerMiddleware', # Adds user tags to tracking events # Must go before TrackMiddleware, to get the context set up diff --git a/lms/urls.py b/lms/urls.py index 5ac6283fddc7..48b0aecaeb88 100644 --- a/lms/urls.py +++ b/lms/urls.py @@ -111,6 +111,9 @@ path('i18n/', include('django.conf.urls.i18n')), + # Course assets + path('', include('openedx.core.djangoapps.contentserver.urls')), + # Enrollment API RESTful endpoints path('api/enrollment/v1/', include('openedx.core.djangoapps.enrollments.urls')), diff --git a/openedx/core/djangoapps/contentserver/middleware.py b/openedx/core/djangoapps/contentserver/middleware.py index f159a00a56ba..6eb72da458a0 100644 --- a/openedx/core/djangoapps/contentserver/middleware.py +++ b/openedx/core/djangoapps/contentserver/middleware.py @@ -16,6 +16,7 @@ ) from django.utils.deprecation import MiddlewareMixin from edx_django_utils.monitoring import set_custom_attribute +from edx_toggles.toggles import WaffleFlag from opaque_keys import InvalidKeyError from opaque_keys.edx.locator import AssetLocator @@ -32,6 +33,18 @@ log = logging.getLogger(__name__) +# .. toggle_name: content_server.use_view +# .. toggle_implementation: WaffleFlag +# .. toggle_default: False +# .. toggle_description: Deployment flag for switching asset serving from a middleware +# to a view. Intended to be used once in each environment to test the cutover and +# ensure there are no errors or changes in behavior. Once this has been tested, +# the middleware can be fully converted to a view. +# .. toggle_use_cases: temporary +# .. toggle_creation_date: 2024-05-02 +# .. toggle_target_removal_date: 2024-07-01 +# .. toggle_tickets: https://github.com/openedx/edx-platform/issues/34702 +CONTENT_SERVER_USE_VIEW = WaffleFlag('content_server.use_view', module_name=__name__) # TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need # to change this file so instead of using course_id_partial, we're just using asset keys @@ -39,12 +52,26 @@ HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT" -class StaticContentServer(MiddlewareMixin): +class StaticContentServerMiddleware(MiddlewareMixin): + """ + Shim to maintain old pattern of serving course assets from a middleware. See views.py. + """ + def process_request(self, request): + """Intercept asset request or allow view to handle it, depending on config.""" + if CONTENT_SERVER_USE_VIEW.is_enabled(): + return + else: + set_custom_attribute('content_server.handled_by.middleware', True) + return IMPL.process_request(request) + + +class StaticContentServer(): """ Serves course assets to end users. Colloquially referred to as "contentserver." """ def is_asset_request(self, request): """Determines whether the given request is an asset request""" + # Don't change this without updating urls.py! See docstring of views.py. return ( request.path.startswith('/' + XASSET_LOCATION_TAG + '/') or @@ -295,6 +322,9 @@ def load_asset_from_location(self, location): return content +IMPL = StaticContentServer() + + def parse_range_header(header_value, content_length): """ Returns the unit and a list of (start, end) tuples of ranges. diff --git a/openedx/core/djangoapps/contentserver/urls.py b/openedx/core/djangoapps/contentserver/urls.py new file mode 100644 index 000000000000..96bbe6bf3820 --- /dev/null +++ b/openedx/core/djangoapps/contentserver/urls.py @@ -0,0 +1,16 @@ +""" +URL patterns for course asset serving. +""" + +from django.urls import path, re_path + +from . import views + +# These patterns are incomplete and do not capture the variable +# components of the URLs. That's because the view itself is separately +# parsing the paths, for historical reasons. See docstring on views.py. +urlpatterns = [ + path("c4x/", views.course_assets_view), + re_path("^asset-v1:", views.course_assets_view), + re_path("^assets/courseware/", views.course_assets_view), +] diff --git a/openedx/core/djangoapps/contentserver/views.py b/openedx/core/djangoapps/contentserver/views.py new file mode 100644 index 000000000000..84bd4c3b1046 --- /dev/null +++ b/openedx/core/djangoapps/contentserver/views.py @@ -0,0 +1,58 @@ +""" +Views for serving course assets. + +Historically, this was implemented as a *middleware* (StaticContentServer) that +intercepted requests with paths matching certain patterns, rather than using +urlpatterns and a view. There wasn't any good reason for this, as far as I can +tell. It causes some problems for telemetry: When the code-owner middleware asks +Django what view handled the request, it does so by looking at the result of the +`resolve` utility, but these URLs get a Resolver404 (because there's no +registered urlpattern). + +We'd like to turn this into a proper view: +https://github.com/openedx/edx-platform/issues/34702 + +The first step, seen here, is to have urlpatterns (redundant with the +middleware's `is_asset_request` method) and a view, but the view just calls into +the same code the middleware uses. The implementation of the middleware has been +moved into StaticContentServerImpl, leaving the middleware as just a shell +around the latter. + +A waffle flag chooses whether to allow the middleware to handle the request, or +whether to pass the request along to the view. Why? Because we might be relying +by accident on some weird behavior inherent to misusing a middleware this way, +and we need a way to quickly switch back if we encounter problems. + +If the view works, we can move all of StaticContentServerImpl directly into the +view and drop the middleware and the waffle flag. +""" +from django.http import HttpResponseNotFound +from django.views.decorators.http import require_safe +from edx_django_utils.monitoring import set_custom_attribute + +from .middleware import CONTENT_SERVER_USE_VIEW, IMPL + + +@require_safe +def course_assets_view(request): + """ + Serve course assets to end users. Colloquially referred to as "contentserver." + """ + set_custom_attribute('content_server.handled_by.view', True) + + if not CONTENT_SERVER_USE_VIEW.is_enabled(): + # Should never happen; keep track of occurrences. + set_custom_attribute('content_server.view.called_when_disabled', True) + # But handle the request anyhow. + + # We'll delegate request handling to an instance of the middleware + # until we can verify that the behavior is identical when requests + # come all the way through to the view. + response = IMPL.process_request(request) + + if response is None: + # Shouldn't happen + set_custom_attribute('content_server.view.no_response_from_impl', True) + return HttpResponseNotFound() + else: + return response