diff --git a/lms/services/canvas_studio.py b/lms/services/canvas_studio.py index 1c6b8ee46f..d554bc2f06 100644 --- a/lms/services/canvas_studio.py +++ b/lms/services/canvas_studio.py @@ -1,5 +1,5 @@ from functools import lru_cache -from typing import Literal, NotRequired, Type, TypedDict +from typing import Literal, Mapping, NotRequired, Type, TypedDict from urllib.parse import urlencode, urljoin, urlparse, urlunparse import requests @@ -20,6 +20,27 @@ from lms.validation._base import RequestsResponseSchema +class PaginationSchema(Schema): + """ + Schema for `meta` field in paginated Canvas Studio responses. + + See `PaginationMeta` on https://tw.instructuremedia.com/api/public/docs/. + """ + + class Meta: + unknown = EXCLUDE + + current_page = fields.Integer() + """1-based page number.""" + + last_page = fields.Integer() + """1-based number of last page.""" + + +MAX_PAGE_SIZE = 50 +"""Maximum value for `per_page` argument to paginated API calls.""" + + class CanvasStudioCollectionsSchema(RequestsResponseSchema): """Schema for Canvas Studio /collections responses.""" @@ -33,10 +54,7 @@ class Meta: created_at = fields.Str(required=False) collections = fields.List(fields.Nested(CollectionSchema), required=True) - - @post_load - def post_load(self, data, **_kwargs): - return data["collections"] + meta = fields.Nested(PaginationSchema, required=True) class CanvasStudioCollectionMediaSchema(RequestsResponseSchema): @@ -53,10 +71,7 @@ class Meta: thumbnail_url = fields.Str(required=False) media = fields.List(fields.Nested(MediaSchema), required=True) - - @post_load - def post_load(self, data, **_kwargs): - return data["media"] + meta = fields.Nested(PaginationSchema, required=True) class CanvasStudioCaptionFilesSchema(RequestsResponseSchema): @@ -119,7 +134,10 @@ class CanvasStudioService: API reference: https://tw.instructuremedia.com/api/public/docs/ """ - def __init__(self, request, application_instance): + page_size: int + """Number of items to fetch per page.""" + + def __init__(self, request, application_instance, page_size=MAX_PAGE_SIZE): self._domain = application_instance.settings.get("canvas_studio", "domain") self._client_id = application_instance.settings.get( "canvas_studio", "client_id" @@ -132,6 +150,7 @@ def __init__(self, request, application_instance): ) self._request = request self._application_instance = application_instance + self.page_size = page_size def get_access_token(self, code: str) -> None: """ @@ -218,7 +237,9 @@ def list_media_library(self) -> list[File]: picker when creating a Page. """ - collections = self._api_request("v1/collections", CanvasStudioCollectionsSchema) + collections = self._paginated_api_request( + "v1/collections", CanvasStudioCollectionsSchema, field="collections" + ) user_collection = None files: list[File] = [] @@ -254,8 +275,10 @@ def list_media_library(self) -> list[File]: def list_collection(self, collection_id: str) -> list[File]: """List the videos in a collection.""" - media = self._api_request( - f"v1/collections/{collection_id}/media", CanvasStudioCollectionMediaSchema + media = self._paginated_api_request( + f"v1/collections/{collection_id}/media", + CanvasStudioCollectionMediaSchema, + field="media", ) files: list[File] = [] @@ -353,16 +376,51 @@ def get_transcript_url(self, media_id: str) -> str | None: return None def _api_request( - self, path: str, schema_cls: Type[RequestsResponseSchema], as_admin=False + self, + path: str, + schema_cls: Type[RequestsResponseSchema], + as_admin=False, ) -> dict: """ Make a request to the Canvas Studio API and parse the JSON response. + :param path: Request path, relative to the API root + :param schema_cls: Schema to parse the JSON response :param as_admin: Make the request using the admin account instead of the current user. """ response = self._bare_api_request(path, as_admin=as_admin) return schema_cls(response).parse() + def _paginated_api_request( + self, + path: str, + schema_cls: Type[RequestsResponseSchema], + field: str, + as_admin=False, + ) -> list: + """ + Fetch a paginated collection via the Canvas Studio API. + + :param path: Request path, relative to the API root + :param schema_cls: Schema to parse the JSON response + :param field: Response field containing items from each page + :param as_admin: Make the request using the admin account instead of the current user. + """ + max_pages = 100 + next_page = 1 + last_page = None + collated = [] + + while last_page is None or next_page <= last_page: + params = {"page": next_page, "per_page": self.page_size} + response = self._bare_api_request(path, as_admin=as_admin, params=params) + parsed = schema_cls(response).parse() + collated += parsed[field] + last_page = min(parsed["meta"]["last_page"], max_pages) + next_page += 1 + + return collated + def _admin_api_request(self, path: str, allow_redirects=True) -> requests.Response: """ Make a request to the Canvas Studio API using the admin user identity. @@ -401,14 +459,18 @@ def _admin_api_request(self, path: str, allow_redirects=True) -> requests.Respon ) from err def _bare_api_request( - self, path: str, as_admin=False, allow_redirects=True + self, + path: str, + as_admin=False, + allow_redirects=True, + params: Mapping[str, str | int] | None = None, ) -> requests.Response: """ Make a request to the Canvas Studio API and return the response. :param as_admin: Make the request using the admin account instead of the current user. """ - url = self._api_url(path) + url = self._api_url(path, params) if as_admin and not self.is_admin(): return self._admin_api_request(path, allow_redirects=allow_redirects) @@ -426,7 +488,7 @@ def _bare_api_request( refresh_service=Service.CANVAS_STUDIO, ) from err - def _api_url(self, path: str) -> str: + def _api_url(self, path: str, params: Mapping[str, str | int] | None = None) -> str: """ Return the URL of a Canvas Studio API endpoint. @@ -434,10 +496,14 @@ def _api_url(self, path: str) -> str: endpoints. :param path: Path of endpoint relative to the API root + :param params: Query parameters """ site = self._canvas_studio_site() - return f"{site}/api/public/{path}" + url = f"{site}/api/public/{path}" + if params: + url = url + "?" + urlencode(params) + return url def _admin_email(self) -> str: """Return the email address of the configured Canvas Studio admin.""" diff --git a/tests/unit/lms/services/canvas_studio_test.py b/tests/unit/lms/services/canvas_studio_test.py index 7d75404844..9216fb6f49 100644 --- a/tests/unit/lms/services/canvas_studio_test.py +++ b/tests/unit/lms/services/canvas_studio_test.py @@ -1,5 +1,6 @@ +import math from unittest.mock import create_autospec, patch, sentinel -from urllib.parse import urlencode +from urllib.parse import parse_qs, urlencode, urlparse import pytest from h_matchers import Any @@ -99,8 +100,19 @@ def test_redirect_uri_localhost_workaround(self, svc, pyramid_request): == "https://hypothesis.local:48001/api/canvas_studio/oauth/callback" ) - def test_list_media_library(self, svc): + @pytest.mark.parametrize( + "page_size", + ( + None, # Default, one page will be enough + 3, # Multiple pages will be needed + ), + ) + def test_list_media_library(self, svc, page_size): + if page_size is not None: + svc.page_size = page_size + files = svc.list_media_library() + assert files == [ { "type": "Folder", @@ -166,8 +178,19 @@ def test_list_media_library_other_error(self, svc, oauth_http_service): assert exc_info.value == err - def test_list_collection(self, svc): + @pytest.mark.parametrize( + "page_size", + ( + None, # Default. One page will be enough + 1, # Multiple pages will be needed + ), + ) + def test_list_collection(self, svc, page_size): + if page_size is not None: + svc.page_size = page_size + files = svc.list_collection("8") + assert files == [ { "type": "File", @@ -318,7 +341,7 @@ def admin_oauth_http_service(self): svc.get.side_effect = self.get_request_handler(is_admin=True) return svc - def get_request_handler(self, collections=None, is_admin=False): + def get_request_handler(self, collections=None, is_admin=False): # noqa: PLR0915 pylint:disable=too-complex """ Create a handler for `GET` requests to the Canvas Studio API. @@ -350,36 +373,63 @@ def make_file(id_, title, created_at, with_thumbnail=False): make_collection(10, None, "course_wide", "2024-03-01"), ] - def handler(url, allow_redirects=True): - api_prefix = "https://hypothesis.instructuremedia.com/api/public/v1/" - assert url.startswith(api_prefix) + def handler(url, allow_redirects=True): # noqa: PLR0912, PLR0915 + api_prefix = "/api/public/v1/" + + parsed_url = urlparse(url) + assert parsed_url.scheme == "https" + assert parsed_url.netloc == "hypothesis.instructuremedia.com" + assert parsed_url.path.startswith(api_prefix) - url_suffix = url[len(api_prefix) :] json_data = None status_code = 200 - - match url_suffix: + per_page = 20 + page = 1 + + params = parse_qs(parsed_url.query) + if "per_page" in params: + per_page = int(params["per_page"][0]) + if "page" in params: + page = int(params["page"][0]) + + def collection_data(items_field, items): + n_pages = max(math.ceil(len(items) / per_page), 1) + assert 1 <= page <= n_pages + + start = (page - 1) * per_page + end = start + per_page + + return { + items_field: items[start:end], + "meta": { + "current_page": page, + "last_page": n_pages, + }, + } + + resource_path = parsed_url.path[len(api_prefix) :] + match resource_path: case "collections": - json_data = { - "collections": collections.copy(), - } + json_data = collection_data("collections", collections.copy()) case "collections/1/media": - json_data = { - "media": [ + json_data = collection_data( + "media", + [ make_file(5, "Test video", "2024-02-03"), - ] - } + ], + ) case "collections/8/media": - json_data = { - "media": [ + json_data = collection_data( + "media", + [ make_file( 7, "Some video", "2024-02-04", with_thumbnail=True ), make_file( 6, "Another video", "2024-02-04", with_thumbnail=True ), - ] - } + ], + ) case "media/42/caption_files": assert is_admin json_data = {