Skip to content

Commit

Permalink
Fetch all pages of paginated Canvas Studio API responses
Browse files Browse the repository at this point in the history
Lists of collections and media are paginated, using offset-based pagination.
Fetch all of pages, up to a limit, instead of just the first one. Also increase
the size for each page from 20 items up to the maximum of 50.

Fixes #6354
  • Loading branch information
robertknight committed Jun 17, 2024
1 parent 52f8ec5 commit 6856d22
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 39 deletions.
102 changes: 84 additions & 18 deletions lms/services/canvas_studio.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from functools import lru_cache
from typing import Literal, NotRequired, Type, TypedDict
from typing import Literal, Mapping, NotRequired, Type, TypedDict
from urllib.parse import urlencode, urljoin, urlparse, urlunparse

import requests
Expand All @@ -20,6 +20,27 @@
from lms.validation._base import RequestsResponseSchema


class PaginationSchema(Schema):
"""
Schema for `meta` field in paginated Canvas Studio responses.
See `PaginationMeta` on https://tw.instructuremedia.com/api/public/docs/.
"""

class Meta:
unknown = EXCLUDE

current_page = fields.Integer()
"""1-based page number."""

last_page = fields.Integer()
"""1-based number of last page."""


MAX_PAGE_SIZE = 50
"""Maximum value for `per_page` argument to paginated API calls."""


class CanvasStudioCollectionsSchema(RequestsResponseSchema):
"""Schema for Canvas Studio /collections responses."""

Expand All @@ -33,10 +54,7 @@ class Meta:
created_at = fields.Str(required=False)

collections = fields.List(fields.Nested(CollectionSchema), required=True)

@post_load
def post_load(self, data, **_kwargs):
return data["collections"]
meta = fields.Nested(PaginationSchema, required=True)


class CanvasStudioCollectionMediaSchema(RequestsResponseSchema):
Expand All @@ -53,10 +71,7 @@ class Meta:
thumbnail_url = fields.Str(required=False)

media = fields.List(fields.Nested(MediaSchema), required=True)

@post_load
def post_load(self, data, **_kwargs):
return data["media"]
meta = fields.Nested(PaginationSchema, required=True)


class CanvasStudioCaptionFilesSchema(RequestsResponseSchema):
Expand Down Expand Up @@ -119,7 +134,10 @@ class CanvasStudioService:
API reference: https://tw.instructuremedia.com/api/public/docs/
"""

def __init__(self, request, application_instance):
page_size: int
"""Number of items to fetch per page."""

def __init__(self, request, application_instance, page_size=MAX_PAGE_SIZE):
self._domain = application_instance.settings.get("canvas_studio", "domain")
self._client_id = application_instance.settings.get(
"canvas_studio", "client_id"
Expand All @@ -132,6 +150,7 @@ def __init__(self, request, application_instance):
)
self._request = request
self._application_instance = application_instance
self.page_size = page_size

def get_access_token(self, code: str) -> None:
"""
Expand Down Expand Up @@ -218,7 +237,9 @@ def list_media_library(self) -> list[File]:
picker when creating a Page.
"""

collections = self._api_request("v1/collections", CanvasStudioCollectionsSchema)
collections = self._paginated_api_request(
"v1/collections", CanvasStudioCollectionsSchema, field="collections"
)
user_collection = None

files: list[File] = []
Expand Down Expand Up @@ -254,8 +275,10 @@ def list_media_library(self) -> list[File]:
def list_collection(self, collection_id: str) -> list[File]:
"""List the videos in a collection."""

media = self._api_request(
f"v1/collections/{collection_id}/media", CanvasStudioCollectionMediaSchema
media = self._paginated_api_request(
f"v1/collections/{collection_id}/media",
CanvasStudioCollectionMediaSchema,
field="media",
)

files: list[File] = []
Expand Down Expand Up @@ -353,16 +376,51 @@ def get_transcript_url(self, media_id: str) -> str | None:
return None

def _api_request(
self, path: str, schema_cls: Type[RequestsResponseSchema], as_admin=False
self,
path: str,
schema_cls: Type[RequestsResponseSchema],
as_admin=False,
) -> dict:
"""
Make a request to the Canvas Studio API and parse the JSON response.
:param path: Request path, relative to the API root
:param schema_cls: Schema to parse the JSON response
:param as_admin: Make the request using the admin account instead of the current user.
"""
response = self._bare_api_request(path, as_admin=as_admin)
return schema_cls(response).parse()

def _paginated_api_request(
self,
path: str,
schema_cls: Type[RequestsResponseSchema],
field: str,
as_admin=False,
) -> list:
"""
Fetch a paginated collection via the Canvas Studio API.
:param path: Request path, relative to the API root
:param schema_cls: Schema to parse the JSON response
:param field: Response field containing items from each page
:param as_admin: Make the request using the admin account instead of the current user.
"""
max_pages = 100
next_page = 1
last_page = None
collated = []

while last_page is None or next_page <= last_page:
params = {"page": next_page, "per_page": self.page_size}
response = self._bare_api_request(path, as_admin=as_admin, params=params)
parsed = schema_cls(response).parse()
collated += parsed[field]
last_page = min(parsed["meta"]["last_page"], max_pages)
next_page += 1

return collated

def _admin_api_request(self, path: str, allow_redirects=True) -> requests.Response:
"""
Make a request to the Canvas Studio API using the admin user identity.
Expand Down Expand Up @@ -401,14 +459,18 @@ def _admin_api_request(self, path: str, allow_redirects=True) -> requests.Respon
) from err

def _bare_api_request(
self, path: str, as_admin=False, allow_redirects=True
self,
path: str,
as_admin=False,
allow_redirects=True,
params: Mapping[str, str | int] | None = None,
) -> requests.Response:
"""
Make a request to the Canvas Studio API and return the response.
:param as_admin: Make the request using the admin account instead of the current user.
"""
url = self._api_url(path)
url = self._api_url(path, params)

if as_admin and not self.is_admin():
return self._admin_api_request(path, allow_redirects=allow_redirects)
Expand All @@ -426,18 +488,22 @@ def _bare_api_request(
refresh_service=Service.CANVAS_STUDIO,
) from err

def _api_url(self, path: str) -> str:
def _api_url(self, path: str, params: Mapping[str, str | int] | None = None) -> str:
"""
Return the URL of a Canvas Studio API endpoint.
See https://tw.instructuremedia.com/api/public/docs/ for available
endpoints.
:param path: Path of endpoint relative to the API root
:param params: Query parameters
"""

site = self._canvas_studio_site()
return f"{site}/api/public/{path}"
url = f"{site}/api/public/{path}"
if params:
url = url + "?" + urlencode(params)
return url

def _admin_email(self) -> str:
"""Return the email address of the configured Canvas Studio admin."""
Expand Down
92 changes: 71 additions & 21 deletions tests/unit/lms/services/canvas_studio_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import math
from unittest.mock import create_autospec, patch, sentinel
from urllib.parse import urlencode
from urllib.parse import parse_qs, urlencode, urlparse

import pytest
from h_matchers import Any
Expand Down Expand Up @@ -99,8 +100,19 @@ def test_redirect_uri_localhost_workaround(self, svc, pyramid_request):
== "https://hypothesis.local:48001/api/canvas_studio/oauth/callback"
)

def test_list_media_library(self, svc):
@pytest.mark.parametrize(
"page_size",
(
None, # Default, one page will be enough
3, # Multiple pages will be needed
),
)
def test_list_media_library(self, svc, page_size):
if page_size is not None:
svc.page_size = page_size

files = svc.list_media_library()

assert files == [
{
"type": "Folder",
Expand Down Expand Up @@ -166,8 +178,19 @@ def test_list_media_library_other_error(self, svc, oauth_http_service):

assert exc_info.value == err

def test_list_collection(self, svc):
@pytest.mark.parametrize(
"page_size",
(
None, # Default. One page will be enough
1, # Multiple pages will be needed
),
)
def test_list_collection(self, svc, page_size):
if page_size is not None:
svc.page_size = page_size

files = svc.list_collection("8")

assert files == [
{
"type": "File",
Expand Down Expand Up @@ -318,7 +341,7 @@ def admin_oauth_http_service(self):
svc.get.side_effect = self.get_request_handler(is_admin=True)
return svc

def get_request_handler(self, collections=None, is_admin=False):
def get_request_handler(self, collections=None, is_admin=False): # noqa: PLR0915 pylint:disable=too-complex
"""
Create a handler for `GET` requests to the Canvas Studio API.
Expand Down Expand Up @@ -350,36 +373,63 @@ def make_file(id_, title, created_at, with_thumbnail=False):
make_collection(10, None, "course_wide", "2024-03-01"),
]

def handler(url, allow_redirects=True):
api_prefix = "https://hypothesis.instructuremedia.com/api/public/v1/"
assert url.startswith(api_prefix)
def handler(url, allow_redirects=True): # noqa: PLR0912, PLR0915
api_prefix = "/api/public/v1/"

parsed_url = urlparse(url)
assert parsed_url.scheme == "https"
assert parsed_url.netloc == "hypothesis.instructuremedia.com"
assert parsed_url.path.startswith(api_prefix)

url_suffix = url[len(api_prefix) :]
json_data = None
status_code = 200

match url_suffix:
per_page = 20
page = 1

params = parse_qs(parsed_url.query)
if "per_page" in params:
per_page = int(params["per_page"][0])
if "page" in params:
page = int(params["page"][0])

def collection_data(items_field, items):
n_pages = max(math.ceil(len(items) / per_page), 1)
assert 1 <= page <= n_pages

start = (page - 1) * per_page
end = start + per_page

return {
items_field: items[start:end],
"meta": {
"current_page": page,
"last_page": n_pages,
},
}

resource_path = parsed_url.path[len(api_prefix) :]
match resource_path:
case "collections":
json_data = {
"collections": collections.copy(),
}
json_data = collection_data("collections", collections.copy())
case "collections/1/media":
json_data = {
"media": [
json_data = collection_data(
"media",
[
make_file(5, "Test video", "2024-02-03"),
]
}
],
)
case "collections/8/media":
json_data = {
"media": [
json_data = collection_data(
"media",
[
make_file(
7, "Some video", "2024-02-04", with_thumbnail=True
),
make_file(
6, "Another video", "2024-02-04", with_thumbnail=True
),
]
}
],
)
case "media/42/caption_files":
assert is_admin
json_data = {
Expand Down

0 comments on commit 6856d22

Please sign in to comment.