Skip to content

Commit

Permalink
Merge pull request #868 from openzim/zimit_checks
Browse files Browse the repository at this point in the history
Check recipe flags at schedule creation (including fix for youzim.it)
  • Loading branch information
benoit74 authored Nov 17, 2023
2 parents 5883e60 + 3680185 commit e078943
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 7 deletions.
3 changes: 3 additions & 0 deletions dispatcher/backend/src/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,6 @@
REQ_TIMEOUT_NOTIFICATIONS = int(os.getenv("REQ_TIMEOUT_NOTIFICATIONS", 5))
REQ_TIMEOUT_CMS = int(os.getenv("REQ_TIMEOUT_CMS", 10))
REQ_TIMEOUT_GHCR = int(os.getenv("REQ_TIMEOUT_GHCR", 10))

# OFFLINERS
ZIMIT_USE_RELAXED_SCHEMA = bool(os.getenv("ZIMIT_USE_RELAXED_SCHEMA"))
6 changes: 5 additions & 1 deletion dispatcher/backend/src/common/schemas/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from marshmallow import Schema, fields, pre_load, validate, validates_schema

from common import constants
from common.enum import DockerImageName, Offliner, Platform
from common.schemas import SerializableSchema, String
from common.schemas.fields import (
Expand Down Expand Up @@ -33,6 +34,7 @@
WikihowFlagsSchema,
YoutubeFlagsSchema,
ZimitFlagsSchema,
ZimitFlagsSchemaRelaxed,
)


Expand Down Expand Up @@ -84,7 +86,9 @@ def get_offliner_schema(offliner):
Offliner.nautilus: NautilusFlagsSchema,
Offliner.ted: TedFlagsSchema,
Offliner.openedx: OpenedxFlagsSchema,
Offliner.zimit: ZimitFlagsSchema,
Offliner.zimit: ZimitFlagsSchemaRelaxed
if constants.ZIMIT_USE_RELAXED_SCHEMA
else ZimitFlagsSchema,
Offliner.kolibri: KolibriFlagsSchema,
Offliner.wikihow: WikihowFlagsSchema,
Offliner.ifixit: IFixitFlagsSchema,
Expand Down
3 changes: 2 additions & 1 deletion dispatcher/backend/src/common/schemas/offliners/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from common.schemas.offliners.ted import TedFlagsSchema
from common.schemas.offliners.wikihow import WikihowFlagsSchema
from common.schemas.offliners.youtube import YoutubeFlagsSchema
from common.schemas.offliners.zimit import ZimitFlagsSchema
from common.schemas.offliners.zimit import ZimitFlagsSchema, ZimitFlagsSchemaRelaxed

__all__ = (
"FreeCodeCampFlagsSchema",
Expand All @@ -25,6 +25,7 @@
"WikihowFlagsSchema",
"YoutubeFlagsSchema",
"ZimitFlagsSchema",
"ZimitFlagsSchemaRelaxed",
)


Expand Down
16 changes: 16 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/zimit.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,3 +468,19 @@ class Meta:
data_key="adminEmail",
required=False,
)


class ZimitFlagsSchemaRelaxed(ZimitFlagsSchema):
"""A Zimit flags schema with relaxed constraints on validation
For now, only zim_file name is not checked anymore. Typically used for youzim.it
"""

zim_file = String(
metadata={
"label": "ZIM filename",
"description": "ZIM file name (based on --name if not provided). "
"Make sure to end with _{period}.zim",
},
data_key="zim-file",
)
4 changes: 4 additions & 0 deletions dispatcher/backend/src/routes/schedules/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ def post(self, session: so.Session, token: AccessToken.Payload):

try:
document = ScheduleSchema().load(request.get_json())
flags_schema = ScheduleConfigSchema.get_offliner_schema(
document["config"]["task_name"]
)
flags_schema().load(document["config"]["flags"])
except ValidationError as e:
raise InvalidRequestJSON(e.messages)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
class TestFreeCodeCamp:
def test_create_freecodecamp_schedule(
def test_create_freecodecamp_schedule_ok(
self, client, access_token, garbage_collector
):
schedule = {
"name": "fcc_javascript_test",
"name": "fcc_javascript_test_ok",
"category": "freecodecamp",
"enabled": False,
"tags": [],
Expand All @@ -14,7 +14,13 @@ def test_create_freecodecamp_schedule(
"image": {"name": "openzim/freecodecamp", "tag": "1.0.0"},
"monitor": False,
"platform": None,
"flags": {},
"flags": {
"course": ("somecourse"),
"language": "eng",
"name": "acourse",
"title": "a title",
"description": "a description",
},
"resources": {"cpu": 3, "memory": 1024, "disk": 0},
},
"periodicity": "quarterly",
Expand All @@ -24,9 +30,10 @@ def test_create_freecodecamp_schedule(
response = client.post(
url, json=schedule, headers={"Authorization": access_token}
)
assert response.status_code == 201
response_data = response.get_json()
garbage_collector.add_schedule_id(response_data["_id"])
if "_id" in response_data:
garbage_collector.add_schedule_id(response_data["_id"])
assert response.status_code == 201

patch_data = {
"enabled": True,
Expand All @@ -50,3 +57,48 @@ def test_create_freecodecamp_schedule(
url, json=patch_data, headers={"Authorization": access_token}
)
assert response.status_code == 204

def test_create_freecodecamp_schedule_ko(
self, client, access_token, garbage_collector
):
schedule = {
"name": "fcc_javascript_test_ko",
"category": "freecodecamp",
"enabled": False,
"tags": [],
"language": {"code": "fr", "name_en": "French", "name_native": "Français"},
"config": {
"task_name": "freecodecamp",
"warehouse_path": "/freecodecamp",
"image": {"name": "openzim/freecodecamp", "tag": "1.0.0"},
"monitor": False,
"platform": None,
"flags": {
"course": ("somecourse"),
"language": "eng",
"name": "acourse",
"title": "a title",
"description": (
"a description which is way way way way way way way way way "
"way way way way way way way way way too long"
),
},
"resources": {"cpu": 3, "memory": 1024, "disk": 0},
},
"periodicity": "quarterly",
}

url = "/schedules/"
response = client.post(
url, json=schedule, headers={"Authorization": access_token}
)
response_data = response.get_json()
if "_id" in response_data:
garbage_collector.add_schedule_id(response_data["_id"])
assert response.status_code == 400
assert "error_description" in response_data
assert "description" in response_data["error_description"]
assert (
"Longer than maximum length 80."
in response_data["error_description"]["description"]
)
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,25 @@ def test_create_schedule_missing_keys(self, client, access_token, key):
"flags": {},
"image": {"name": "openzim/phet", "tag": "latest"},
"monitor": False,
"platform": None,
"resources": {"cpu": 3, "memory": 1024, "disk": 0},
},
"periodicity": "quarterly",
}

del schedule[key]
url = "/schedules/"
response = client.post(
url, json=schedule, headers={"Authorization": access_token}
)
response_data = response.get_json()
assert response.status_code == 400
assert "error_description" in response_data
assert key in response_data["error_description"]
assert (
"Missing data for required field."
in response_data["error_description"][key]
)

@pytest.mark.parametrize("key", ["warehouse_path", "flags", "image"])
def test_create_schedule_missing_config_keys(self, client, access_token, key):
Expand All @@ -293,15 +303,62 @@ def test_create_schedule_missing_config_keys(self, client, access_token, key):
"flags": {},
"image": {"name": "openzim/phet", "tag": "latest"},
"monitor": False,
"platform": None,
"resources": {"cpu": 3, "memory": 1024, "disk": 0},
},
"periodicity": "quarterly",
}

del schedule["config"][key]
url = "/schedules/"
response = client.post(
url, json=schedule, headers={"Authorization": access_token}
)
response_data = response.get_json()
assert response.status_code == 400
assert "error_description" in response_data
assert "config" in response_data["error_description"]
assert key in response_data["error_description"]["config"]
assert (
"Missing data for required field."
in response_data["error_description"]["config"][key]
)

def test_create_schedule_flags_ko(self, client, access_token):
schedule = {
"name": "ifixit flags ko",
"category": "ifixit",
"enabled": False,
"tags": [],
"language": {
"code": "en",
"name_en": "English",
"name_native": "English",
},
"config": {
"task_name": "ifixit",
"warehouse_path": "/ifixit",
"flags": {},
"image": {"name": "openzim/ifixit", "tag": "latest"},
"monitor": False,
"platform": "ifixit",
"resources": {"cpu": 3, "memory": 1024, "disk": 0},
},
"periodicity": "quarterly",
}

url = "/schedules/"
response = client.post(
url, json=schedule, headers={"Authorization": access_token}
)
response_data = response.get_json()
assert response.status_code == 400
assert "error_description" in response_data
assert "language" in response_data["error_description"]
assert (
"Missing data for required field."
in response_data["error_description"]["language"]
)

def test_image_names(self, client, schedule, access_token):
url = "/schedules/{}/image-names".format(schedule["name"])
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from collections import namedtuple
from typing import List

import pytest

from common import constants


def update_dict(dict: dict, key_path: str, new_value: any):
"""Update a nested key value in a dictionary
E.g if key_path is 'key1.subkey2', then dict['key1']['subkey2'] will be set"""

# Split the key path into individual keys
keys = key_path.split(".")

# Initialize a reference to the nested dictionary
current_dict = dict

# Navigate through the nested structure
for key in keys[:-1]:
current_dict = current_dict[key]

# Update the value using the last key
current_dict[keys[-1]] = new_value


class TestZimit:
mod = namedtuple("Modification", ["key_path", "new_value"])

@pytest.mark.parametrize(
"modifications, relaxed_schema, succeeds",
[
(
[mod(key_path="name", new_value="zimit_test_good_name_not_relaxed")],
False,
True,
),
(
[mod(key_path="name", new_value="zimit_test_good_name_relaxed")],
True,
True,
),
(
[
mod(key_path="name", new_value="zimit_test_bad_name_not_relaxed"),
mod(key_path="config.flags.zim-file", new_value="bad_name"),
],
False,
False,
),
(
[
mod(key_path="name", new_value="zimit_test_bad_name_relaxed"),
mod(key_path="config.flags.zim-file", new_value="bad_name"),
],
True,
True,
),
],
)
def test_create_zimit_schedule_generic(
self,
client,
access_token,
garbage_collector,
modifications: List[mod],
relaxed_schema: bool,
succeeds: bool,
):
constants.ZIMIT_USE_RELAXED_SCHEMA = relaxed_schema
schedule = {
"name": "zimit_test_ok",
"category": "other",
"enabled": False,
"tags": [],
"language": {"code": "fr", "name_en": "French", "name_native": "Français"},
"config": {
"task_name": "zimit",
"warehouse_path": "/other",
"image": {"name": "openzim/zimit", "tag": "1.0.0"},
"monitor": False,
"platform": None,
"flags": {
"name": "acme",
"url": "https://www.acme.com",
"zim-file": "acme_en_all_{period}.zim",
},
"resources": {"cpu": 3, "memory": 1024, "disk": 0},
},
"periodicity": "quarterly",
}
for modification in modifications:
update_dict(schedule, modification.key_path, modification.new_value)
url = "/schedules/"
response = client.post(
url, json=schedule, headers={"Authorization": access_token}
)
response_data = response.get_json()
if "_id" in response_data:
garbage_collector.add_schedule_id(response_data["_id"])
if succeeds:
assert response.status_code == 201
else:
assert response.status_code == 400
assert "error_description" in response_data
assert "zim-file" in response_data["error_description"]

0 comments on commit e078943

Please sign in to comment.