Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Devdocs offliner, category and warehouse path #1020

Merged
merged 2 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dev/receiver/create-warehouse-paths.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mkdir -p \
/jail/zim/freecodecamp \
/jail/zim/gutenberg \
/jail/zim/ifixit \
/jail/zim/devdocs \
/jail/zim/mooc \
/jail/zim/other \
/jail/zim/phet \
Expand All @@ -28,6 +29,7 @@ chmod 777 \
/jail/zim/freecodecamp \
/jail/zim/gutenberg \
/jail/zim/ifixit \
/jail/zim/devdocs \
/jail/zim/mooc \
/jail/zim/other \
/jail/zim/phet \
Expand Down
1 change: 1 addition & 0 deletions dispatcher/backend/docs/openapi_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1915,6 +1915,7 @@ components:
- wikihow
- zimit
- ifixit
- devdocs
example:
- mwoffliner
- sotoki
Expand Down
17 changes: 16 additions & 1 deletion dispatcher/backend/src/common/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class ScheduleCategory:
wiktionary = "wiktionary"
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"

@classmethod
def all(cls):
Expand All @@ -137,6 +138,7 @@ def all(cls):
cls.wiktionary,
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
]

@classmethod
Expand Down Expand Up @@ -168,6 +170,7 @@ class DockerImageName:
wikihow = "openzim/wikihow"
ifixit = "openzim/ifixit"
freecodecamp = "openzim/freecodecamp"
devdocs = "openzim/devdocs"

@classmethod
def all(cls) -> set:
Expand All @@ -185,6 +188,7 @@ def all(cls) -> set:
cls.wikihow,
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
}


Expand All @@ -202,6 +206,7 @@ class Offliner:
wikihow = "wikihow"
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"

@classmethod
def all(cls):
Expand All @@ -219,6 +224,7 @@ def all(cls):
cls.wikihow,
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
]

@classmethod
Expand All @@ -243,6 +249,7 @@ def get_image_name(cls, offliner):
cls.wikihow: DockerImageName.wikihow,
cls.ifixit: DockerImageName.ifixit,
cls.freecodecamp: DockerImageName.freecodecamp,
cls.devdocs: DockerImageName.devdocs,
}.get(offliner, "-")


Expand All @@ -264,10 +271,18 @@ class Platform:
wikihow = "wikihow"
ifixit = "ifixit"
ted = "ted"
devdocs = "devdocs"

@classmethod
def all(cls) -> list:
    """Return the identifiers of every known platform.

    The result is a list (not a string), in declaration order, and includes
    the `devdocs` platform.
    """
    # Exactly one return statement: an earlier one-line return listing only
    # the first five platforms would make this list unreachable.
    return [
        cls.wikimedia,
        cls.youtube,
        cls.wikihow,
        cls.ifixit,
        cls.ted,
        cls.devdocs,
    ]

@classmethod
def get_max_per_worker_tasks_for(cls, platform) -> int:
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
validate_warehouse_path,
)
from common.schemas.offliners import (
DevDocsFlagsSchema,
FreeCodeCampFlagsSchema,
GutenbergFlagsSchema,
IFixitFlagsSchema,
Expand Down Expand Up @@ -101,6 +102,7 @@ def get_offliner_schema(offliner):
Offliner.wikihow: WikihowFlagsSchema,
Offliner.ifixit: IFixitFlagsSchema,
Offliner.freecodecamp: FreeCodeCampFlagsSchema,
Offliner.devdocs: DevDocsFlagsSchema,
}.get(offliner, Schema)

@validates_schema
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from common.schemas import SerializableSchema
from common.schemas.offliners.devdocs import DevDocsFlagsSchema
from common.schemas.offliners.freecodecamp import FreeCodeCampFlagsSchema
from common.schemas.offliners.gutenberg import GutenbergFlagsSchema
from common.schemas.offliners.ifixit import IFixitFlagsSchema
Expand All @@ -16,6 +17,7 @@
from common.schemas.offliners.zimit import ZimitFlagsSchema, ZimitFlagsSchemaRelaxed

__all__ = (
"DevDocsFlagsSchema",
"FreeCodeCampFlagsSchema",
"GutenbergFlagsSchema",
"IFixitFlagsSchema",
Expand Down
139 changes: 139 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/devdocs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from marshmallow import fields

from common.schemas import SerializableSchema, String
from common.schemas.fields import (
validate_output,
validate_zim_description,
validate_zim_longdescription,
)


class DevDocsFlagsSchema(SerializableSchema):
    """Flags accepted by the devdocs offliner.

    Each field describes one command-line flag; `data_key` gives the flag name
    when it differs from the Python attribute name (e.g. `all_flag` is exposed
    as `all`). The `label` and `description` metadata entries are
    human-readable texts describing the flag.

    NOTE(review): the descriptions state that `slug` and `all` are mutually
    exclusive and that one of them is required, but nothing in this schema
    enforces it -- confirm whether that is validated elsewhere.
    """

    class Meta:
        # Keep fields in declaration order.
        ordered = True

    # --- Content selection -------------------------------------------------

    slug = String(
        metadata={
            "label": "Slug",
            "description": "Fetch the provided Devdocs resource. "
            "Slugs are the first path entry in the Devdocs URL. "
            "For example, the slug for: `https://devdocs.io/gcc~12/` is `gcc~12`. "
            "Mutually exclusive with `All` setting, set only one option. "
            "Either this setting or `All` must be configured.",
        },
    )

    # Named `all_flag` to avoid shadowing the builtin `all`; exposed as `all`.
    all_flag = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "All",
            "description": "Fetch all Devdocs resources, and produce one ZIM "
            "per resource. Mutually exclusive with `Slug` setting, set only "
            "one option. Either this setting or `Slug` must be configured.",
        },
        data_key="all",
    )

    skip_slug_regex = String(
        metadata={
            "label": "Skip slugs regex",
            "description": "Skips slugs matching the given regular expression. "
            "Do not set to fetch all slugs. Only useful when `All` is set.",
        },
        data_key="skip-slug-regex",
    )

    # --- ZIM naming and metadata (all accept placeholders) -----------------

    file_name_format = String(
        metadata={
            "label": "ZIM filename",
            "description": "ZIM filename. Do not input trailing `.zim`, it "
            "will be automatically added. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to devdocs.io_en_{clean_slug}_{period}",
        },
        data_key="file-name-format",
    )

    name_format = String(
        metadata={
            "label": "ZIM name",
            "description": "ZIM name. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to devdocs.io_en_{clean_slug}",
        },
        data_key="name-format",
    )

    title_format = String(
        metadata={
            "label": "ZIM title",
            "description": "ZIM title. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to `{full_name} Docs`",
        },
        data_key="title-format",
    )

    description_format = String(
        metadata={
            "label": "ZIM description",
            "description": "ZIM description. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to `{full_name} docs by DevDocs`",
        },
        data_key="description-format",
        validate=validate_zim_description,
    )

    long_description_format = String(
        metadata={
            "label": "ZIM long description",
            "description": "ZIM long description. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults to no "
            "long description",
        },
        data_key="long-description-format",
        validate=validate_zim_longdescription,
    )

    tags = String(
        metadata={
            "label": "ZIM Tags",
            "description": "List of semi-colon-separated Tags for the ZIM file. "
            "You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults to "
            "`devdocs;{slug_without_version}`",
        },
    )

    creator = String(
        metadata={
            "label": "Creator",
            "description": "Name of content creator. “DevDocs” otherwise",
        },
    )

    publisher = String(
        metadata={
            "label": "Publisher",
            "description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
        },
    )

    # --- Runtime options ---------------------------------------------------

    # Defaults to `/output` on both load and dump.
    output = String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
        load_default="/output",
        dump_default="/output",
        validate=validate_output,
    )

    debug = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={"label": "Debug", "description": "Enable verbose output"},
    )
1 change: 1 addition & 0 deletions dispatcher/backend/src/utils/offliners.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Offliner.nautilus: od("nautiluszim", True, False),
Offliner.zimit: od("zimit", True, "statsFilename"),
Offliner.kolibri: od("kolibri2zim", True, False),
Offliner.devdocs: od("devdocs2zim", True, False),
}


Expand Down
6 changes: 3 additions & 3 deletions dispatcher/frontend-ui/src/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -336,17 +336,17 @@ export default {
cancelable_statuses: cancelable_statuses,
running_statuses: running_statuses,
contact_email: "[email protected]",
categories: ["freecodecamp", "gutenberg", "ifixit", "other", "phet", "psiram", "stack_exchange",
categories: ["devdocs", "freecodecamp", "gutenberg", "ifixit", "other", "phet", "psiram", "stack_exchange",
"ted", "openedx", "vikidia", "wikibooks", "wikihow", "wikinews",
"wikipedia", "wikiquote", "wikisource", "wikispecies", "wikiversity",
"wikivoyage", "wiktionary"], // list of categories for fileering
warehouse_paths: ["/freecodecamp", "/gutenberg", "/ifixit", "/other", "/phet", "/psiram", "/stack_exchange",
warehouse_paths: ["/devdocs", "/freecodecamp", "/gutenberg", "/ifixit", "/other", "/phet", "/psiram", "/stack_exchange",
"/ted", "/mooc", "/videos", "/vikidia", "/wikibooks", "/wikihow",
"/wikinews", "/wikipedia", "/wikiquote", "/wikisource",
"/wikiversity", "/wikivoyage", "/wiktionary", "/zimit",
"/.hidden/dev", "/.hidden/private", "/.hidden/endless",
"/.hidden/bard", "/.hidden/bsf", "/.hidden/custom_apps"],
offliners: ["mwoffliner", "youtube", "phet", "gutenberg", "sotoki", "nautilus", "ted", "openedx", "zimit", "kolibri", "wikihow", "ifixit", "freecodecamp"],
offliners: ["mwoffliner", "youtube", "phet", "gutenberg", "sotoki", "nautilus", "ted", "openedx", "zimit", "kolibri", "wikihow", "ifixit", "freecodecamp", "devdocs"],
periodicities: ["manually", "monthly", "quarterly", "biannualy", "annually"],
memory_values: [536870912, // 512MiB
1073741824, // 1GiB
Expand Down
4 changes: 3 additions & 1 deletion workers/app/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
OFFLINER_WIKIHOW = "wikihow"
OFFLINER_IFIXIT = "ifixit"
OFFLINER_FREECODECAMP = "freecodecamp"
OFFLINER_DEVDOCS = "devdocs"

ALL_OFFLINERS = [
OFFLINER_MWOFFLINER,
Expand All @@ -137,6 +138,7 @@
OFFLINER_WIKIHOW,
OFFLINER_IFIXIT,
OFFLINER_FREECODECAMP,
OFFLINER_DEVDOCS,
]
SUPPORTED_OFFLINERS = [
offliner
Expand All @@ -152,7 +154,7 @@
OFFLINER_YOUTUBE,
]

ALL_PLATFORMS = ["wikimedia", "youtube", "wikihow", "ifixit", "ted"]
ALL_PLATFORMS = ["wikimedia", "youtube", "wikihow", "ifixit", "ted", "devdocs"]
PLATFORMS_TASKS = {}
for platform in ALL_PLATFORMS:
name = f"PLATFORM_{platform}_MAX_TASKS"
Expand Down
3 changes: 2 additions & 1 deletion workers/contrib/zimfarm.config.example
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ ZIMFARM_CPU="3"
# Comma-separated list of offliners to run or `""` for all of them. If
# you want to run `youtube` tasks, you need to be whitelisted, contact
# us.
ZIMFARM_OFFLINERS="mwoffliner,sotoki,gutenberg,phet,nautilus,ted,openedx,zimit,kolibri,wikihow,ifixit,freecodecamp"
ZIMFARM_OFFLINERS="mwoffliner,sotoki,gutenberg,phet,nautilus,ted,openedx,zimit,kolibri,wikihow,ifixit,freecodecamp,devdocs"

# Set to `"y"` to only run task specifically assigned to this worker
# (`""` otherwise)
Expand All @@ -66,4 +66,5 @@ DISABLE_IPV6=""
# PLATFORM_youtube_MAX_TASKS=2
# PLATFORM_wikihow_MAX_TASKS=2
# PLATFORM_ifixit_MAX_TASKS=2
# PLATFORM_devdocs_MAX_TASKS=2
# PLATFORM_ted_MAX_TASKS=2
1 change: 1 addition & 0 deletions workers/contrib/zimfarm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ function restart() {
--env PLATFORM_youtube_MAX_TASKS=$PLATFORM_youtube_MAX_TASKS \
--env PLATFORM_wikihow_MAX_TASKS=$PLATFORM_wikihow_MAX_TASKS \
--env PLATFORM_ifixit_MAX_TASKS=$PLATFORM_ifixit_MAX_TASKS \
--env PLATFORM_devdocs_MAX_TASKS=$PLATFORM_devdocs_MAX_TASKS \
--env PLATFORM_ted_MAX_TASKS=$PLATFORM_ted_MAX_TASKS \
--env POLL_INTERVAL=$POLL_INTERVAL \
--env DNSCACHE_IMAGE=$DNSCACHE_IMAGE \
Expand Down