Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Devdocs offliner, category and warehouse path #1020

Merged
merged 2 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dev/receiver/create-warehouse-paths.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mkdir -p \
/jail/zim/freecodecamp \
/jail/zim/gutenberg \
/jail/zim/ifixit \
/jail/zim/devdocs \
/jail/zim/mooc \
/jail/zim/other \
/jail/zim/phet \
Expand All @@ -28,6 +29,7 @@ chmod 777 \
/jail/zim/freecodecamp \
/jail/zim/gutenberg \
/jail/zim/ifixit \
/jail/zim/devdocs \
/jail/zim/mooc \
/jail/zim/other \
/jail/zim/phet \
Expand Down
1 change: 1 addition & 0 deletions dispatcher/backend/docs/openapi_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1915,6 +1915,7 @@ components:
- wikihow
- zimit
- ifixit
- devdocs
example:
- mwoffliner
- sotoki
Expand Down
17 changes: 16 additions & 1 deletion dispatcher/backend/src/common/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class ScheduleCategory:
wiktionary = "wiktionary"
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"

@classmethod
def all(cls):
Expand All @@ -137,6 +138,7 @@ def all(cls):
cls.wiktionary,
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
]

@classmethod
Expand Down Expand Up @@ -168,6 +170,7 @@ class DockerImageName:
wikihow = "openzim/wikihow"
ifixit = "openzim/ifixit"
freecodecamp = "openzim/freecodecamp"
devdocs = "openzim/devdocs"

@classmethod
def all(cls) -> set:
Expand All @@ -185,6 +188,7 @@ def all(cls) -> set:
cls.wikihow,
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
}


Expand All @@ -202,6 +206,7 @@ class Offliner:
wikihow = "wikihow"
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"

@classmethod
def all(cls):
Expand All @@ -219,6 +224,7 @@ def all(cls):
cls.wikihow,
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
]

@classmethod
Expand All @@ -243,6 +249,7 @@ def get_image_name(cls, offliner):
cls.wikihow: DockerImageName.wikihow,
cls.ifixit: DockerImageName.ifixit,
cls.freecodecamp: DockerImageName.freecodecamp,
cls.devdocs: DockerImageName.devdocs,
}.get(offliner, "-")


Expand All @@ -264,10 +271,18 @@ class Platform:
wikihow = "wikihow"
ifixit = "ifixit"
ted = "ted"
devdocs = "devdocs"

@classmethod
def all(cls) -> str:
return [cls.wikimedia, cls.youtube, cls.wikihow, cls.ifixit, cls.ted]
return [
cls.wikimedia,
cls.youtube,
cls.wikihow,
cls.ifixit,
cls.ted,
cls.devdocs,
]

@classmethod
def get_max_per_worker_tasks_for(cls, platform) -> int:
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
validate_warehouse_path,
)
from common.schemas.offliners import (
DevDocsFlagsSchema,
FreeCodeCampFlagsSchema,
GutenbergFlagsSchema,
IFixitFlagsSchema,
Expand Down Expand Up @@ -101,6 +102,7 @@ def get_offliner_schema(offliner):
Offliner.wikihow: WikihowFlagsSchema,
Offliner.ifixit: IFixitFlagsSchema,
Offliner.freecodecamp: FreeCodeCampFlagsSchema,
Offliner.devdocs: DevDocsFlagsSchema,
}.get(offliner, Schema)

@validates_schema
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from common.schemas import SerializableSchema
from common.schemas.offliners.devdocs import DevDocsFlagsSchema
from common.schemas.offliners.freecodecamp import FreeCodeCampFlagsSchema
from common.schemas.offliners.gutenberg import GutenbergFlagsSchema
from common.schemas.offliners.ifixit import IFixitFlagsSchema
Expand All @@ -16,6 +17,7 @@
from common.schemas.offliners.zimit import ZimitFlagsSchema, ZimitFlagsSchemaRelaxed

__all__ = (
"DevDocsFlagsSchema",
"FreeCodeCampFlagsSchema",
"GutenbergFlagsSchema",
"IFixitFlagsSchema",
Expand Down
162 changes: 162 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/devdocs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from marshmallow import fields

from common.schemas import SerializableSchema, String
from common.schemas.fields import (
validate_output,
validate_zim_description,
validate_zim_longdescription,
)


class DevDocsFlagsSchema(SerializableSchema):
    """Flags accepted by the Devdocs offliner.

    Every field carries a ``label`` and a ``description`` in its metadata.
    ``data_key`` is set wherever the serialized key differs from the Python
    attribute name (for instance ``all_flag`` is exposed as ``all`` and
    ``skip_slug_regex`` as ``skip-slug-regex``).
    """

    class Meta:
        ordered = True

    # Attribute is named `all_flag` but serialized under the `all` key.
    all_flag = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "All",
            "description": "Fetch all Devdocs resources, and produce one ZIM "
            "per resource.",
        },
        data_key="all",
    )

    slug = String(  # should be ListOfString but not yet supported by Zimfarm
        metadata={
            "label": "Slug",
            "description": "Fetch the provided Devdocs resource. "
            "Slugs are the first path entry in the Devdocs URL. "
            "For example, the slug for: `https://devdocs.io/gcc~12/` is `gcc~12`.",
        },
    )

    first = fields.Integer(
        metadata={
            "label": "Number of first items",
            "description": "Fetch only the first N items per slug as shown "
            "in the DevDocs UI. Do not set to fetch all items.",
        },
    )

    skip_slug_regex = String(
        metadata={
            "label": "Skip slugs regex",
            # Trailing space keeps the two sentences apart once the adjacent
            # literals are concatenated.
            "description": "Skips slugs matching the given regular expression. "
            "Do not set to fetch all slugs",
        },
        data_key="skip-slug-regex",
    )

    file_name_format = String(
        metadata={
            "label": "ZIM filename",
            "description": "ZIM filename. Do not input trailing `.zim`, it "
            "will be automatically added. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to devdocs.io_en_{clean_slug}_{period}",
        },
        data_key="file-name-format",
    )

    name_format = String(
        metadata={
            "label": "ZIM name",
            "description": "ZIM name. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to devdocs.io_en_{clean_slug}",
        },
        data_key="name-format",
    )

    title_format = String(
        metadata={
            "label": "ZIM title",
            "description": "ZIM title. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to `{full_name} Docs`",
        },
        data_key="title-format",
    )

    description_format = String(
        metadata={
            "label": "ZIM description",
            "description": "ZIM description. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to `{full_name} docs by DevDocs`",
        },
        data_key="description-format",
        validate=validate_zim_description,
    )

    long_description_format = String(
        metadata={
            "label": "ZIM long description",
            "description": "ZIM long description. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to `{full_name} docs by DevDocs`",
        },
        data_key="long-description-format",
        validate=validate_zim_longdescription,
    )

    tags = String(
        metadata={
            "label": "ZIM Tags",
            # Exactly one space between sentences and before the default
            # value once the adjacent literals are concatenated.
            "description": "List of semi-colon-separated Tags for the ZIM file. "
            "You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults to "
            "`devdocs;{slug_without_version}`",
        }
    )

    creator = String(
        metadata={
            "label": "Creator",
            "description": "Name of content creator. “DevDocs” otherwise",
        },
    )

    publisher = String(
        metadata={
            "label": "Publisher",
            "description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
        },
    )

    # Defaults to `/output` on both load and dump.
    output = String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
        load_default="/output",
        dump_default="/output",
        validate=validate_output,
    )

    debug = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={"label": "Debug", "description": "Enable verbose output"},
    )

    devdocs_frontend_url = String(
        metadata={
            "label": "DevDocs frontend URL",
            "description": "Scheme and hostname for the devdocs frontend. "
            "Defaults to https://devdocs.io",
        },
        data_key="devdocs-frontend-url",
    )

    devdocs_documents_url = String(
        metadata={
            "label": "DevDocs documents URL",
            "description": "Scheme and hostname for the devdocs documents server. "
            "Defaults to https://documents.devdocs.io",
        },
        data_key="devdocs-documents-url",
    )
1 change: 1 addition & 0 deletions dispatcher/backend/src/utils/offliners.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Offliner.nautilus: od("nautiluszim", True, False),
Offliner.zimit: od("zimit", True, "statsFilename"),
Offliner.kolibri: od("kolibri2zim", True, False),
Offliner.devdocs: od("devdocs2zim", True, False),
}


Expand Down
6 changes: 3 additions & 3 deletions dispatcher/frontend-ui/src/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -336,17 +336,17 @@ export default {
cancelable_statuses: cancelable_statuses,
running_statuses: running_statuses,
contact_email: "[email protected]",
categories: ["freecodecamp", "gutenberg", "ifixit", "other", "phet", "psiram", "stack_exchange",
categories: ["devdocs", "freecodecamp", "gutenberg", "ifixit", "other", "phet", "psiram", "stack_exchange",
"ted", "openedx", "vikidia", "wikibooks", "wikihow", "wikinews",
"wikipedia", "wikiquote", "wikisource", "wikispecies", "wikiversity",
"wikivoyage", "wiktionary"], // list of categories for filtering
warehouse_paths: ["/freecodecamp", "/gutenberg", "/ifixit", "/other", "/phet", "/psiram", "/stack_exchange",
warehouse_paths: ["/devdocs", "/freecodecamp", "/gutenberg", "/ifixit", "/other", "/phet", "/psiram", "/stack_exchange",
"/ted", "/mooc", "/videos", "/vikidia", "/wikibooks", "/wikihow",
"/wikinews", "/wikipedia", "/wikiquote", "/wikisource",
"/wikiversity", "/wikivoyage", "/wiktionary", "/zimit",
"/.hidden/dev", "/.hidden/private", "/.hidden/endless",
"/.hidden/bard", "/.hidden/bsf", "/.hidden/custom_apps"],
offliners: ["mwoffliner", "youtube", "phet", "gutenberg", "sotoki", "nautilus", "ted", "openedx", "zimit", "kolibri", "wikihow", "ifixit", "freecodecamp"],
offliners: ["mwoffliner", "youtube", "phet", "gutenberg", "sotoki", "nautilus", "ted", "openedx", "zimit", "kolibri", "wikihow", "ifixit", "freecodecamp", "devdocs"],
periodicities: ["manually", "monthly", "quarterly", "biannualy", "annually"],
memory_values: [536870912, // 512MiB
1073741824, // 1GiB
Expand Down
4 changes: 3 additions & 1 deletion workers/app/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
OFFLINER_WIKIHOW = "wikihow"
OFFLINER_IFIXIT = "ifixit"
OFFLINER_FREECODECAMP = "freecodecamp"
OFFLINER_DEVDOCS = "devdocs"

ALL_OFFLINERS = [
OFFLINER_MWOFFLINER,
Expand All @@ -137,6 +138,7 @@
OFFLINER_WIKIHOW,
OFFLINER_IFIXIT,
OFFLINER_FREECODECAMP,
OFFLINER_DEVDOCS,
]
SUPPORTED_OFFLINERS = [
offliner
Expand All @@ -152,7 +154,7 @@
OFFLINER_YOUTUBE,
]

ALL_PLATFORMS = ["wikimedia", "youtube", "wikihow", "ifixit", "ted"]
ALL_PLATFORMS = ["wikimedia", "youtube", "wikihow", "ifixit", "ted", "devdocs"]
PLATFORMS_TASKS = {}
for platform in ALL_PLATFORMS:
name = f"PLATFORM_{platform}_MAX_TASKS"
Expand Down
3 changes: 2 additions & 1 deletion workers/contrib/zimfarm.config.example
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ ZIMFARM_CPU="3"
# Comma-separated list of offliners to run or `""` for all of them. If
# you want to run `youtube` tasks, you need to be whitelisted, contact
# us.
ZIMFARM_OFFLINERS="mwoffliner,sotoki,gutenberg,phet,nautilus,ted,openedx,zimit,kolibri,wikihow,ifixit,freecodecamp"
ZIMFARM_OFFLINERS="mwoffliner,sotoki,gutenberg,phet,nautilus,ted,openedx,zimit,kolibri,wikihow,ifixit,freecodecamp,devdocs"

# Set to `"y"` to only run task specifically assigned to this worker
# (`""` otherwise)
Expand All @@ -66,4 +66,5 @@ DISABLE_IPV6=""
# PLATFORM_youtube_MAX_TASKS=2
# PLATFORM_wikihow_MAX_TASKS=2
# PLATFORM_ifixit_MAX_TASKS=2
# PLATFORM_devdocs_MAX_TASKS=2
# PLATFORM_ted_MAX_TASKS=2
1 change: 1 addition & 0 deletions workers/contrib/zimfarm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ function restart() {
--env PLATFORM_youtube_MAX_TASKS=$PLATFORM_youtube_MAX_TASKS \
--env PLATFORM_wikihow_MAX_TASKS=$PLATFORM_wikihow_MAX_TASKS \
--env PLATFORM_ifixit_MAX_TASKS=$PLATFORM_ifixit_MAX_TASKS \
--env PLATFORM_devdocs_MAX_TASKS=$PLATFORM_devdocs_MAX_TASKS \
--env PLATFORM_ted_MAX_TASKS=$PLATFORM_ted_MAX_TASKS \
--env POLL_INTERVAL=$POLL_INTERVAL \
--env DNSCACHE_IMAGE=$DNSCACHE_IMAGE \
Expand Down