Skip to content

Commit

Permalink
Utility script to create a monthly report of tasks ran on a Zimfarm i…
Browse files Browse the repository at this point in the history
…nstance
  • Loading branch information
benoit74 committed Sep 13, 2024
1 parent f0dbbe1 commit d27d106
Showing 1 changed file with 78 additions and 0 deletions.
78 changes: 78 additions & 0 deletions dispatcher/backend/maint-scripts/monthly_report_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Creates a CSV report of the tasks that completed (successfully or not)
# during the previous calendar month (from its first to its last day)

import csv
import datetime
import logging
import os
from pathlib import Path

import requests
from dateutil.relativedelta import relativedelta

# Log everything (DEBUG and above) with a timestamp and the level name.
logging.basicConfig(
    level=logging.DEBUG, format="[%(asctime)s: %(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)

# url of the zimfarm API to request (ZF_URI environment variable)
url = os.getenv("ZF_URI", "https://api.farm.zimit.kiwix.org/v1")

# prefix of csv file to create: {prefix}{year}-{month}.csv
# (FILE_PREFIX environment variable)
file_prefix = os.getenv("FILE_PREFIX", "zimfarm_tasks_")

# list of offliners to include, read from the comma-separated OFFLINERS
# environment variable; surrounding whitespace is trimmed and empty entries
# are dropped so that "zimit, mwoffliner" or a trailing comma behave as expected
offliners = [
    name.strip()
    for name in os.getenv("OFFLINERS", "zimit").split(",")
    if name.strip()
]


def _as_utc(timestamp):
    """Parse an ISO 8601 timestamp string into a timezone-aware datetime."""
    parsed = datetime.datetime.fromisoformat(timestamp)
    if parsed.tzinfo is None:
        # NOTE(review): a naive timestamp is assumed to be expressed in UTC;
        # without this it could not be compared with the aware month boundaries.
        parsed = parsed.replace(tzinfo=datetime.UTC)
    return parsed


def main():
    """Write a CSV report of the tasks that finished during the previous month.

    The task list of the Zimfarm API at ``url`` is paged through (failed,
    canceled and succeeded tasks only). Every task whose ``updated_at`` falls
    within the previous calendar month (UTC) and whose offliner is listed in
    ``offliners`` (or any offliner when ``offliners`` contains ``"all"``) is
    written as one row of ``{file_prefix}{year}-{month}.csv`` in the current
    working directory.

    Paging stops at the first task older than the start of the previous month,
    which relies on the API listing the most recently updated tasks first, or
    when the API has no more tasks to return.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the API does not answer in time.
    """
    now = datetime.datetime.now(datetime.UTC)
    start_of_this_month = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    start_of_last_month = start_of_this_month - relativedelta(months=1)

    nb_per_page = 100
    request_timeout = 60  # seconds; never hang forever on an unresponsive API
    # `offliners` is a list, so membership (not equality) detects the wildcard
    include_all_offliners = "all" in offliners

    report_path = Path(
        f"{file_prefix}{start_of_last_month.year}-{start_of_last_month.month}.csv"
    )
    nb_rows = 0
    current_page = 0
    last_task_found = False

    # newline="" lets the csv module control line endings; "\n" is kept as the
    # row terminator so the file layout matches previously generated reports.
    with report_path.open("w", encoding="utf-8", newline="") as fh:
        writer = csv.writer(fh, lineterminator="\n")
        writer.writerow(["ID", "URL", "Status", "Requested", "Started", "Completed"])

        while not last_task_found:
            response = requests.get(
                f"{url}/tasks/",
                params={
                    "skip": current_page * nb_per_page,
                    "limit": nb_per_page,
                    "status": ["failed", "canceled", "succeeded"],
                },
                timeout=request_timeout,
            )
            response.raise_for_status()
            items = response.json()["items"]
            if not items:
                # no more tasks on the server: stop instead of looping forever
                break

            for item in items:
                item_last_modification = _as_utc(item["updated_at"])
                if item_last_modification >= start_of_this_month:
                    continue  # too recent
                if item_last_modification < start_of_last_month:
                    last_task_found = True  # we found the most ancient task to report
                    break

                # the list endpoint is not used for the details; fetch the full task
                response = requests.get(
                    f'{url}/tasks/{item["_id"]}', timeout=request_timeout
                )
                response.raise_for_status()
                task = response.json()

                offliner = task["config"]["task_name"]
                if not include_all_offliners and offliner not in offliners:
                    continue

                writer.writerow(
                    [
                        task["_id"],
                        # not every offliner has a "url" flag
                        task["config"]["flags"].get("url", "-"),
                        task["status"],
                        task["timestamp"]["requested"],
                        task["timestamp"].get("started", "-"),
                        task["updated_at"],
                    ]
                )
                nb_rows += 1

            current_page += 1
            fh.flush()  # keep partial results on disk if a later page fails

    logger.info("Wrote %d task(s) to %s", nb_rows, report_path)


# Generate the report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 comments on commit d27d106

Please sign in to comment.