"""Create a CSV report of the tasks completed on a Zimfarm instance last month.

A task is reported when its status is succeeded, failed or canceled and its
last update falls within the previous calendar month (from the 1st to the last
day of that month, UTC).

Configuration is read from environment variables:

- ``ZF_URI``: base URL of the Zimfarm API to request
- ``FILE_PREFIX``: prefix of the CSV file to create
  (``{prefix}{year}-{month}.csv``)
- ``OFFLINERS``: comma-separated list of offliners to include, or ``all`` to
  include every offliner
"""

import csv
import datetime
import logging
import os
from pathlib import Path

import requests
from dateutil.relativedelta import relativedelta

logging.basicConfig(
    level=logging.DEBUG, format="[%(asctime)s: %(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)

# url of the zimfarm API to request
url = os.getenv("ZF_URI", "https://api.farm.zimit.kiwix.org/v1")

# prefix of csv file to create: {prefix}{year}-{month}.csv
file_prefix = os.getenv("FILE_PREFIX", "zimfarm_tasks_")

# list of offliners to include; the special value "all" disables the filter
offliners = [name.strip() for name in os.getenv("OFFLINERS", "zimit").split(",")]

# seconds to wait for an API response before giving up, so that a stalled
# connection cannot hang the script forever
request_timeout = 60


def _get_json(endpoint, params=None):
    """Return the decoded JSON body of a GET request, raising on HTTP errors."""
    response = requests.get(endpoint, params=params, timeout=request_timeout)
    response.raise_for_status()
    return response.json()


def _parse_timestamp(value):
    """Parse an ISO 8601 timestamp from the API into an aware datetime."""
    parsed = datetime.datetime.fromisoformat(value)
    if parsed.tzinfo is None:
        # NOTE(review): assumes naive timestamps are expressed in UTC -- confirm
        # against the API. Without this, comparing with the aware month
        # boundaries raises TypeError.
        parsed = parsed.replace(tzinfo=datetime.UTC)
    return parsed


def main():
    """Write last month's completed tasks to a CSV file in the current directory.

    The task list is paginated and every task whose last update falls within
    the previous calendar month is fetched individually. Tasks whose offliner
    is not listed in ``offliners`` are skipped, unless ``offliners`` contains
    ``all``. One row is written per task, with the columns
    ID, URL, Status, Requested, Started and Completed.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.RequestException: on connection failure or timeout.
    """

    now = datetime.datetime.now(datetime.UTC)
    start_of_this_month = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    start_of_last_month = start_of_this_month - relativedelta(months=1)

    # `offliners` is a list, so the "all" wildcard must be looked up inside it
    include_all_offliners = "all" in offliners

    nb_per_page = 100
    current_page = 0
    nb_reported = 0

    report_path = Path(
        f"{file_prefix}{start_of_last_month.year}-{start_of_last_month.month}.csv"
    )
    logger.info(
        "Reporting tasks updated from %s (included) to %s (excluded) in %s",
        start_of_last_month.isoformat(),
        start_of_this_month.isoformat(),
        report_path,
    )

    last_task_found = False
    # newline="" lets the csv module control line endings itself
    with open(report_path, "w", newline="", encoding="utf-8") as fh:
        # csv.writer quotes values containing commas or quotes (e.g. URLs)
        writer = csv.writer(fh, lineterminator="\n")
        writer.writerow(["ID", "URL", "Status", "Requested", "Started", "Completed"])
        while not last_task_found:
            logger.debug("Fetching page %d of the tasks list", current_page)
            items = _get_json(
                f"{url}/tasks/",
                params={
                    "skip": current_page * nb_per_page,
                    "limit": nb_per_page,
                    "status": ["failed", "canceled", "succeeded"],
                },
            )["items"]
            if not items:
                # end of the list reached without meeting any older task:
                # stop instead of requesting empty pages forever
                break
            for item in items:
                item_last_modification = _parse_timestamp(item["updated_at"])
                if item_last_modification >= start_of_this_month:
                    continue  # too recent
                if item_last_modification < start_of_last_month:
                    # presumably the API lists most recently updated tasks
                    # first, so nothing further can be in range -- TODO confirm
                    last_task_found = True
                    break

                task = _get_json(f'{url}/tasks/{item["_id"]}')
                offliner = task["config"]["task_name"]
                if not include_all_offliners and offliner not in offliners:
                    continue
                writer.writerow(
                    [
                        task["_id"],
                        # tasks may have no `url` flag
                        task["config"].get("flags", {}).get("url", "-"),
                        task["status"],
                        task["timestamp"]["requested"],
                        task["timestamp"].get("started", "-"),
                        task["updated_at"],
                    ]
                )
                nb_reported += 1
            current_page += 1
            # keep the file up to date on disk while the (long) scan goes on
            fh.flush()

    logger.info("Done, %d task(s) reported in %s", nb_reported, report_path)


if __name__ == "__main__":
    main()