diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b3a8d42..953ad7c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,3 +33,45 @@ repos: rev: 24.8.0 hooks: - id: black + + - repo: local + hooks: + - id: no-do-not-merge + name: No 'DO NOT MERGE' + description: | + * You can add 'DONOTMERGE', 'DONOTSUBMIT', 'DO NOT MERGE', 'DO NOT SUBMIT', 'DON'T MERGE', + 'DON'T SUBMIT' or the same with underscores instead of spaces to prevent merging. + language: pygrep + args: [-i] + entry: DO([ _]?NOT|N'T)[ _]?(SUBMIT|MERGE) + exclude: ^.pre-commit-config.yaml$ + types: [text] + + - id: no-todos-without-context + name: No TODOs without context + description: | + * Use descriptive, referenceable TODOs. Examples: + * `// TODO(nero.windstriker)` + * `# FIXME(https://github.com/helly25/circleci/issues/1)` + * From the google style guide: + Use TODO comments for code that is temporary, a short-term solution, or good-enough but + not perfect. TODOs should include the string TODO in all caps, followed by the bug ID, + name, e-mail address, or other identifier of the person or issue with the best context + about the problem referenced by the TODO. + * A bug or other URL reference is better than a person, as the person may leave the team. + * A developer can be identified by their email addresses or just by their name without the + @-part. + * If a developer is referenced then it does not need to be the one writing the TODO. + * It is more important to note the most knowledgeable person. + * If you add someone else, first clarify with them that they are the right person. + * Concretely, TODOs and FIXMEs without a context are rather pointless. That is, sooner or + later no one knows why they were introduced and they will simply get deleted. Or they + stay in place along with an endless larger list of them resulting in all of them being + ignored immediately. 
+ * It is impossible to systematically find your own TODOs, someone else's or perform any + other kind of statistics on them. The issue is that the blame tool tracks the last + change, so you would need a tool that can follow the history to the actual introduction + of the TODOs. + language: pygrep + entry: (#|//|/[*]).*(FIXM[E]|TOD[O])\b(?![(](((\w|\w[.-]\w)+(@github.com)?)|(https?://[^)]+))(,[^)]+)?[)]) + types: [text] diff --git a/README.md b/README.md index bf03048..bbbfcd3 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ bazel run //circleci:workflows -- [args...] ## Commands * combine: Read multiple files generated by `workflow.py fetch` and combine them. -* fetch: Fetch workflow stats from the CircleCI API server and writes them as a CSV file. +* fetch: Fetch workflow data from the CircleCI API server and writes them as a CSV file. * fetch_details: Given a workflow CSV file, fetch details for each workflow (slow). * filter: Read CSV files generated from `workflow.py fetch` and filters them. * help: Provides help for the program. @@ -38,14 +38,144 @@ Read multiple files generated by `workflow.py fetch` and combine them. bazel run //circleci:workflows -- combine --output=/tmp/circleci.csv "${PWD}/data/circleci_workflows*.csv*" ``` +### positional arguments: + +input + + List of CSV files generated from `workflow.py fetch`. + +### options: + +-h, --help + + show this help message and exit + +--circleci_server CIRCLECI_SERVER + + The circleci server url including protocol (defaults to environment + variable 'CIRCLECI_SERVER' which defaults to 'https://circleci.com'). + +--circleci_token CIRCLECI_TOKEN + + CircleCI Auth Token (defaults to environment variable 'CIRCLECI_TOKEN') + +--circleci_project_slug CIRCLECI_PROJECT_SLUG + + CircleCI project-slug (defaults to environment variable + 'CIRCLECI_PROJECT_SLUG'). + +--log_requests_to_file LOG_REQUESTS_TO_FILE + + Whether to log all requests for debugging purposes. 
+ +--log_requests_details {['REQUEST', 'RESPONSE_TEXT', 'STATUS_CODE']} + + Comma separated list of LogRequestDetails. + +--output OUTPUT + + Name of the output file. + +--fetch_workflow_details, --no-fetch_workflow_details + + Whether workflow details should automatically be added (if not present). + +--progress, --no-progress + + Whether to indicate progress (defaults to True if + `--fetch_workflow_details` is active). + ## Command fetch -Fetch workflow stats from the CircleCI API server and writes them as a CSV file. +Fetch workflow data from the CircleCI API server and writes them as a CSV +file. + +The time range to fetch runs for can be specified using flags `--start`, +`--end` and `--midnight`. By default fetch will retrieve the data for the past +89 complete days starting at midnight. + +The easiest and intended way to manually control the time range is to specify +`--start` as an offset to the current time. For instance, using `--start=1w` +will fetch runs for the past week. + +In many cases it is preferable to fetch data for complete days. That can be +achieved with the `--midnight` flag. + +After fetching general workflow information, the command will fetch all +details if flag `fetch_workflow_details` is True (default). ``` bazel run //circleci:workflows -- fetch --output "${PWD}/data/circleci_workflows_$(date +"%Y%m%d").csv.bz2" ``` +### options: + +-h, --help + + show this help message and exit + +--circleci_server CIRCLECI_SERVER + + The circleci server url including protocol (defaults to environment + variable 'CIRCLECI_SERVER' which defaults to 'https://circleci.com'). + +--circleci_token CIRCLECI_TOKEN + + CircleCI Auth Token (defaults to environment variable 'CIRCLECI_TOKEN') + +--circleci_project_slug CIRCLECI_PROJECT_SLUG + + CircleCI project-slug (defaults to environment variable + 'CIRCLECI_PROJECT_SLUG'). + +--log_requests_to_file LOG_REQUESTS_TO_FILE + + Whether to log all requests for debugging purposes. 
+ +--log_requests_details {['REQUEST', 'RESPONSE_TEXT', 'STATUS_CODE']} + + Comma separated list of LogRequestDetails. + +--workflow WORKFLOW + + The name of the workflow(s) to read. Multiple workflows can be read by + separating with comma. If no workflow is set, then fetch all workflows. + +--output OUTPUT + + Name of the output file. + +--end END + + End (newest) date/time in Python [ISO + 8601](https://en.wikipedia.org/wiki/ISO_8601) format, e.g. `20241224` + or as a negative time difference, e.g. `-10days` (for details see + [pytimeparse](https://github.com/wroberts/pytimeparse)). + + This defaults to `now`. + +--start START + + Start (oldest) date/time in Python [ISO + 8601](https://en.wikipedia.org/wiki/ISO_8601) format, e.g. `20241224` + or as a negative time difference, e.g. `-10days` (for details see + [pytimeparse](https://github.com/wroberts/pytimeparse)). + + This defaults to `-90days` (or `-89days` if --midnight is active). + +--midnight, --no-midnight + + Adjust start and end date/time to midnight of the same day. + +--progress, --no-progress + + Whether to indicate progress (defaults to True if + `--fetch_workflow_details` is active). + +--fetch_workflow_details, --no-fetch_workflow_details + + Whether workflow details should automatically be added. + ## Command fetch_details Given a workflow CSV file, fetch details for each workflow (slow). @@ -54,6 +184,46 @@ Given a workflow CSV file, fetch details for each workflow (slow). bazel run //circleci:workflows -- fetch_details --input "${PWD}/data/circleci_workflows_IN.csv.bz2" --output "${PWD}/data/circleci_workflows_OUT.csv.bz2" ``` +### options: + +-h, --help + + show this help message and exit + +--circleci_server CIRCLECI_SERVER + + The circleci server url including protocol (defaults to environment + variable 'CIRCLECI_SERVER' which defaults to 'https://circleci.com'). 
+ +--circleci_token CIRCLECI_TOKEN + + CircleCI Auth Token (defaults to environment variable 'CIRCLECI_TOKEN') + +--circleci_project_slug CIRCLECI_PROJECT_SLUG + + CircleCI project-slug (defaults to environment variable + 'CIRCLECI_PROJECT_SLUG'). + +--log_requests_to_file LOG_REQUESTS_TO_FILE + + Whether to log all requests for debugging purposes. + +--log_requests_details {['REQUEST', 'RESPONSE_TEXT', 'STATUS_CODE']} + + Comma separated list of LogRequestDetails. + +--input INPUT + + A CSV file generated from `workflow.py fetch`. + +--output OUTPUT + + Name of the output file. + +--progress, --no-progress + + Whether to indicate progress. + ## Command filter Read CSV files generated from `workflow.py fetch` and filters them. @@ -62,14 +232,100 @@ Read CSV files generated from `workflow.py fetch` and filters them. bazel run //circleci:workflows -- filter --workflow default_workflow,pre_merge --input /tmp/circleci.csv --output "${HOME}/circleci_filtered_workflows.csv" ``` +### options: + +-h, --help + + show this help message and exit + +--workflow WORKFLOW + + The name of the workflow(s) to accept. Multiple workflows can be used + by separating with comma. If no workflow is set, then accept all + workflows. + +--input INPUT + + CSV file generated from `workflow.py fetch`. + +--output OUTPUT + + Name of the output file. + +--min_duration_sec MIN_DURATION_SEC + + Minimum duration to accept row in [sec]. + +--output_duration_as_mins, --no-output_duration_as_mins + + Whether to report duration values in minutes. + +--exclude_branches EXCLUDE_BRANCHES + + Exclude branches by full regular expression match. + +--exclude_incomplete_reruns, --no-exclude_incomplete_reruns + + If workflow details are available, reject incomplete reruns (e.g.: rerun- + single-job, rerun-workflow-from-failed). + +--only_branches ONLY_BRANCHES + + Accept branches by full regular expression match. + +--only_status ONLY_STATUS + + Accept only listed status values (multiple separated by comma). 
+ +--only_weekdays ONLY_WEEKDAYS + + Accept only the listed days of the week as indexed 1=Monday through + 7=Sunday (ISO notation). + ## Command request_branches Read and display the list of branches for `workflow` from CircleCI API. +By default this fetches branches for the workflow `default_workflow`. The +workflow can be specified with the `--workflow` flag. + ``` bazel run //circleci:workflows -- request_branches ``` +### options: + +-h, --help + + show this help message and exit + +--circleci_server CIRCLECI_SERVER + + The circleci server url including protocol (defaults to environment + variable 'CIRCLECI_SERVER' which defaults to 'https://circleci.com'). + +--circleci_token CIRCLECI_TOKEN + + CircleCI Auth Token (defaults to environment variable 'CIRCLECI_TOKEN') + +--circleci_project_slug CIRCLECI_PROJECT_SLUG + + CircleCI project-slug (defaults to environment variable + 'CIRCLECI_PROJECT_SLUG'). + +--log_requests_to_file LOG_REQUESTS_TO_FILE + + Whether to log all requests for debugging purposes. + +--log_requests_details {['REQUEST', 'RESPONSE_TEXT', 'STATUS_CODE']} + + Comma separated list of LogRequestDetails. + +--workflow WORKFLOW + + The name of the workflow to read. Multiple workflows can be read by + separating with comma. + ## Command request_workflow Given a workflow ID return its details. @@ -78,6 +334,38 @@ Given a workflow ID return its details. bazel run //circleci:workflows -- request_workflow --workflow_id ``` +### options: + +-h, --help + + show this help message and exit + +--circleci_server CIRCLECI_SERVER + + The circleci server url including protocol (defaults to environment + variable 'CIRCLECI_SERVER' which defaults to 'https://circleci.com'). + +--circleci_token CIRCLECI_TOKEN + + CircleCI Auth Token (defaults to environment variable 'CIRCLECI_TOKEN') + +--circleci_project_slug CIRCLECI_PROJECT_SLUG + + CircleCI project-slug (defaults to environment variable + 'CIRCLECI_PROJECT_SLUG'). 
+ +--log_requests_to_file LOG_REQUESTS_TO_FILE + + Whether to log all requests for debugging purposes. + +--log_requests_details {['REQUEST', 'RESPONSE_TEXT', 'STATUS_CODE']} + + Comma separated list of LogRequestDetails. + +--workflow_id WORKFLOW_ID + + Workflow ID to request. + ## Command request_workflows Read and display the list of workflow names from CircleCI API. @@ -85,3 +373,31 @@ Read and display the list of workflow names from CircleCI API. ``` bazel run //circleci:workflows -- request_workflows ``` + +### options: + +-h, --help + + show this help message and exit + +--circleci_server CIRCLECI_SERVER + + The circleci server url including protocol (defaults to environment + variable 'CIRCLECI_SERVER' which defaults to 'https://circleci.com'). + +--circleci_token CIRCLECI_TOKEN + + CircleCI Auth Token (defaults to environment variable 'CIRCLECI_TOKEN') + +--circleci_project_slug CIRCLECI_PROJECT_SLUG + + CircleCI project-slug (defaults to environment variable + 'CIRCLECI_PROJECT_SLUG'). + +--log_requests_to_file LOG_REQUESTS_TO_FILE + + Whether to log all requests for debugging purposes. + +--log_requests_details {['REQUEST', 'RESPONSE_TEXT', 'STATUS_CODE']} + + Comma separated list of LogRequestDetails. 
diff --git a/circleci/BUILD b/circleci/BUILD index 1b178a2..c7f25f8 100644 --- a/circleci/BUILD +++ b/circleci/BUILD @@ -39,26 +39,15 @@ py_test( ], ) -py_library( - name = "commands_py", - srcs = ["commands.py"], -) - -py_test( - name = "commands_test", - srcs = ["commands_test.py"], - deps = [ - ":commands_py", - requirement("parameterized"), - ], -) - py_library( name = "workflows_lib_py", srcs = ["workflows_lib.py"], deps = [ ":circleci_api_v2_py", - ":commands_py", + "//mbo/app:commands_py", + "//mbo/app:flags_py", + requirement("humanize"), + requirement("pytimeparse"), ], ) @@ -67,8 +56,8 @@ py_test( srcs = ["workflows_lib_test.py"], deps = [ ":circleci_api_v2_py", - ":commands_py", ":workflows_lib_py", + "//mbo/app:commands_py", requirement("parameterized"), ], ) @@ -79,7 +68,7 @@ py_binary( srcs = ["workflows.py"], tags = ["manual"], deps = [ - ":commands_py", ":workflows_lib_py", + "//mbo/app:commands_py", ], ) diff --git a/circleci/circleci_api_v2.py b/circleci/circleci_api_v2.py index 06411ce..3352ac2 100644 --- a/circleci/circleci_api_v2.py +++ b/circleci/circleci_api_v2.py @@ -15,8 +15,15 @@ """A simple CircleCI V2 API client.""" -from datetime import datetime -from typing import Any +from dataclasses import dataclass, field, fields +from datetime import datetime, timezone +from enum import Enum +from typing import IO, TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: + from _typeshed import SupportsWrite +else: + SupportsWrite = IO import requests @@ -45,18 +52,48 @@ class CircleCiDataError(CircleCiError): pass +class LogRequestDetail(Enum): + REQUEST = 0 + STATUS_CODE = 1 + RESPONSE_TEXT = 2 + + class CircleCiApiV2: """Implementation of a simple CircleCI API V2 client. 
See https://circleci.com/docs/api/v2/index.html """ - def __init__(self, circleci_server: str, circleci_token: str, project_slug: str): + def __init__( + self, + *, + circleci_server: str, + circleci_token: str, + project_slug: str, + log_requests_to_file: Optional[SupportsWrite[str]] = None, + log_requests_details: list[LogRequestDetail] = [LogRequestDetail.REQUEST], + ): if not circleci_server.startswith(("https://", "http://")): circleci_server = "https://" + circleci_server self.circleci_server = circleci_server self.circleci_token = circleci_token self.project_slug = project_slug + self.log_requests_to_file = log_requests_to_file + self.log_requests_details = log_requests_details + + def _LogRequestDetail(self, url: str) -> None: + if ( + self.log_requests_to_file + and LogRequestDetail.REQUEST in self.log_requests_details + ): + print(url, file=self.log_requests_to_file) + + def _LogResponseDetail(self, response: requests.Response) -> None: + if self.log_requests_to_file: + if LogRequestDetail.STATUS_CODE in self.log_requests_details: + print(response.status_code, file=self.log_requests_to_file) + if LogRequestDetail.RESPONSE_TEXT in self.log_requests_details: + print(response.text, file=self.log_requests_to_file) def _GetRequestJson(self, api: str, params: dict[str, str] = {}) -> Any: headers = { @@ -66,7 +103,9 @@ def _GetRequestJson(self, api: str, params: dict[str, str] = {}) -> Any: if params: url += "?" 
+ "&".join([k + "=" + v for k, v in params.items()]) try: + self._LogRequestDetail(url=url) response = requests.get(url=url, headers=headers) + self._LogResponseDetail(response) except Exception as err: raise CircleCiRequestError(f"CircleCI Request Error: '{err}'") if response.status_code != 200: @@ -109,7 +148,7 @@ def RequestWorkflows(self) -> list[str]: items: list[dict[str, Any]] = data["items"] for item in items: workflows.add(str(item["name"])) - next_page_token = str(data.get("next_page_token", "")) + next_page_token = data.get("next_page_token", "") or "" if not next_page_token: break params["page-token"] = next_page_token @@ -134,7 +173,7 @@ def RequestWorkflowRuns( for k in item.keys(): next_item[k] = str(item[k]) items.append(next_item) - next_page_token = str(data.get("next_page_token", "")) + next_page_token = data.get("next_page_token", "") or "" if not next_page_token: break params["page-token"] = next_page_token @@ -154,3 +193,28 @@ def ParseTime(self, dt: str) -> datetime: if dt.endswith("Z") and dt[len(dt) - 2] in "0123456789": dt = dt[:-1] + "UTC" return datetime.strptime(dt, r"%Y-%m-%dT%H:%M:%S.%f%Z") + + def FormatTime(self, dt: datetime) -> str: + if dt.tzinfo and dt.tzinfo != timezone.utc: + dt = dt.astimezone(timezone.utc) + return dt.strftime(r"%Y-%m-%dT%H:%M:%SZ") + + +@dataclass(kw_only=True) +class CircleCiApiV2Opts: + """Dataclass that can carry the init parameters for class .""" + + circleci_server: str + circleci_token: str + project_slug: str + log_requests_to_file: Optional[SupportsWrite[str]] = field(repr=False, default=None) + log_requests_details: list[LogRequestDetail] = field( + default_factory=lambda: [LogRequestDetail.REQUEST] + ) + + def CreateClient(self) -> CircleCiApiV2: + # Unfortunately `asdict` has an issue copying `IO` types. So we use the official workaround. 
+ # return CircleCiApiV2(**asdict(self)) + return CircleCiApiV2( + **{field.name: getattr(self, field.name) for field in fields(self)} + ) diff --git a/circleci/workflows.py b/circleci/workflows.py index 9c23e02..87bc6bb 100755 --- a/circleci/workflows.py +++ b/circleci/workflows.py @@ -23,7 +23,7 @@ """ import circleci.workflows_lib -from circleci.commands import Command +import mbo.app.commands if __name__ == "__main__": - Command.Run() + mbo.app.commands.Command.Run() diff --git a/circleci/workflows_lib.py b/circleci/workflows_lib.py index 392c34e..8b3e740 100644 --- a/circleci/workflows_lib.py +++ b/circleci/workflows_lib.py @@ -23,11 +23,21 @@ import sys import time from abc import ABC, abstractmethod -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone +from enum import Enum from pathlib import Path +from typing import IO, TYPE_CHECKING, Any, Optional -from circleci.circleci_api_v2 import CircleCiApiV2 -from circleci.commands import Command, Die, Log, OpenTextFile, Print +if TYPE_CHECKING: + from _typeshed import SupportsWrite +else: + SupportsWrite = IO + +import humanize + +from circleci.circleci_api_v2 import CircleCiApiV2, CircleCiApiV2Opts, LogRequestDetail +from mbo.app.commands import Command, Die, DocOutdent, Log, OpenTextFile, Print +from mbo.app.flags import EnumListAction, ParseDateTimeOrDelta # Keys used by the `fetch` command. # Instead of `created_at` and `stopped_at` we provide `created`/`created_unix` @@ -62,14 +72,19 @@ FETCH_WORKFLOW_DETAIL_KEYS = FETCH_WORKFLOW_KEYS + FETCH_WORKFLOW_DETAIL_EXTRAS +def TimeRangeStr(start: datetime, end: datetime) -> str: + return f"Time range: [{start} .. {end}] ({humanize.precisedelta(end - start)})." 
+ + class CircleCiCommand(Command): """Abstract base class for commands that use the CircleCI API.""" def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) - def __init__(self): + def __init__(self) -> None: super(CircleCiCommand, self).__init__() + self.log_requests_to_file: Optional[SupportsWrite[str]] = None self.parser.add_argument( "--circleci_server", default="", @@ -86,46 +101,69 @@ def __init__(self): help="CircleCI Auth Token (defaults to environment variable 'CIRCLECI_TOKEN')", ) self.parser.add_argument( - "--project_slug", + "--circleci_project_slug", default="", type=str, help="CircleCI project-slug (defaults to environment variable 'CIRCLECI_PROJECT_SLUG').", ) + self.parser.add_argument( + "--log_requests_to_file", + type=Path, + default=None, + help="Whether to log all requests for debugging purposes.", + ) + self.parser.add_argument( + "--log_requests_details", + default=[LogRequestDetail.REQUEST], + type=LogRequestDetail, + allow_empty=False, + container_type=set, + action=EnumListAction, + help="Comma separated list of LogRequestDetails.", + ) def Prepare(self, argv: list[str]) -> None: super(CircleCiCommand, self).Prepare(argv) + self.log_requests_to_file = None + if self.args.log_requests_to_file: + self.log_requests_to_file = OpenTextFile( + self.args.log_requests_to_file, "wt" + ) self.circleci = self._InitCircleCiClient( - circleci_server=self.args.circleci_server, - circleci_token=self.args.circleci_token, - project_slug=self.args.project_slug, + options=CircleCiApiV2Opts( + circleci_server=self.args.circleci_server, + circleci_token=self.args.circleci_token, + project_slug=self.args.circleci_project_slug, + log_requests_to_file=self.log_requests_to_file, # Not from args! 
+ log_requests_details=self.args.log_requests_details, + ) ) @staticmethod - def _InitCircleCiClient( - circleci_server: str, circleci_token: str, project_slug: str - ) -> CircleCiApiV2: - circleci_server = str( - circleci_server or os.getenv("CIRCLECI_SERVER", "https://circleci.com") + def _InitCircleCiClient(options: CircleCiApiV2Opts) -> CircleCiApiV2: + options.circleci_server = str( + options.circleci_server + or os.getenv("CIRCLECI_SERVER", "https://circleci.com") ) - if not circleci_server: + if not options.circleci_server: Die( "Must provide non empty `--circleci_server` flag or environment variable 'CIRCLECI_SERVER'." ) - circleci_token = str(circleci_token or os.getenv("CIRCLECI_TOKEN")) - if not circleci_token: + options.circleci_token = str( + options.circleci_token or os.getenv("CIRCLECI_TOKEN") + ) + if not options.circleci_token: Die( "Must provide non empty `--circleci_token` flag or environment variable 'CIRCLECI_TOKEN'." ) - project_slug = str(project_slug or os.getenv("CIRCLECI_PROJECT_SLUG")) - if not project_slug: + options.project_slug = str( + options.project_slug or os.getenv("CIRCLECI_PROJECT_SLUG") + ) + if not options.project_slug: Die( - "Must provide non empty `--project_slug` flag or environment variable 'CIRCLECI_PROJECT_SLUG'." + "Must provide non empty `--circleci_project_slug` flag or environment variable 'CIRCLECI_PROJECT_SLUG'." 
) - return CircleCiApiV2( - circleci_server=circleci_server, - circleci_token=circleci_token, - project_slug=project_slug, - ) + return options.CreateClient() def AddDetails(self, row: dict[str, str]) -> dict[str, str]: """Fetches details for `row`, combines the row with the details and returns the result.""" @@ -140,10 +178,27 @@ def AddDetails(self, row: dict[str, str]) -> dict[str, str]: result[k] = details.get(k, "") return result + def LogRowProgress(self, row_index: int) -> None: + if self.args.progress: + if not row_index % 1000: + Log(f"{row_index}") + elif not row_index % 20: + Log(".", end="") + + def LogRowProgressEnd(self, row_index: int) -> None: + if self.args.progress: + if (row_index % 1000) < 20: + Log(f".{row_index}") + else: + Log(f"{row_index}") + class RequestBranches(CircleCiCommand): """Read and display the list of branches for `workflow` from CircleCI API. + By default this fetches branches for the workflow `default_workflow`. The workflow can be + specified with the `--workflow` flag. + ``` bazel run //circleci:workflows -- request_branches ``` @@ -203,7 +258,19 @@ def Main(self) -> None: class Fetch(CircleCiCommand): - """Fetch workflow stats from the CircleCI API server and writes them as a CSV file. + """Fetch workflow data from the CircleCI API server and writes them as a CSV file. + + The time range to fetch runs for can be specified using flags `--start`, `--end` and `--midnight`. + By default fetch will retrieve the data for the past 89 complete days starting at midnight. + + The easiest and intended way to manually control the time range is to speficy `--start` as an + offset to the current time. For instance, using `--start=1w` will fetch runs for the past week. + + In many cases it is preferably to fetch data for complete days. That can be achieved by with the + `--midnight` flag. + + After fetching general workflow information, the command will fetch all details if flag + `fetch_workflow_details` if True (default). 
``` bazel run //circleci:workflows -- fetch --output "${PWD}/data/circleci_workflows_$(date +"%Y%m%d").csv.bz2" @@ -229,24 +296,84 @@ def __init__(self): "--end", default="", type=str, - help=r"End (newest) Date/time in format `%Y%m%d`, defaults to `now`.", + help="""End (newest) date/time in Python [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) + format, e.g. `200241224` or as a negative time difference, + e.g. `-10days` (for details see [pytimeparse](https://github.com/wroberts/pytimeparse)). + + This defaults to `now`. + """, ) self.parser.add_argument( "--start", default="", type=str, - help=r"Start (oldest) Date/time in format `%Y%m%d`, defaults to `--end` minus 90 days.", + help="""Start (oldest) date/time in Python [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) + format, e.g. `200241224` or as a negative time difference, + e.g. `-10days` (for details see [pytimeparse](https://github.com/wroberts/pytimeparse)). + + This defaults to `-90days` (or `-89days` if --midnight is active). + """, + ) + self.parser.add_argument( + "--midnight", + default=True, + action=argparse.BooleanOptionalAction, + help="""Adjust start and end date/time to midnight of the same day. 
+ """, + ) + self.parser.add_argument( + "--progress", + action=argparse.BooleanOptionalAction, + help="Whether to indicate progress (defaults to True if `--fetch_workflow_details` is active).", + ) + self.parser.add_argument( + "--fetch_workflow_details", + default=True, + action=argparse.BooleanOptionalAction, + help="Whether workflow details should automatically be added.", + ) def Main(self) -> None: - if self.args.end: - end = datetime.strptime(self.args.end, r"%Y%m%d") - else: - end = datetime.now() - if self.args.start: - start = datetime.strptime(self.args.start, r"%Y%m%d") - else: - start = end - timedelta(days=90) + if self.args.fetch_workflow_details and self.args.progress is None: + self.args.progress = True + now = datetime.now() + now = datetime.combine( + now.date(), now.time(), tzinfo=now.tzinfo or timezone.utc + ) + end = ParseDateTimeOrDelta( + arg=self.args.end, + midnight=self.args.midnight, + default=now, + reference=now, + error_prefix="Bad flag `--end` value '", + error_suffix="'.", + ) + start = ParseDateTimeOrDelta( + arg=self.args.start, + midnight=self.args.midnight, + default=end - timedelta(days=90), + reference=end, + error_prefix="Bad flag `--start` value '", + error_suffix="'.", + ) + if (now - start) > timedelta(days=90): + if self.args.start: + Log("Specified start is more than the maximum of 90 days ago.") + if self.args.midnight: + if self.args.start: + Log("Adjusting to midnight from 89 days ago.") + start = now - timedelta(days=89)  # stay tz-aware like now/end + start = datetime( + start.year, start.month, start.day, tzinfo=start.tzinfo + ) + else: + if self.args.start: + Log("Adjusting to 90 days ago.") + start = now - timedelta(days=90)  # stay tz-aware like now/end + if start >= end: + Die(f"Specified start time {start} must be before end time {end}!") + Log(TimeRangeStr(start, end)) + Log(f"Fetching details: {self.args.fetch_workflow_details}") if self.args.workflow: workflows = self.args.workflow.split(",") else: @@ -257,18 +384,20 @@ def Main(self) -> None: 
with OpenTextFile(filename=self.args.output, mode="w") as csv_file: keys = FETCH_WORKFLOW_KEYS print(f"{','.join(keys)}", file=csv_file) - for workflow in workflows: + for workflow in sorted(workflows): + Log(f"Fetching workflow runs for '{workflow}'.") runs = self.circleci.RequestWorkflowRuns( workflow=workflow, params={ "all-branches": "True", - "start-date": start.strftime(r"%Y-%m-%dT%H:%M:%S%Z"), - "end-date": end.strftime(r"%Y-%m-%dT%H:%M:%S%Z"), + "start-date": self.circleci.FormatTime(start), + "end-date": self.circleci.FormatTime(end), }, ) - Log(f"Read {len(runs)} workflow runs from '{workflow}'.") + if self.args.fetch_workflow_details: + Log(f"Fetching {len(runs)} workflow run details for '{workflow}'.") run_count += len(runs) - for run in runs: + for run_index, run in enumerate(runs, 1): run["workflow"] = workflow created: datetime = self.circleci.ParseTime(run["created_at"]) stopped: datetime = self.circleci.ParseTime(run["stopped_at"]) @@ -282,13 +411,15 @@ def Main(self) -> None: # Write unix timestamps for sorting etc. run["created_unix"] = str(created.timestamp()) run["stopped_unix"] = str(stopped.timestamp()) + if self.args.fetch_workflow_details: + self.LogRowProgress(row_index=run_index) + run = self.AddDetails(run) data = ",".join([str(run[k]) for k in keys]) print(data, file=csv_file) - Log(f"Read {run_count} items.") - if max_created: - Log(f"Max Date: {max_created.strftime(r'%Y.%m.%d')}.") - if min_created: - Log(f"Min Date: {min_created.strftime(r'%Y.%m.%d')}.") + self.LogRowProgressEnd(row_index=len(runs))  # run_index is unbound/stale when runs is empty + if min_created and max_created: + Log(TimeRangeStr(min_created, max_created)) + Log(f"Wrote {run_count} items to '{self.args.output}'.") class FetchDetails(CircleCiCommand): @@ -313,7 +444,10 @@ def __init__(self): help="Name of the output file.", ) self.parser.add_argument( - "--progress", type=bool, default=True, help="Whether to indicate progress." 
+ "--progress", + default=True, + action=argparse.BooleanOptionalAction, + help="Whether to indicate progress.", ) def Main(self) -> None: @@ -331,15 +465,9 @@ def Main(self) -> None: f"Bad field names [{headers}], expected subset of [{FETCH_WORKFLOW_DETAIL_KEYS}]" ) for index, row in enumerate(reader, 1): - if self.args.progress: - if not index % 1000: - Log(f"{index}") - Log("Fetching workflow details:", end="") - elif not index % 20: - Log(".", end="") + self.LogRowProgress(row_index=index) data[row["id"]] = self.AddDetails(row) - if self.args.progress: - Log() + self.LogRowProgressEnd(row_index=index) Log(f"Read {len(data)} details.") with OpenTextFile(filename=self.args.output, mode="w") as csv_file: writer = csv.DictWriter( @@ -377,15 +505,14 @@ def __init__(self): ) self.parser.add_argument( "--fetch_workflow_details", - default=False, - type=bool, - help="Whether workflow details should automatically be added if not present.", + default=True, + action=argparse.BooleanOptionalAction, + help="Whether workflow details should automatically be added (if not present).", ) self.parser.add_argument( "--progress", - type=bool | None, - default=None, - help="Whether to indicate progress.", + action=argparse.BooleanOptionalAction, + help="Whether to indicate progress (defaults to True if `--fetch_workflow_details` is active).", ) def Main(self) -> None: @@ -411,13 +538,8 @@ def Main(self) -> None: if self.args.fetch_workflow_details: row = self.AddDetails(row) data[row["id"]] = row - if self.args.progress: - if not rows % 1000: - Log(f"{rows}") - elif not rows % 20: - Log(".", end="") - if self.args.progress: - Log() + self.LogRowProgress(row_index=rows) + self.LogRowProgressEnd(row_index=rows) Log(f"Read file {filename} with {rows} rows.") with OpenTextFile(filename=self.args.output, mode="w") as csv_file: writer = csv.DictWriter( @@ -457,21 +579,27 @@ def __init__(self): help="Name of the output file.", ) self.parser.add_argument( - "--min_duration_sec", + 
"--min_duration_sec", # TODO(helly25): Use a duration parser type=int, default=600, help="Mininum duration to accept row in [sec].", ) + self.parser.add_argument( + "--output_duration_as_mins", + default=True, + action=argparse.BooleanOptionalAction, + help="Whether to report duration values in minutes.", + ) self.parser.add_argument( "--exclude_branches", type=str, default="main|master|develop|develop-freeze.*", - help="Exclude brnaches by full regular expression match.", + help="Exclude branches by full regular expression match.", ) self.parser.add_argument( "--exclude_incomplete_reruns", - type=bool, default=True, + action=argparse.BooleanOptionalAction, help="If workflow details are available, reject inomplete reruns " "(e.g.: rerun-single-job, rerun-workflow-from-failed).", ) @@ -573,9 +701,11 @@ def Main(self) -> None: and row["tag"] not in ["", "rerun-workflow-from-beginning"] ): continue - duration = int(row["duration"]) / 60 - if duration * 60 < self.args.min_duration_sec: + duration = float(row["duration"]) + if duration < self.args.min_duration_sec: continue + if self.args.output_duration_as_mins: + duration /= 60 workflow = row["workflow"] workflows.add(workflow) if self.args.workflow and workflow not in self.args.workflow.split(","): @@ -585,6 +715,7 @@ def Main(self) -> None: created = self.ParseTime(row["created"]) if created.strftime("%u") not in self.args.only_weekdays: continue + # NOT detected by gsheets as date/time! 
date = created.strftime(r"%Y.%m.%d") row["date"] = date if not date in data: @@ -615,6 +746,11 @@ def Main(self) -> None: "runs": str(r_cnt), } ) - Log(f"First {sorted_data[0][0]['date']}") - Log(f"Last {sorted_data[-1][0]['date']}") + if sorted_data: + Log( + TimeRangeStr( + datetime.strptime(sorted_data[0][0]["date"], r"%Y.%m.%d"), + datetime.strptime(sorted_data[-1][0]["date"], r"%Y.%m.%d"), + ) + ) Log(f"Wrote {len(sorted_data)} rows to '{self.args.output}'.") diff --git a/circleci/workflows_lib_test.py b/circleci/workflows_lib_test.py index e1a8552..de8b638 100644 --- a/circleci/workflows_lib_test.py +++ b/circleci/workflows_lib_test.py @@ -26,7 +26,7 @@ import circleci.workflows_lib from circleci.circleci_api_v2 import CircleCiApiV2 -from circleci.commands import Command, Print, SnakeCase +from mbo.app.commands import Command, Print, SnakeCase class WorkflowsTest(unittest.TestCase): @@ -62,8 +62,12 @@ def test_Workflows(self): with redirect_stdout(io.StringIO()) as capture: with patch.object( CircleCiApiV2, "RequestBranches", return_value=["b1", "b2"] - ) as mock_client: - mock_client = CircleCiApiV2("__test__", "TOKEN", "project") + ) as mock_request_branches: + mock_client = CircleCiApiV2( + circleci_server="__test__", + circleci_token="TOKEN", + project_slug="project", + ) with patch( "circleci.workflows_lib.CircleCiCommand._InitCircleCiClient", return_value=mock_client, diff --git a/mbo/BUILD b/mbo/BUILD new file mode 100644 index 0000000..af28b8b --- /dev/null +++ b/mbo/BUILD @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: Copyright (c) The helly25/mbo authors (helly25.com) +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python MBO lib.""" diff --git a/mbo/app/BUILD b/mbo/app/BUILD new file mode 100644 index 0000000..9ff6186 --- /dev/null +++ b/mbo/app/BUILD @@ -0,0 +1,58 @@ +# SPDX-FileCopyrightText: Copyright (c) The helly25/mbo authors (helly25.com) +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Python application support library.""" + +load("@my_pip_deps//:requirements.bzl", "requirement") +load("@rules_python//python:defs.bzl", "py_library", "py_test") + +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "commands_py", + srcs = ["commands.py"], + deps = [ + ":flags_py", + requirement("argparse-formatter"), + ], +) + +py_test( + name = "commands_test", + srcs = ["commands_test.py"], + deps = [ + ":commands_py", + requirement("parameterized"), + ], +) + +py_library( + name = "flags_py", + srcs = ["flags.py"], + deps = [ + requirement("argparse-formatter"), + requirement("pytimeparse"), + ], +) + +py_test( + name = "flags_test", + srcs = ["flags_test.py"], + deps = [ + ":flags_py", + requirement("freezegun"), + requirement("parameterized"), + ], +) diff --git a/circleci/commands.py b/mbo/app/commands.py similarity index 60% rename from circleci/commands.py rename to mbo/app/commands.py index f916659..47aa970 100644 --- a/circleci/commands.py +++ b/mbo/app/commands.py @@ -55,15 +55,19 @@ def Main(self): from pathlib import Path from typing import TYPE_CHECKING, Any, Type, cast +from argparse_formatter import ParagraphFormatter + +from mbo.app.flags import EnumAction + if TYPE_CHECKING: from _typeshed import OpenTextMode else: OpenTextMode = str -def Die(message: Any): +def Die(message: Any, exit_code: int = 1): print(f"FATAL: {message}", flush=True, file=sys.stderr) - exit(1) + exit(exit_code) def Log(message: Any = "", end="\n", flush=True, file=None): @@ -122,13 +126,97 @@ def SnakeCase(text: str) -> str: return re.sub("_+", "_", text).lower() +class HelpOutputMode(Enum): + TEXT = "text" + MARKDOWN = "markdown" + + +class CommandParagraphFormatter(ParagraphFormatter): + """A Paragraph formatter that can control TEXT and MARKDOWN formatting. + + The formatter is also able to handle '```' markdown correctly in either mode. 
+ """ + + _help_output_mode: HelpOutputMode = HelpOutputMode.TEXT + + @classmethod + def SetOutputMode(cls, output_mode: HelpOutputMode): + cls._help_output_mode = output_mode + + def IsOutputMode(self, output_mode: HelpOutputMode) -> bool: + return self._help_output_mode == output_mode + + def __init__(self, **kwargs) -> None: + super(CommandParagraphFormatter, self).__init__(**kwargs) + + def _fill_text(self, text: str, width: int, indent: str) -> str: + if len(indent) > 4: + indent = " " * 4 + keep = False + sub_text = "" + result = "" + for line in text.split("\n"): + if line.startswith("```"): + if not keep: + if sub_text: + result += super(CommandParagraphFormatter, self)._fill_text( + sub_text, width, indent + ) + result += "\n\n" + sub_text = "" + result += line + "\n" + keep = not keep + continue + if keep: + result += line + "\n" + else: + sub_text += line + "\n" + if sub_text: + result += super(CommandParagraphFormatter, self)._fill_text( + sub_text, width, indent + ) + return result + + def _format_action_invocation(self, action): + result = super(CommandParagraphFormatter, self)._format_action_invocation( + action + ) + if self.IsOutputMode(HelpOutputMode.MARKDOWN): + return "\n" + result + "\n\n" + return result + + def _format_action(self, action) -> str: + result: str = super(CommandParagraphFormatter, self)._format_action(action) + # ATM this works without extra space in either mode. 
+ return result + + def _format_usage(self, usage, actions, groups, prefix): + if self.IsOutputMode(HelpOutputMode.MARKDOWN): + if not prefix: + prefix = "### usage:" + return super(CommandParagraphFormatter, self)._format_usage( + usage, actions, groups, prefix + ) + + def start_section(self, heading: str | None) -> None: + if self.IsOutputMode(HelpOutputMode.MARKDOWN): + if heading: + heading = f"### {heading}" + super(CommandParagraphFormatter, self).start_section(heading) + + def add_argument(self, action): + super(CommandParagraphFormatter, self).add_argument(action) + if self.IsOutputMode(HelpOutputMode.MARKDOWN): + self._action_max_length = 4 + + def DocOutdent(text: str) -> str: if not text: return text result = [] lines = text.strip("\n").rstrip().split("\n") if text.startswith("\n") and not lines[0].startswith(" "): - result.append("XXX" + lines[0]) + result.append(lines[0]) lines.pop(0) max_indent = -1 for line in lines: @@ -184,7 +272,10 @@ def __init_subclass__(cls, **kwargs): cls._commands[cls.name()] = cls def __init__(self): - self.parser = argparse.ArgumentParser(description=self.description()) + self.parser = argparse.ArgumentParser( + description=self.description(), + formatter_class=CommandParagraphFormatter, + ) @classmethod def name(cls): @@ -204,6 +295,14 @@ def Prepare(self, argv: list[str]) -> None: same as if `Argparse.parse_args` was passed no argument (in which case it uses `sys.argv`. 
""" + self.program = argv[0] if argv else "-" + match = re.fullmatch( + "(?:.*/)?bazel-out/.*/bin/.*[.]runfiles/(?:__main__|_main)/(.*)/([^/]+)[.]py", + self.program, + ) + if match: + self.program = f"bazel run //{match.group(1)}:{match.group(2)} --" + self.parser.prog = self.program + " " + self.name() self.args = self.parser.parse_args(argv[1:]) @abstractmethod @@ -213,62 +312,63 @@ def Main(self): @staticmethod def Run(argv: list[str] = sys.argv): command_name = argv[1] if len(argv) > 1 else "" - if not Command._commands: - Die("No Commands were implemented.") - if command_name in Command._commands.keys(): - command = Command._commands[command_name]() - else: - command = None - if len(argv) < 2 or not command: - command = Command._commands["help"]() + if not Command._commands or Command._commands.keys() == ["help"]: + Die("No `Command` were implemented.") + command_type = Command._commands.get(command_name, None) + if not command_type: + command_type = Command._commands["help"] + command = command_type() argv = [argv[0]] + argv[2:] + command.parser.add_argument( + "--mbo_app_swallow_exceptions", + action=argparse.BooleanOptionalAction, + help="Whether to swollow details from exceptions and only show their error message.", + ) + command.parser.add_argument( + "--help_output_mode", + "--help-output-mode", + dest="help_output_mode", + type=HelpOutputMode, + action=EnumAction, + help="Output mode for help.", + ) command.Prepare(argv) + CommandParagraphFormatter.SetOutputMode(command.args.help_output_mode) try: command.Main() + except KeyboardInterrupt: + Die(message="Interrupted!", exit_code=130) except Exception as err: - Die(err) - - -class HelpOutputMode(Enum): - TEXT = "text" - MARKDOWN = "markdown" + if command.args.mbo_app_swallow_exceptions: + Die(err) + raise err class Help(Command): """Provides help for the program.""" - def __init__(self): + def __init__(self) -> None: super(Help, self).__init__() - self.parser.add_argument( - "--mode", - 
type=HelpOutputMode, - default=HelpOutputMode.TEXT, - help="The output mode for printing help.", - ) self.parser.add_argument( "--all_commands", - action="store_true", + action=argparse.BooleanOptionalAction, help="Whether to show all commands", ) self.parser.add_argument( - "--prefix", + "--show_usage", + action=argparse.BooleanOptionalAction, + help="Whether to show generated command useage (aka synopsis).", + ) + self.parser.add_argument( + "--prefix_file", type=Path, - default="", help="A file that should be used as a prefix on output.", ) + self.esequential_mpty_lines: int = 0 - def Prepare(self, argv: list[str]) -> None: - super(Help, self).Prepare(argv) - self.program = argv[0] if argv else "-" - match = re.fullmatch( - "(?:.*/)?bazel-out/.*/bin/.*[.]runfiles/(?:__main__|_main)/(.*)/([^/]+)[.]py", - self.program, - ) - if match: - self.program = f"bazel run //{match.group(1)}:{match.group(2)} --" - - def Print(self, text: str = ""): - if self.args.mode == HelpOutputMode.TEXT: + def Print(self, text: str = "") -> None: + # Deal with links... + if self.args.help_output_mode == HelpOutputMode.TEXT: # In text mode replace replace images and links with their targets. img_re = re.compile(r"\[!\[([^\]]+)\]\([^\)]+\)\]\(([^\)]+)\)") lnk_re = re.compile(r"\[!?([^\]]+)\]\([^\)]+\)") @@ -277,35 +377,56 @@ def Print(self, text: str = ""): (text, n_lnk) = lnk_re.subn("\\1", text) if not n_img and not n_lnk: break - globals()["Print"](text) + # Allow at most `max_empty_lines` sequential empty lines. If there were some on the last + # call to `Print`, then prepend at most that many empty lines to `text`. + # Then loop over the lines and if there are empty lines count them. + # For non empty lines print at most that many empty lines if some empty lines + # preceded.
+ max_empty_lines = 1 + self.esequential_mpty_lines = min(max_empty_lines, self.esequential_mpty_lines) + text = "\n" * self.esequential_mpty_lines + text + self.esequential_mpty_lines = 0 + for t in text.split("\n"): + if t.count(" ") == len(t): + t = "" + if not t: + self.esequential_mpty_lines = min( + max_empty_lines, self.esequential_mpty_lines + 1 + ) + else: + while self.esequential_mpty_lines > 0: + globals()["Print"]("") + self.esequential_mpty_lines -= 1 + self.esequential_mpty_lines = 0 + globals()["Print"](t) def H1(self, text: str): - if self.args.mode == HelpOutputMode.MARKDOWN: + if self.args.help_output_mode == HelpOutputMode.MARKDOWN: self.Print(f"# {text.rstrip(':')}") else: self.Print(f"{text}\n") def H2(self, text: str): - if self.args.mode == HelpOutputMode.MARKDOWN: + if self.args.help_output_mode == HelpOutputMode.MARKDOWN: self.Print(f"## {text.rstrip(':')}") else: self.Print(f"{text}\n") def Code(self, text: str): - if self.args.mode == HelpOutputMode.MARKDOWN: + if self.args.help_output_mode == HelpOutputMode.MARKDOWN: self.Print(f"```\n{text}\n```") else: self.Print(f" {text}") def ListItem(self, text: str): - if self.args.mode == HelpOutputMode.MARKDOWN: + if self.args.help_output_mode == HelpOutputMode.MARKDOWN: self.Print(f"* {text}") else: self.Print(f" {text}") def Main(self) -> None: - if self.args.prefix: - self.Print(self.args.prefix.open("rt").read()) + if self.args.prefix_file: + self.Print(self.args.prefix_file.open("rt").read()) first_line, program_doc = DocOutdent( str(sys.modules["__main__"].__doc__).strip() ).split("\n\n", 1) @@ -334,5 +455,11 @@ def Main(self) -> None: self.Print() self.H2(f"Command {name}") self.Print() - self.Print(command.description()) + if self.args.help_output_mode == HelpOutputMode.TEXT: + self.Print(command.description()) + else: + cmd = command() + cmd.parser.prog = self.program + " " + name + cmd.parser.usage = argparse.SUPPRESS + self.Print(cmd.parser.format_help()) exit(1) diff --git 
a/circleci/commands_test.py b/mbo/app/commands_test.py similarity index 97% rename from circleci/commands_test.py rename to mbo/app/commands_test.py index 9dfe015..f3238ca 100644 --- a/circleci/commands_test.py +++ b/mbo/app/commands_test.py @@ -22,7 +22,7 @@ from parameterized import parameterized -from circleci.commands import Command, Print, SnakeCase +from mbo.app.commands import Command, Print, SnakeCase class HelloDear(Command): diff --git a/mbo/app/flags.py b/mbo/app/flags.py new file mode 100644 index 0000000..c10a79f --- /dev/null +++ b/mbo/app/flags.py @@ -0,0 +1,205 @@ +# SPDX-FileCopyrightText: Copyright (c) The helly25/mbo authors (helly25.com) +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""A flag support library.""" + +import argparse +import collections +from datetime import datetime, time, timedelta, timezone +from enum import Enum +from typing import Callable, Iterable, Optional, cast + +from pytimeparse.timeparse import timeparse + + +class EnumAction(argparse.Action): + """Argparse action that handles single Enum values.""" + + def __init__(self, **kwargs): + enum_type = kwargs.pop("type", None) + if enum_type is None or not issubclass(enum_type, Enum): + raise ValueError(f"Type must be an Enum, provided type is '{enum_type}'.") + + kwargs.setdefault("choices", tuple(e.value for e in enum_type)) + super(EnumAction, self).__init__(**kwargs) + self._enum = enum_type + + def __call__(self, parser, namespace, values, option_string=None): + value = self._enum(values) + setattr(namespace, self.dest, value) + + +def EnumListParser(enum_type: type[Enum]) -> Callable[[str], list[Enum]]: + """Implements flag parsing for comma separated lists of enum values. + + In the argument definition default values can be specified as a list of the actual enum values. + On the command line the values do not have to be upper case (lowercase and mixed case are fine). + + Note: In many cases `EnumListAction` provides a better solution for flag parsing. + + Example: + ``` + parser.add_argument( + "--myenum", + default=[MyEnum.MY_DEFAULT], + type=EnumListParser(enum_type=MyEnum), + help="Comma separated list of MyEnum {}.".format(set(MyEnum.__members__.keys())), + ) + args=parser.parse_args(["--myenum", "my_default,my_other"]) + ``` + """ + return lambda values: [ + enum_type.__getitem__(v.strip().upper()) for v in values.split(",") if v + ] + + +class EnumListAction(argparse.Action): + """Argparse `action` for comma separated lists of Enum values. + + This action has the additional config: + * allow_empty: If False (the default), then an empty list is NOT allowed. + * container_type: The container type (e.g. list or set).
+ + Example: + ``` + parser.add_argument( + "--myenum", + default=[MyEnum.MY_DEFAULT], + type=MyEnum, + action=EnumListAction, + allow_empty=False, + container_type=set, + help="Comma separated list of MyEnum values.", + ) + args=parser.parse_args(["--myenum", "my_default,my_other"]) + ``` + """ + + class Choices: + def __init__(self, action: argparse.Action): + self._action: EnumListAction = cast(EnumListAction, action) + + def choices(self) -> Iterable[str]: + return sorted(self._action._enum_type.__members__.keys()) + + def choices_list(self) -> str: + return ", ".join(self.choices()) + + def __repr__(self) -> str: + return self.choices_list() + + def __iter__(self): + yield self.choices() + + def __contains__(self, value: str) -> bool: + if value == "": + if self._action._allow_empty: + return True + raise argparse.ArgumentError( + self._action, + f"Empty value is not allowed, chose at least one of [{self.choices_list()}].", + ) + for v in value.split(","): + v = v.strip() + if not v: + raise argparse.ArgumentError( + self._action, + "Empty sub values are not allowed (that means values containing `,,`).", + ) + if v.upper() not in self.choices(): + raise argparse.ArgumentError( + self._action, + f"Sub value '{v}' is not a valid {self._action._enum_type} value, chose from [{self.choices_list()}].", + ) + return True + + def __init__(self, **kwargs): + self._enum_type = kwargs.pop("type", None) + self._allow_empty = kwargs.pop("allow_empty", False) + self._container_type = kwargs.pop("container_type", list) + + if self._enum_type is None or not issubclass(self._enum_type, Enum): + raise ValueError( + f"Type must be an Enum, provided type is '{self._enum_type}'."
+ ) + + kwargs.setdefault("choices", self.Choices(action=self)) + super(EnumListAction, self).__init__(**kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + if isinstance(values, list): + values = ",".join(values) + value = self._container_type( + [ + self._enum_type.__getitem__(v.strip().upper()) + for v in values.split(",") + if v + ] + ) + setattr(namespace, self.dest, value) + + +def ParseDateTimeOrDelta( + arg: str, + midnight: bool = False, + default: Optional[datetime] = None, + reference: Optional[datetime] = None, + error_prefix: Optional[str] = None, + error_suffix: Optional[str] = None, +) -> datetime: + """Parse `arg` as date or time delta in relation to reference. + + If `arg` starts with either `-` or `+`, then it will be parsed as `timedelta`. + Otherwise `arg` will be parsed as Python [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). + + Note: The returned value will have its timezone set from (in order of precedence): + * The input time. + * The reference time. + * `datetime.now()`. + * `timezone.utc`. + + Args: + arg: The argument to parse. If this starts with a '-' or '+', then + the argument will be interpreted as a time delta. + midnight: Whether to adjust the date/time to midnight of the day. + default: The value to use if `arg` is empty (defaults to `datetime.now`). + reference: The reference datetime to use for time deltas (defaults to `datetime.now`). + error_prefix: An optional error prefix prepended to messages of raised errors. By default + this is "Bad timedelta value '". Together with error_suffix which defaults + to "'." this allows providing additional error information. + For instance if the function is used to parse flags, then it is a + good idea to state which flag cannot be parsed. + error_suffix: See `error_prefix`.
+ """ + result: datetime + if arg.startswith(("-", "+")): + seconds: float | None = timeparse(arg) + if type(seconds) == type(None): + if error_prefix is None: + error_prefix = "Bad timedelta value '" + if error_suffix is None: + error_suffix = "'." + raise ValueError(f"{error_prefix}{arg}{error_suffix}") + result = (reference or datetime.now()) + timedelta(seconds=seconds or 0) + elif arg: + result = datetime.fromisoformat(arg) + else: + result = default or datetime.now() + if not result.tzinfo: + tzinfo = (reference or datetime.now()).tzinfo or timezone.utc + result = datetime.combine(result.date(), result.time(), tzinfo=tzinfo) + if midnight: + return datetime.combine(result.date(), time(), tzinfo=result.tzinfo) + return result diff --git a/mbo/app/flags_test.py b/mbo/app/flags_test.py new file mode 100644 index 0000000..115472c --- /dev/null +++ b/mbo/app/flags_test.py @@ -0,0 +1,316 @@ +# SPDX-FileCopyrightText: Copyright (c) The helly25/mbo authors (helly25.com) +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for flags.py.""" + +import argparse +import unittest +from dataclasses import dataclass, is_dataclass +from datetime import datetime +from enum import Enum +from typing import Any + +from freezegun import freeze_time +from parameterized import param, parameterized + +import mbo.app.flags + +_NOW = "2024-08-28T14:15:16.123Z" + + +def ActionArgs(name: str = "--flag", **kwargs) -> dict[str, Any]: + kwargs["name"] = name + return kwargs + + +class TestEnum(Enum): + ONE = 1 + TWO = 2 + TRE = 3 + FOR = 4 + + +def dataclass_as_param(cls): + def iter_as_param(self): + if not is_dataclass(self): + raise TypeError( + f"Decorator 'dataclass_as_param' only works with dataclasses, {type(self)} is not." + ) + return iter([self]) + + return type(cls.__name__, (cls,), {"__iter__": iter_as_param}) + + +class FlagsTest(unittest.TestCase): + """Tests for flags.py.""" + + @dataclass_as_param + @dataclass(kw_only=True) + class ParseDateTimeOrDeltaTest: + expected: str + expected_error: type | None = None + input: str + midnight: bool = False + default: str | None = None + reference: str | None = None + error_prefix: str | None = None + error_suffix: str | None = None + now: str = _NOW + + @parameterized.expand( + [ + ParseDateTimeOrDeltaTest( + expected="2024-04-02T14:00:00Z", + input="2024-04-02T14", + ), + ParseDateTimeOrDeltaTest( + expected="2024-04-02T01:02:03.004Z", + input="2024-04-02T01:02:03.004Z", + ), + ParseDateTimeOrDeltaTest( + expected="2024-04-02T00:00:00Z", + input="2024-04-02T13:14:15.123Z", + midnight=True, + ), + ParseDateTimeOrDeltaTest( + expected="2024-08-21T14:15:16.123Z", + input="-1w", + ), + ParseDateTimeOrDeltaTest( + expected="2024-08-29T14:15:16.123Z", + input="+1d", + ), + ParseDateTimeOrDeltaTest( + expected="2024-08-29T00:00:00Z", + input="+1d", + midnight=True, + ), + ParseDateTimeOrDeltaTest( + expected="2024-08-14 01:00:00Z", + input="+8h", + reference="2024-08-13 17", + midnight=False, + ), + ParseDateTimeOrDeltaTest( + 
expected="2024-08-14 00:00:00Z", + input="+8h", + reference="2024-08-13 17", + midnight=True, + ), + ParseDateTimeOrDeltaTest( + expected="2024-08-14 00:00:00Z", + input="+0s", + reference="2024-08-14 17", + midnight=True, + ), + ParseDateTimeOrDeltaTest( + expected="Bad timedelta value '+0 NOPE'.", + expected_error=ValueError, + input="+0 NOPE", + reference="2024-08-14 17", + midnight=True, + ), + ParseDateTimeOrDeltaTest( + expected="+0 NOPE", + expected_error=ValueError, + input="+0 NOPE", + reference="2024-08-14 17", + midnight=True, + error_prefix="", + error_suffix="", + ), + ] + ) + def test_ParseDateTimeOrDelta(self, test: ParseDateTimeOrDeltaTest): + with freeze_time(datetime.fromisoformat(test.now)): + try: + self.assertEqual( + ( + datetime.fromisoformat(test.expected) + if not test.expected_error + else None + ), + mbo.app.flags.ParseDateTimeOrDelta( + arg=test.input, + midnight=test.midnight, + default=( + datetime.fromisoformat(test.default) + if test.default + else None + ), + reference=( + datetime.fromisoformat(test.reference) + if test.reference + else None + ), + error_prefix=test.error_prefix, + error_suffix=test.error_suffix, + ), + ) + except Exception as error: + self.assertIsNotNone(test.expected_error, error) + if test.expected_error: + self.assertEqual(test.expected, str(error)) + self.assertEqual(type(error), test.expected_error) + + @dataclass_as_param + @dataclass(kw_only=True) + class EnumListActionTest: + test: str + expected: Any + expected_error: type | None = None + action: dict[str, Any] + input: list[str] + + @parameterized.expand( + [ + EnumListActionTest( + test="Set a single value to a list.", + expected=[TestEnum.ONE], + action=ActionArgs( + type=TestEnum, + action=mbo.app.flags.EnumListAction, + ), + input=["--flag=one"], + ), + EnumListActionTest( + test="Setting an empty vlaue requires `allow_empty=True`.", + expected="argument --flag: Empty value is not allowed, chose at least one of [FOR, ONE, TRE, TWO].", + 
expected_error=argparse.ArgumentError, + action=ActionArgs( + type=TestEnum, + default=[], + action=mbo.app.flags.EnumListAction, + ), + input=["--flag="], + ), + EnumListActionTest( + test="Setting an empty vlaue requires `allow_empty=True` (not False).", + expected="argument --flag: Empty value is not allowed, chose at least one of [FOR, ONE, TRE, TWO].", + expected_error=argparse.ArgumentError, + action=ActionArgs( + type=TestEnum, + default=[], + action=mbo.app.flags.EnumListAction, + allow_empty=False, + ), + input=["--flag="], + ), + EnumListActionTest( + test="Setting an empty vlaue requires with `allow_empty=True` works.", + expected=[], + expected_error=argparse.ArgumentError, + action=ActionArgs( + type=TestEnum, + default=[], + action=mbo.app.flags.EnumListAction, + allow_empty=True, + ), + input=["--flag="], + ), + EnumListActionTest( + test="Default values work.", + expected=[TestEnum.TWO], + action=ActionArgs( + type=TestEnum, + default=[TestEnum.TWO], + action=mbo.app.flags.EnumListAction, + ), + input=[], + ), + EnumListActionTest( + test="Default values work: They can even bypass the type.", + expected="Something else", + action=ActionArgs( + type=TestEnum, + default="Something else", + action=mbo.app.flags.EnumListAction, + ), + input=[], + ), + EnumListActionTest( + test="Multile, possible repeated values and mixed case.", + expected=[TestEnum.TWO, TestEnum.ONE, TestEnum.TWO], + action=ActionArgs( + type=TestEnum, + action=mbo.app.flags.EnumListAction, + ), + input=["--flag=two,oNe,TWO"], + ), + EnumListActionTest( + test="Multile values in a set.", + expected=set([TestEnum.ONE, TestEnum.TWO]), + action=ActionArgs( + type=TestEnum, + container_type=set, + action=mbo.app.flags.EnumListAction, + ), + input=["--flag=two,oNe,TWO"], + ), + EnumListActionTest( + test="Repeated flag for list.", + expected=[ + TestEnum.TWO, + TestEnum.FOR, + TestEnum.ONE, + TestEnum.TRE, + TestEnum.TWO, + ], + action=ActionArgs( + "flag", + nargs="+", + type=TestEnum, + 
action=mbo.app.flags.EnumListAction, + ), + input=["two,for", "one,tre", "TWO"], + ), + EnumListActionTest( + test="Repeated flag for list.", + expected={TestEnum.TWO, TestEnum.FOR, TestEnum.ONE, TestEnum.TRE}, + action=ActionArgs( + "flag", + nargs="+", + type=TestEnum, + container_type=set, + action=mbo.app.flags.EnumListAction, + ), + input=["two,for", "one,tre", "TWO"], + ), + ] + ) + def test_EnumListAction(self, test: EnumListActionTest): + try: + parser = argparse.ArgumentParser(exit_on_error=False) + name = test.action.pop("name", "--flag") + parser.add_argument(name, **test.action) + args = parser.parse_args(test.input) + self.assertEqual( + test.expected, args.flag, "Bad value in test: " + test.test + ) + except argparse.ArgumentError as error: + self.assertIsNotNone(test.expected_error, error) + if test.expected_error: + self.assertEqual( + test.expected, str(error), "Bad error message in test: " + test.test + ) + self.assertEqual( + type(error), + test.expected_error, + "Bad error type in test: " + test.test, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/requirements.in b/requirements.in index 7d4b399..090f0da 100644 --- a/requirements.in +++ b/requirements.in @@ -1,3 +1,7 @@ +argparse-formatter +freezegun +humanize parameterized +pytimeparse requests responses diff --git a/requirements_lock.txt b/requirements_lock.txt index 6ebdd8b..a8a2b04 100644 --- a/requirements_lock.txt +++ b/requirements_lock.txt @@ -4,6 +4,10 @@ # # bazel run //:requirements.update # +argparse-formatter==1.4 \ + --hash=sha256:35027941a1e75a1a4df21e5c40a3395d311777d9bb1cfd9744d6ff7cc28de216 \ + --hash=sha256:c1ce58a68ed83d5204e1515c6f8fb52f5f32fc21bc993faa80f0b17a78090265 + # via -r requirements.in certifi==2024.7.4 \ --hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \ --hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90 @@ -100,6 +104,14 @@ charset-normalizer==3.3.2 \ 
--hash=sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519 \ --hash=sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561 # via requests +freezegun==1.5.1 \ + --hash=sha256:b29dedfcda6d5e8e083ce71b2b542753ad48cfec44037b3fc79702e2980a89e9 \ + --hash=sha256:bf111d7138a8abe55ab48a71755673dbaa4ab87f4cff5634a4442dfec34c15f1 + # via -r requirements.in +humanize==4.10.0 \ + --hash=sha256:06b6eb0293e4b85e8d385397c5868926820db32b9b654b932f57fa41c23c9978 \ + --hash=sha256:39e7ccb96923e732b5c2e27aeaa3b10a8dfeeba3eb965ba7b74a3eb0e30040a6 + # via -r requirements.in idna==3.8 \ --hash=sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac \ --hash=sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603 @@ -108,6 +120,14 @@ parameterized==0.8.1 \ --hash=sha256:41bbff37d6186430f77f900d777e5bb6a24928a1c46fb1de692f8b52b8833b5c \ --hash=sha256:9cbb0b69a03e8695d68b3399a8a5825200976536fe1cb79db60ed6a4c8c9efe9 # via -r requirements.in +python-dateutil==2.9.0.post0 \ + --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ + --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 + # via freezegun +pytimeparse==1.1.8 \ + --hash=sha256:04b7be6cc8bd9f5647a6325444926c3ac34ee6bc7e69da4367ba282f076036bd \ + --hash=sha256:e86136477be924d7e670646a98561957e8ca7308d44841e21f5ddea757556a0a + # via -r requirements.in pyyaml==6.0.2 \ --hash=sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff \ --hash=sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48 \ @@ -173,6 +193,10 @@ responses==0.25.3 \ --hash=sha256:521efcbc82081ab8daa588e08f7e8a64ce79b91c39f6e62199b19159bea7dbcb \ --hash=sha256:617b9247abd9ae28313d57a75880422d55ec63c29d33d629697590a034358dba # via -r requirements.in +six==1.16.0 \ + --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ + 
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + # via python-dateutil urllib3==2.2.2 \ --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 diff --git a/tools/readme_update.sh b/tools/readme_update.sh index 232364c..318557a 100755 --- a/tools/readme_update.sh +++ b/tools/readme_update.sh @@ -2,4 +2,4 @@ ROOT="$(realpath "$(dirname "${0}")/..")" -bazel run //circleci:workflows --color=yes -- help --mode=markdown --all_commands --prefix="${ROOT}/README.header.txt" > "${ROOT}/README.md" +bazel run //circleci:workflows --color=yes -- help --help_output_mode=markdown --all_commands --prefix="${ROOT}/README.header.txt" > "${ROOT}/README.md"