From 8ed55bcf5fa296d8f2279d3c0db363c3c12b865c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Rod=C3=A9s-Guirao?= Date: Wed, 27 Nov 2024 18:45:23 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=8A=20improve=20etl=20pr=20command=20(?= =?UTF-8?q?#3634)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ๐Ÿ“Š improve etl pr command * wip * ๐Ÿšง some title test * add new method `etl p` * change `etl pr` to `etl pro` * update docs * correct docs * add private mode --- apps/cli/__init__.py | 3 +- apps/pr/__init__.py | 0 apps/pr/categories.py | 69 +++++ apps/pr/cli.py | 412 +++++++++++++++++++++++++++ apps/step_update/cli.py | 3 +- apps/utils/draft_pull_request.py | 2 +- docs/guides/data-work/add-data.md | 2 +- docs/guides/data-work/update-data.md | 6 +- pyproject.toml | 1 + 9 files changed, 491 insertions(+), 7 deletions(-) create mode 100644 apps/pr/__init__.py create mode 100644 apps/pr/categories.py create mode 100644 apps/pr/cli.py diff --git a/apps/cli/__init__.py b/apps/cli/__init__.py index 685b57ab2d1..38007973251 100644 --- a/apps/cli/__init__.py +++ b/apps/cli/__init__.py @@ -167,7 +167,8 @@ def cli_back() -> None: "update": "apps.step_update.cli.cli", "archive": "apps.step_update.cli.archive_cli", "explorer-update": "apps.explorer_update.cli.cli", - "pr": "apps.utils.draft_pull_request.cli", + "prr": "apps.utils.draft_pull_request.cli", + "pr": "apps.pr.cli.cli", }, }, { diff --git a/apps/pr/__init__.py b/apps/pr/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/apps/pr/categories.py b/apps/pr/categories.py new file mode 100644 index 00000000000..6022bc767f9 --- /dev/null +++ b/apps/pr/categories.py @@ -0,0 +1,69 @@ +# Add EMOJIs for each PR type +PR_CATEGORIES = { + "data": { + "emoji": "๐Ÿ“Š", + "emoji_raw": ":bar_chart:", + "description": "data update or addition", + }, + "bug": { + "emoji": "๐Ÿ›", + "emoji_raw": ":bug:", + "description": "bug fix for the user", + }, + "refactor": { + "emoji": "๐Ÿ”จ", + "emoji_raw": ":hammer:", + "description": "a code change that neither fixes a bug nor adds a feature for the user", + }, + "enhance": { + "emoji": "โœจ", + "emoji_raw": ":sparkles:", + "description": "visible improvement over a current implementation without adding a new feature or fixing a bug", + }, + "feature": { + "emoji": "๐ŸŽ‰", + "emoji_raw": ":tada:", + "description": "new feature for the user", + }, + "docs": { + "emoji": "๐Ÿ“œ", + "emoji_raw": ":scroll:", + "description": "documentation only changes", + "shortcut_key": "0", + }, + "chore": { + "emoji": "๐Ÿงน", + "emoji_raw": ":honeybee:", + "description": "upgrading dependencies, tooling, etc. No production code change", + }, + "style": { + "emoji": "๐Ÿ’„", + "emoji_raw": ":lipstick:", + "description": "formatting, missing semi colons, etc. No production code change", + }, + "wip": { + "emoji": "๐Ÿšง", + "emoji_raw": ":construction:", + "description": "work in progress - intermediate commits that will be explained later on", + }, + "tests": { + "emoji": "โœ…", + "emoji_raw": ":white_check_mark:", + "description": "adding missing tests, refactoring tests, etc. No production code change", + }, +} +PR_CATEGORIES_MD_DESCRIPTION = "- " + "\n- ".join( + f"**{choice}**: {choice_params['description']}" for choice, choice_params in PR_CATEGORIES.items() +) +PR_CATEGORIES_CHOICES = [ + { + "title": f"{v['emoji']} {k}", + "value": k, + "shortcut_key": v.get("shortcut_key", k[0]), + } + for k, v in PR_CATEGORIES.items() +] +PR_CATEGORIES_CHOICES = sorted(PR_CATEGORIES_CHOICES, key=lambda x: x["shortcut_key"]) +assert len(set([x["shortcut_key"].lower() for x in PR_CATEGORIES_CHOICES])) == len( + PR_CATEGORIES_CHOICES +), "Shortcut keys must be unique" diff --git a/apps/pr/cli.py b/apps/pr/cli.py new file mode 100644 index 00000000000..67c88e4a0af --- /dev/null +++ b/apps/pr/cli.py @@ -0,0 +1,412 @@ +"""This script creates a new draft pull request in GitHub, which starts a new staging server. + +Arguments: + +`TITLE`: The title of the PR. This must be given. + +`CATEGORY`: The category of the PR. This is optional. If not given, the user will be prompted to choose one. + +**Main use case**: Branch out from `master` to a temporary `work_branch`, and create a PR to merge `work_branch` -> `master`. You will be asked to choose a category. The value of `work_branch` will be auto-generated based on the title and the category. + +```shell +# Without specifying a category (you will be prompted for a category) +etl pr "some title for the PR" + +# With a category +etl pr "some title for the PR" data + +# With private stating server +etl pr "some title for the PR" --private +``` + +**Custom use case (1)**: Same as main use case, but with a specific branch name for the `work_branch`. + +```shell +etl pr "some title for the PR" --work-branch "this-temporary-branch" +# Shorter +etl pr "some title for the PR" -w "this-temporary-branch" +``` + +**Custom use case (2)**: Create a pull request from `current_branch` to `master`. + +```shell +etl pr "some title for the PR" --direct +``` + +**Custom use case (3)**: Create a pull request from branch `this-temporary-branch` -> `develop`. + +```shell +etl pr "some title for the PR" --direct --base-branch "develop" --work-branch "this-temporary-branch" +# Shorter +etl pr "some title for the PR" --direct -b "develop" -w "this-temporary-branch" +``` +""" + +import hashlib +import re +import uuid +from typing import Optional, cast + +import click +import questionary +import requests +from git import GitCommandError, Repo +from rich_click.rich_command import RichCommand +from structlog import get_logger + +from apps.pr.categories import PR_CATEGORIES, PR_CATEGORIES_CHOICES +from etl.config import GITHUB_TOKEN +from etl.paths import BASE_DIR + +# Initialize logger. +log = get_logger() + +# URL of the Github API, to be used to create a draft pull request in the ETL repos. +GITHUB_API_URL = "https://api.github.com/repos/owid/etl/pulls" + +# Style for questionary +SHELL_FORM_STYLE = questionary.Style( + [ + ("qmark", "fg:#fac800 bold"), # token in front of the question + ("question", "bold"), # question text + ("answer", "fg:#fac800 bold"), # submitted answer text behind the question + ("pointer", "fg:#fac800 bold"), # pointer used in select and checkbox prompts + ("highlighted", "bg:#fac800 fg:#000000 bold"), # pointed-at choice in select and checkbox prompts + ("selected", "fg:#54cc90"), # style for a selected item of a checkbox + ("separator", "fg:#cc5454"), # separator in lists + # ('instruction', ''), # user instructions for select, rawselect, checkbox + ("text", ""), # plain text + # ('disabled', 'fg:#858585 italic') # disabled choices for select and checkbox prompts + ] +) + + +@click.command( + name="pr", + cls=RichCommand, + help=__doc__, +) +@click.argument( + "title", + type=str, + required=True, +) +@click.argument( + "category", + type=click.Choice(list(PR_CATEGORIES.keys()), case_sensitive=False), + required=False, + default=None, +) +@click.option( + "--scope", + "-s", + help="Scope of the PR (only relevant if --title is given). This text will be preprended to the PR title. \n\n**Examples**: 'demography' for data work on this field, 'etl.db' if working on specific modules, 'wizard', etc.", + default=None, +) +@click.option( + "--work-branch", + "-w", + "work_branch", + type=str, + default=None, + help="The name of the work branch to create. It is auto-generated based on the title and the category. If --direct is used, this is the PR source branch and defaults to the current branch.", +) +@click.option( + "--base-branch", + "-b", + "base_branch", + type=str, + default="master", + help="Name of the base branch. This is the branch to branch out from and merge back into. If --direct is used, this is the PR target branch.", +) +@click.option( + "--direct", + "-d", + is_flag=True, + help="Directly create a PR from the current branch to the target branch (default: master).", +) +@click.option( + "--private", + "-p", + is_flag=True, + help="By default, staging server site (not admin) will be publicly accessible. Use --private to have it private instead. This does not apply when using --direct mode.", +) +def cli( + title: str, + category: Optional[str], + scope: Optional[str], + work_branch: Optional[str], + base_branch: str, + direct: bool, + private: bool, + # base_branch: Optional[str] = None, +) -> None: + # Check that the user has set up a GitHub token. + check_gh_token() + + # Validate title + _validate_title(title) + + # Get category + category = ensure_category(category) + + # Create title + pr_title = PRTitle( + title=title, + category=category, + scope=scope, + ) + + # Initialize repository, get remote branches + repo, remote_branches = init_repo() + + # Get the new branch + work_branch = ensure_work_branch( + repo=repo, + work_branch=work_branch, + direct=direct, + pr_title=pr_title, + ) + + # Check branches main & work make sense! + check_branches_valid(base_branch, work_branch, remote_branches) + + # Auto PR mode: Create a new branch from the base branch. + if not direct: + if private: + if not work_branch.endswith("-private"): + work_branch = f"{work_branch}-private" + branch_out(repo, base_branch, work_branch) + + # Create PR + create_pr(repo, work_branch, base_branch, pr_title) + + +def check_gh_token(): + if not GITHUB_TOKEN: + raise click.ClickException( + """A github token is needed. To create one: +- Go to: https://github.com/settings/tokens +- Click on the dropdown "Generate new token" and select "Generate new token (classic)". +- Give the token a name (e.g., "etl-work"), set an expiration time, and select the scope "repo". +- Click on "Generate token". +- Copy the token and save it in your .env file as GITHUB_TOKEN. +- Run this tool again. +""" + ) + + +def _validate_title(title): + if not bool(re.search(r"\w+", title)): + raise click.ClickException("Invalid title! Use at least one word!") + + +def ensure_category(category: Optional[str]): + """Get category if not provided.""" + if category is None: + # show suggestions + choices = [questionary.Choice(**choice) for choice in PR_CATEGORIES_CHOICES] # type: ignore + category = questionary.select( + message="Please choose a PR category", + choices=choices, + use_shortcuts=True, + style=SHELL_FORM_STYLE, + instruction="(Use shortcuts or arrow keys)", + ).unsafe_ask() + + category = cast(str, category) + + return category + + +class PRTitle: + def __init__(self, title, category, scope): + self.title = title + self.category = category + self.scope = scope + + def __str__(self) -> str: + title_actual = _generate_pr_title(self.title, self.category, self.scope) + if title_actual is None: + raise click.ClickException("Failed to generate PR title.") + return title_actual + + +def init_repo(): + # Initialize a repos object at the root folder of the etl repos. + repo = Repo(BASE_DIR) + # Update the list of remote branches in the local repository. + origin = repo.remote(name="origin") + # NOTE: The option prune=True removes local references to branches that no longer exist on the remote repository. + # Otherwise, this script might raise an error claiming that your proposed branch exists in remote, even if that + # branch was already deleted. + origin.fetch(prune=True) + # List all remote branches. + remote_branches = [ref.name.split("origin/")[-1] for ref in origin.refs if ref.remote_head != "HEAD"] + + return repo, remote_branches + + +def ensure_work_branch(repo, work_branch, direct, pr_title): + """Get name of new branch if not provided.""" + # If no name for new branch is given + if work_branch is None: + if not direct: + # Generate name for new branch + work_branch = bake_branch_name(repo, pr_title) + else: + # If not explicitly given, the new branch will be the current branch. + work_branch = repo.active_branch.name + if work_branch == "master": + message = "You're currently on 'master' branch. Pass the name of a branch as an argument to create a new branch." + raise click.ClickException(message) + # If a name is given, and not in direct mode + elif (work_branch is not None) & (not direct): + local_branches = [branch.name for branch in repo.branches] + if work_branch in local_branches: + message = ( + f"Branch '{work_branch}' already exists locally." + "Either choose a different name for the new branch to be created, " + "or switch to the new branch and run this tool without specifying a new branch." + ) + raise click.ClickException(message) + return work_branch + + +def check_branches_valid(base_branch, work_branch, remote_branches): + """Ensure the base branch exists in remote (this should always be true for 'master').""" + # Check base branch (main) + if base_branch not in remote_branches: + raise click.ClickException( + f"Base branch '{base_branch}' does not exist in remote. " + "Either push that branch (git push origin base-branch-name) or use 'master' as a base branch. " + "Then run this tool again." + ) + # Check work branch + if work_branch in remote_branches: + raise click.ClickException( + f"New branch '{work_branch}' already exists in remote. " + "Either manually create a pull request from github, or use a different name for the new branch." + ) + + +def branch_out(repo, base_branch, work_branch): + """Branch out from base_branch and create branch 'work_branch'.""" + try: + log.info( + f"Switching to base branch '{base_branch}', creating new branch '{work_branch}' from there, and switching to it." + ) + repo.git.checkout(base_branch) + repo.git.checkout("-b", work_branch) + except GitCommandError as e: + raise click.ClickException(f"Failed to create a new branch from '{base_branch}':\n{e}") + + +def create_pr(repo, work_branch, base_branch, pr_title): + """Create a draft pull request work_branch -> base_branch.""" + pr_title_str = str(pr_title) + + log.info("Creating an empty commit.") + repo.git.commit("--allow-empty", "-m", pr_title_str or f"Start a new staging server for branch '{work_branch}'") + + log.info("Pushing the new branch to remote.") + repo.git.push("origin", work_branch) + + log.info("Creating a draft pull request.") + headers = {"Authorization": f"token {GITHUB_TOKEN}", "Accept": "application/vnd.github.v3+json"} + data = { + "title": pr_title_str or f":construction: Draft PR for branch {work_branch}", + "head": work_branch, + "base": base_branch, + "body": "", + "draft": True, + } + response = requests.post(GITHUB_API_URL, json=data, headers=headers) + if response.status_code == 201: + js = response.json() + log.info(f"Draft pull request created successfully at {js['html_url']}.") + else: + raise click.ClickException(f"Failed to create draft pull request:\n{response.json()}") + + +def _generate_pr_title(title: str, category: str, scope: str | None) -> Optional[str]: + """Generate the PR title. + + title + category + scope -> 'category scope: title' + title + category -> 'category title' + """ + if title is not None: + prefix = "" + # Add emoji for PR mode chosen if applicable + if category in PR_CATEGORIES: + prefix = PR_CATEGORIES[category]["emoji"] + else: + raise ValueError(f"Invalid PR type '{category}'. Choose one of {list(PR_CATEGORIES.keys())}.") + # Add scope + if scope is not None: + if prefix != "": + prefix += " " + prefix += f"{scope}:" + + # Add prefix + title = f"{prefix} {title}" + return title + + +def bake_branch_name(repo, pr_title): + # Get user + git_config = repo.config_reader() + user = git_config.get_value("user", "name") + + # Get category + category = pr_title.category + + # Get input title (without emoji, scope, etc.) + title = _extract_relevant_title_for_branch_name(pr_title.title) + + # Bake complete PR branch name + name = f"{user}-{category}-{title}" + + return name + + +def _extract_relevant_title_for_branch_name(text_in: str) -> str: + """ + Process the input string by: + 1. Removing all symbols, keeping only letters and numbers. + 2. Splitting into a list of words/tokens. + 3. Keeping only the first three tokens (or fewer if not available). + 4. Combining the tokens with a '-'. + + Args: + text_in (str): The input text string. + + Returns: + str: The processed string. + """ + # Remove all symbols, keep only letters and numbers + cleaned_text = re.sub(r"[^a-zA-Z0-9\s]", "", text_in) + # Split into tokens/words + tokens = cleaned_text.split() + # Keep only the first 3 tokens + tokens = tokens[:3] + # Combine tokens with '-' + name = "-".join(tokens).lower() + + # Add hash to prevent collisions + hash_txt = generate_short_hash() + name = f"{name}-{hash_txt}" + + return name + + +def generate_short_hash() -> str: + """ + Generate a random short hash (6 characters) using SHA256. + + Returns: + str: A 6-character random hash string. + """ + random_data = uuid.uuid4().hex # Generate random data + random_hash = hashlib.sha256(random_data.encode()).hexdigest() # Create hash + return random_hash[:6] # Return the first 6 characters diff --git a/apps/step_update/cli.py b/apps/step_update/cli.py index fa7325a2822..f315be84f6c 100644 --- a/apps/step_update/cli.py +++ b/apps/step_update/cli.py @@ -363,7 +363,8 @@ def update_steps( # Tell user how to automatically create PR short_name = steps[-1].split("/")[-1].split(".")[0] - cmd = f'etl pr update-{short_name} --title ":bar_chart: Update {short_name}"' + # cmd = f'etl pro update-{short_name} --title ":bar_chart: Update {short_name}"' + cmd = f'etl pr "{short_name}" data' log.info(f"Create the PR automatically with:\n {cmd}") def _archive_step(self, step: str) -> None: diff --git a/apps/utils/draft_pull_request.py b/apps/utils/draft_pull_request.py index 2d027c633a4..194a44cfc7b 100644 --- a/apps/utils/draft_pull_request.py +++ b/apps/utils/draft_pull_request.py @@ -96,7 +96,7 @@ def _branch_exists_remotely(new_branch, remote_branches): return False -@click.command(name="pr", cls=RichCommand, help=__doc__) +@click.command(name="pro", cls=RichCommand, help=__doc__) @click.argument( "new-branch", type=str, diff --git a/docs/guides/data-work/add-data.md b/docs/guides/data-work/add-data.md index b3fdbe7aff5..e42cf58169c 100644 --- a/docs/guides/data-work/add-data.md +++ b/docs/guides/data-work/add-data.md @@ -39,7 +39,7 @@ There are different ways you can add data to the catalog, depending on your tech Before starting to add a new dataset, make sure to create your new environment. This means creating a new branch, its corresponding pull request and staging server. This can all be made with one command: ```bash -etl pr data-{short_name} -c data -t "{short_name}: new dataset" +etl pr "{short_name}: new dataset" data ``` This will create a new git branch in your local repository with an empty commit, which will be pushed to remote. It will also create a draft pull request in github, and a staging server. Wait for a notification from [@owidbot](https://github.com/owidbot). It should take a few minutes, and will inform you that the staging server `http://staging-site-data-{short_name}` has been created. diff --git a/docs/guides/data-work/update-data.md b/docs/guides/data-work/update-data.md index 1d02b05924d..c0a66de4366 100644 --- a/docs/guides/data-work/update-data.md +++ b/docs/guides/data-work/update-data.md @@ -12,9 +12,9 @@ This guide explains the general workflow to update a dataset that already exists In a nutshell, these are the steps to follow: - Switch to `master` branch (`git switch master`), and ensure it's up-to-date (`git pull`). - - Create a new branch and a draft pull request (PR) with a staging server: + - Create a new branch (name is auto-generated) and a draft pull request (PR) with a staging server: ```bash - etl pr update-{short_name} -c data -t "{short_name}: update" + etl pr "{short_name}: update" data ``` - Use the ETL Dashboard to create new versions of the steps (this will duplicate the code of the old steps). - Execute the newly created snapshot scripts, if any. @@ -47,7 +47,7 @@ This guide assumes you have already a [working installation of `etl`](../../../g - **Create a draft PR and a temporary staging server** - Create a PR with the following command (replace `{short_name}` with the short name of the dataset, e.g. `temperature-anomaly`): ```bash - etl pr update-{short_name} -c data -t "{short_name}: update" + etl pr "{short_name}: update" data ``` This will create a new git branch in your local repository with an empty commit, which will be pushed to remote. It will also create a draft pull request in github, and a staging server. diff --git a/pyproject.toml b/pyproject.toml index 1cb685a3373..203ca47d675 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -140,6 +140,7 @@ wizard = [ etl = 'apps.cli:cli' etlwiz = 'apps.wizard.cli:cli' etlr = 'etl.command:main_cli' +etlp = 'apps.pr.cli:cli' etl-wizard = 'apps.wizard.cli:cli' compare = 'etl.compare:cli' backport = 'apps.backport.backport:backport_cli'