From 8284c34c92157f22c455d6ff9bfd0086e9a81f0e Mon Sep 17 00:00:00 2001
From: Mojmir Vinkler <mojmir.vinkler@gmail.com>
Date: Thu, 28 Nov 2024 21:02:34 +0100
Subject: [PATCH] :sparkles: Automatically create a branch if it does not exist (#3605)

* :sparkles: Automatically create a branch if it does not exist
---
 apps/owidbot/cli.py                           |  4 +-
 apps/owidbot/github_utils.py                  | 42 +++++++++++++++++--
 .../export/github/co2_data/latest/owid_co2.py | 23 +++++++---
 3 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/apps/owidbot/cli.py b/apps/owidbot/cli.py
index 3f597b8158f..3db6c9ec83b 100644
--- a/apps/owidbot/cli.py
+++ b/apps/owidbot/cli.py
@@ -9,7 +9,7 @@
 from rich_click.rich_command import RichCommand
 
 from apps.owidbot import anomalist, chart_diff, data_diff, grapher
-from etl.config import get_container_name
+from etl.config import OWIDBOT_ACCESS_TOKEN, get_container_name
 
 from . import github_utils as gh_utils
 
@@ -57,7 +57,7 @@ def cli(
     if repo_name not in get_args(REPOS):
         raise AssertionError("Invalid repo")
 
-    repo = gh_utils.get_repo(repo_name)
+    repo = gh_utils.get_repo(repo_name, access_token=OWIDBOT_ACCESS_TOKEN)
     pr = gh_utils.get_pr(repo, branch)
     if pr is None:
         log.warning(f"No open PR found for branch {branch}")
diff --git a/apps/owidbot/github_utils.py b/apps/owidbot/github_utils.py
index 082824a2841..abd5f138ab1 100644
--- a/apps/owidbot/github_utils.py
+++ b/apps/owidbot/github_utils.py
@@ -17,9 +17,12 @@
 
 def get_repo(repo_name: str, access_token: Optional[str] = None) -> github.Repository.Repository:
     if not access_token:
-        assert config.OWIDBOT_ACCESS_TOKEN, "OWIDBOT_ACCESS_TOKEN is not set"
-        access_token = config.OWIDBOT_ACCESS_TOKEN
-    auth = Auth.Token(access_token)
+        # Don't auth, be aware that you won't be able to do write operations. You should
+        # set up your access token on https://github.com/settings/tokens.
+        auth = None
+    else:
+        auth = Auth.Token(access_token)
+
     g = Github(auth=auth)
     return g.get_repo(f"owid/{repo_name}")
 
@@ -96,6 +99,37 @@ def compute_git_blob_sha1(content: bytes) -> str:
     return sha1.hexdigest()
 
 
+def _github_access_token():
+    # Use GITHUB_TOKEN if set, otherwise use OWIDBOT_ACCESS_TOKEN
+    if config.GITHUB_TOKEN:
+        return config.GITHUB_TOKEN
+    elif config.OWIDBOT_ACCESS_TOKEN:
+        return config.OWIDBOT_ACCESS_TOKEN
+    else:
+        raise AssertionError("You need to set GITHUB_TOKEN or OWIDBOT_ACCESS_TOKEN in your .env file to commit.")
+
+
+def create_branch_if_not_exists(repo_name: str, branch: str, dry_run: bool) -> None:
+    """Create a branch if it doesn't exist."""
+    repo = get_repo(repo_name, access_token=_github_access_token())
+    try:
+        repo.get_branch(branch)
+    except github.GithubException as e:
+        if e.status == 404:
+            if not dry_run:
+                try:
+                    master_ref = repo.get_branch("main").commit.sha
+                    log.info(f"Using 'main' branch as reference for creating {branch}.")
+                except github.GithubException:
+                    master_ref = repo.get_branch("master").commit.sha
+                    log.info(f"Using 'master' branch as reference for creating {branch}.")
+                log.info(f"Creating branch {branch} with reference {master_ref}.")
+                repo.create_git_ref(ref=f"refs/heads/{branch}", sha=master_ref)
+                log.info(f"Branch {branch} created in {repo.name}.")
+        else:
+            raise e
+
+
 def commit_file_to_github(
     content: str,
     repo_name: str,
@@ -106,7 +140,7 @@ def commit_file_to_github(
 ) -> None:
     """Commit a table to a GitHub repository using the GitHub API."""
     # Get the repository object
-    repo = get_repo(repo_name)
+    repo = get_repo(repo_name, access_token=_github_access_token())
     new_content_checksum = compute_git_blob_sha1(content.encode("utf-8"))
 
     try:
diff --git a/etl/steps/export/github/co2_data/latest/owid_co2.py b/etl/steps/export/github/co2_data/latest/owid_co2.py
index e1ef156d834..98927151f8e 100644
--- a/etl/steps/export/github/co2_data/latest/owid_co2.py
+++ b/etl/steps/export/github/co2_data/latest/owid_co2.py
@@ -18,12 +18,14 @@
 import tempfile
 from pathlib import Path
 
+import git
 import pandas as pd
 from owid.catalog import Table
 from structlog import get_logger
 
 from apps.owidbot import github_utils as gh
 from etl.helpers import PathFinder
+from etl.paths import BASE_DIR
 
 # Initialize logger.
 log = get_logger()
@@ -211,13 +213,15 @@ def run(dest_dir: str) -> None:
     #
     # Save outputs.
     #
-    # If you want to really commit the data, use `CO2_BRANCH=my-branch etlr github/co2_data --export`
-    if os.environ.get("CO2_BRANCH"):
-        dry_run = False
-        branch = os.environ["CO2_BRANCH"]
-    else:
+    branch = git.Repo(BASE_DIR).active_branch.name
+
+    if branch == "master":
+        log.warning("You are on master branch, using dry mode.")
         dry_run = True
-        branch = "master"
+    else:
+        log.info(f"Committing files to branch {branch}")
+        # Load DRY_RUN from env or use False as default.
+        dry_run = bool(int(os.environ.get("DRY_RUN", 0)))
 
     # Uncomment to inspect changes.
     # from etl.data_helpers.misc import compare_tables
@@ -233,6 +237,8 @@ def run(dest_dir: str) -> None:
 
         prepare_and_save_outputs(tb, codebook=codebook, temp_dir_path=temp_dir_path)
 
+        gh.create_branch_if_not_exists(repo_name="co2-data", branch=branch, dry_run=dry_run)
+
         # Commit csv files to the repos.
         for file_name in ["owid-co2-data.csv", "owid-co2-codebook.csv", "README.md"]:
             with (temp_dir_path / file_name).open("r") as file_content:
@@ -244,3 +250,8 @@ def run(dest_dir: str) -> None:
                     branch=branch,
                     dry_run=dry_run,
                 )
+
+    if not dry_run:
+        log.info(
+            f"Files committed successfully to branch {branch}. Create a PR here https://github.com/owid/co2-data/compare/master...{branch}."
+        )