From 8284c34c92157f22c455d6ff9bfd0086e9a81f0e Mon Sep 17 00:00:00 2001
From: Mojmir Vinkler <mojmir.vinkler@gmail.com>
Date: Thu, 28 Nov 2024 21:02:34 +0100
Subject: [PATCH]  :sparkles: Automatically create a branch if it does not
 exist (#3605)

* :sparkles: Automatically create a branch if it does not exist
---
 apps/owidbot/cli.py                           |  4 +-
 apps/owidbot/github_utils.py                  | 42 +++++++++++++++++--
 .../export/github/co2_data/latest/owid_co2.py | 23 +++++++---
 3 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/apps/owidbot/cli.py b/apps/owidbot/cli.py
index 3f597b8158f..3db6c9ec83b 100644
--- a/apps/owidbot/cli.py
+++ b/apps/owidbot/cli.py
@@ -9,7 +9,7 @@
 from rich_click.rich_command import RichCommand
 
 from apps.owidbot import anomalist, chart_diff, data_diff, grapher
-from etl.config import get_container_name
+from etl.config import OWIDBOT_ACCESS_TOKEN, get_container_name
 
 from . import github_utils as gh_utils
 
@@ -57,7 +57,7 @@ def cli(
     if repo_name not in get_args(REPOS):
         raise AssertionError("Invalid repo")
 
-    repo = gh_utils.get_repo(repo_name)
+    repo = gh_utils.get_repo(repo_name, access_token=OWIDBOT_ACCESS_TOKEN)
     pr = gh_utils.get_pr(repo, branch)
     if pr is None:
         log.warning(f"No open PR found for branch {branch}")
diff --git a/apps/owidbot/github_utils.py b/apps/owidbot/github_utils.py
index 082824a2841..abd5f138ab1 100644
--- a/apps/owidbot/github_utils.py
+++ b/apps/owidbot/github_utils.py
@@ -17,9 +17,12 @@
 
 def get_repo(repo_name: str, access_token: Optional[str] = None) -> github.Repository.Repository:
     if not access_token:
-        assert config.OWIDBOT_ACCESS_TOKEN, "OWIDBOT_ACCESS_TOKEN is not set"
-        access_token = config.OWIDBOT_ACCESS_TOKEN
-    auth = Auth.Token(access_token)
+        # No token given: build an unauthenticated client. Be aware that write operations
+        # will not work; set up an access token on https://github.com/settings/tokens.
+        auth = None
+    else:
+        auth = Auth.Token(access_token)
+
     g = Github(auth=auth)
     return g.get_repo(f"owid/{repo_name}")
 
@@ -96,6 +99,37 @@ def compute_git_blob_sha1(content: bytes) -> str:
     return sha1.hexdigest()
 
 
+def _github_access_token():
+    """Return the GitHub token to authenticate with: GITHUB_TOKEN first, then OWIDBOT_ACCESS_TOKEN."""
+    # `or` yields the first truthy value, so an unset or empty GITHUB_TOKEN falls through.
+    token = config.GITHUB_TOKEN or config.OWIDBOT_ACCESS_TOKEN
+    if not token:
+        # Neither token is configured.
+        raise AssertionError("You need to set GITHUB_TOKEN or OWIDBOT_ACCESS_TOKEN in your .env file to commit.")
+    return token
+
+
+def create_branch_if_not_exists(repo_name: str, branch: str, dry_run: bool) -> None:
+    """Create `branch` from 'main' (falling back to 'master') if it is missing; only log when `dry_run`."""
+    repo = get_repo(repo_name, access_token=_github_access_token())
+    try:
+        repo.get_branch(branch)
+    except github.GithubException as e:
+        if e.status != 404:  # Only 404 means the branch is missing; let other API errors surface.
+            raise
+        if dry_run:
+            log.info(f"Dry run: branch {branch} does not exist in {repo.name}, it would be created.")
+            return
+        try:
+            base = repo.get_branch("main")
+        except github.GithubException as base_err:
+            if base_err.status != 404:  # Fall back to 'master' only when 'main' does not exist.
+                raise
+            base = repo.get_branch("master")
+        repo.create_git_ref(ref=f"refs/heads/{branch}", sha=base.commit.sha)
+        log.info(f"Branch {branch} created in {repo.name} from '{base.name}' ({base.commit.sha}).")
+
+
 def commit_file_to_github(
     content: str,
     repo_name: str,
@@ -106,7 +140,7 @@ def commit_file_to_github(
 ) -> None:
     """Commit a table to a GitHub repository using the GitHub API."""
     # Get the repository object
-    repo = get_repo(repo_name)
+    repo = get_repo(repo_name, access_token=_github_access_token())
     new_content_checksum = compute_git_blob_sha1(content.encode("utf-8"))
 
     try:
diff --git a/etl/steps/export/github/co2_data/latest/owid_co2.py b/etl/steps/export/github/co2_data/latest/owid_co2.py
index e1ef156d834..98927151f8e 100644
--- a/etl/steps/export/github/co2_data/latest/owid_co2.py
+++ b/etl/steps/export/github/co2_data/latest/owid_co2.py
@@ -18,12 +18,14 @@
 import tempfile
 from pathlib import Path
 
+import git
 import pandas as pd
 from owid.catalog import Table
 from structlog import get_logger
 
 from apps.owidbot import github_utils as gh
 from etl.helpers import PathFinder
+from etl.paths import BASE_DIR
 
 # Initialize logger.
 log = get_logger()
@@ -211,13 +213,15 @@ def run(dest_dir: str) -> None:
     #
     # Save outputs.
     #
-    # If you want to really commit the data, use `CO2_BRANCH=my-branch etlr github/co2_data --export`
-    if os.environ.get("CO2_BRANCH"):
-        dry_run = False
-        branch = os.environ["CO2_BRANCH"]
-    else:
+    branch = git.Repo(BASE_DIR).active_branch.name
+
+    if branch == "master":
+        log.warning("You are on master branch, using dry mode.")
         dry_run = True
-        branch = "master"
+    else:
+        log.info(f"Committing files to branch {branch}")
+        # Load DRY_RUN from env or use False as default.
+        dry_run = bool(int(os.environ.get("DRY_RUN", 0)))
 
     # Uncomment to inspect changes.
     # from etl.data_helpers.misc import compare_tables
@@ -233,6 +237,8 @@ def run(dest_dir: str) -> None:
 
         prepare_and_save_outputs(tb, codebook=codebook, temp_dir_path=temp_dir_path)
 
+        gh.create_branch_if_not_exists(repo_name="co2-data", branch=branch, dry_run=dry_run)
+
         # Commit csv files to the repos.
         for file_name in ["owid-co2-data.csv", "owid-co2-codebook.csv", "README.md"]:
             with (temp_dir_path / file_name).open("r") as file_content:
@@ -244,3 +250,8 @@ def run(dest_dir: str) -> None:
                     branch=branch,
                     dry_run=dry_run,
                 )
+
+    if not dry_run:
+        log.info(
+            f"Files committed successfully to branch {branch}. Create a PR here https://github.com/owid/co2-data/compare/master...{branch}."
+        )