From 361880d77d199895279d17cfcab161d3bfe52703 Mon Sep 17 00:00:00 2001 From: Marigold Date: Mon, 25 Nov 2024 11:02:50 +0100 Subject: [PATCH] wip --- apps/owidbot/github_utils.py | 41 +++++++++++++++++-- .../export/github/co2_data/latest/owid_co2.py | 23 ++++++++--- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/apps/owidbot/github_utils.py b/apps/owidbot/github_utils.py index 082824a2841..945a362ba36 100644 --- a/apps/owidbot/github_utils.py +++ b/apps/owidbot/github_utils.py @@ -16,10 +16,19 @@ def get_repo(repo_name: str, access_token: Optional[str] = None) -> github.Repository.Repository: + # No access token, try using OWIDBOT_ACCESS_TOKEN if not access_token: - assert config.OWIDBOT_ACCESS_TOKEN, "OWIDBOT_ACCESS_TOKEN is not set" - access_token = config.OWIDBOT_ACCESS_TOKEN - auth = Auth.Token(access_token) + if config.OWIDBOT_ACCESS_TOKEN: + access_token = config.OWIDBOT_ACCESS_TOKEN + auth = Auth.Token(access_token) + else: + # Don't auth, be aware that you won't be able to do write operations. You should + # set up your access token on https://github.com/settings/tokens and set it as + # OWIDBOT_ACCESS_TOKEN in .env + auth = None + else: + auth = Auth.Token(access_token) + g = Github(auth=auth) return g.get_repo(f"owid/{repo_name}") @@ -96,6 +105,28 @@ def compute_git_blob_sha1(content: bytes) -> str: return sha1.hexdigest() +def create_branch_if_not_exists(repo_name: str, branch: str, dry_run: bool) -> None: + """Create a branch if it doesn't exist.""" + assert config.OWIDBOT_ACCESS_TOKEN, "You need to set OWIDBOT_ACCESS_TOKEN in your .env file to create a branch." + repo = get_repo(repo_name, config.OWIDBOT_ACCESS_TOKEN) + try: + repo.get_branch(branch) + except github.GithubException as e: + if e.status == 404: + if not dry_run: + try: + master_ref = repo.get_branch("main").commit.sha + log.info(f"Using 'main' branch as reference for creating {branch}.") + except github.GithubException: + master_ref = repo.get_branch("master").commit.sha + log.info(f"Using 'master' branch as reference for creating {branch}.") + log.info(f"Creating branch {branch} with reference {master_ref}.") + repo.create_git_ref(ref=f"refs/heads/{branch}", sha=master_ref) + log.info(f"Branch {branch} created in {repo.name}.") + else: + raise e + + def commit_file_to_github( content: str, repo_name: str, @@ -105,8 +136,10 @@ def commit_file_to_github( dry_run: bool = True, ) -> None: """Commit a table to a GitHub repository using the GitHub API.""" + assert config.OWIDBOT_ACCESS_TOKEN, "You need to set OWIDBOT_ACCESS_TOKEN in your .env file to commit." + # Get the repository object - repo = get_repo(repo_name) + repo = get_repo(repo_name, config.OWIDBOT_ACCESS_TOKEN) new_content_checksum = compute_git_blob_sha1(content.encode("utf-8")) try: diff --git a/etl/steps/export/github/co2_data/latest/owid_co2.py b/etl/steps/export/github/co2_data/latest/owid_co2.py index e1ef156d834..98927151f8e 100644 --- a/etl/steps/export/github/co2_data/latest/owid_co2.py +++ b/etl/steps/export/github/co2_data/latest/owid_co2.py @@ -18,12 +18,14 @@ import tempfile from pathlib import Path +import git import pandas as pd from owid.catalog import Table from structlog import get_logger from apps.owidbot import github_utils as gh from etl.helpers import PathFinder +from etl.paths import BASE_DIR # Initialize logger. log = get_logger() @@ -211,13 +213,15 @@ def run(dest_dir: str) -> None: # # Save outputs. # - # If you want to really commit the data, use `CO2_BRANCH=my-branch etlr github/co2_data --export` - if os.environ.get("CO2_BRANCH"): - dry_run = False - branch = os.environ["CO2_BRANCH"] - else: + branch = git.Repo(BASE_DIR).active_branch.name + + if branch == "master": + log.warning("You are on master branch, using dry mode.") dry_run = True - branch = "master" + else: + log.info(f"Committing files to branch {branch}") + # Load DRY_RUN from env or use False as default. + dry_run = bool(int(os.environ.get("DRY_RUN", 0))) # Uncomment to inspect changes. # from etl.data_helpers.misc import compare_tables @@ -233,6 +237,8 @@ def run(dest_dir: str) -> None: prepare_and_save_outputs(tb, codebook=codebook, temp_dir_path=temp_dir_path) + gh.create_branch_if_not_exists(repo_name="co2-data", branch=branch, dry_run=dry_run) + # Commit csv files to the repos. for file_name in ["owid-co2-data.csv", "owid-co2-codebook.csv", "README.md"]: with (temp_dir_path / file_name).open("r") as file_content: @@ -244,3 +250,8 @@ def run(dest_dir: str) -> None: branch=branch, dry_run=dry_run, ) + + if not dry_run: + log.info( + f"Files committed successfully to branch {branch}. Create a PR here https://github.com/owid/co2-data/compare/master...{branch}." + )