Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
Marigold committed Nov 25, 2024
1 parent 2723737 commit 361880d
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 10 deletions.
41 changes: 37 additions & 4 deletions apps/owidbot/github_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,19 @@


def get_repo(repo_name: str, access_token: Optional[str] = None) -> github.Repository.Repository:
    """Return the ``owid/<repo_name>`` repository from the GitHub API.

    Args:
        repo_name: Repository name without the ``owid/`` organisation prefix.
        access_token: Token to authenticate with. When empty or omitted,
            ``config.OWIDBOT_ACCESS_TOKEN`` is used if it is set.

    Returns:
        The repository object returned by ``Github.get_repo``.
    """
    # An explicitly passed token wins; otherwise try OWIDBOT_ACCESS_TOKEN.
    token = access_token or config.OWIDBOT_ACCESS_TOKEN

    # With no token at all we do not fail: we connect without auth. Be aware
    # that you won't be able to do write operations in that case. You should
    # set up your access token on https://github.com/settings/tokens and set
    # it as OWIDBOT_ACCESS_TOKEN in .env
    auth = Auth.Token(token) if token else None

    g = Github(auth=auth)
    return g.get_repo(f"owid/{repo_name}")

Expand Down Expand Up @@ -96,6 +105,28 @@ def compute_git_blob_sha1(content: bytes) -> str:
return sha1.hexdigest()


def create_branch_if_not_exists(repo_name: str, branch: str, dry_run: bool) -> None:
    """Create ``branch`` in ``owid/<repo_name>`` if it doesn't exist.

    The new branch is created from the head commit of ``main``, or of
    ``master`` when the repository has no ``main`` branch.

    Args:
        repo_name: Repository name without the ``owid/`` organisation prefix.
        branch: Name of the branch to ensure exists.
        dry_run: When true, only check for the branch; never create it.

    Raises:
        AssertionError: If ``config.OWIDBOT_ACCESS_TOKEN`` is not set.
        github.GithubException: For any GitHub API failure other than the
            expected 404 "branch not found" responses.
    """
    assert config.OWIDBOT_ACCESS_TOKEN, "You need to set OWIDBOT_ACCESS_TOKEN in your .env file to create a branch."
    repo = get_repo(repo_name, config.OWIDBOT_ACCESS_TOKEN)

    try:
        repo.get_branch(branch)
    except github.GithubException as e:
        # Only a 404 means "branch is missing"; anything else is a real error.
        if e.status != 404:
            raise
    else:
        # Branch already exists, nothing to do.
        return

    if dry_run:
        return

    try:
        master_ref = repo.get_branch("main").commit.sha
        log.info(f"Using 'main' branch as reference for creating {branch}.")
    except github.GithubException as e:
        # Fall back to 'master' only when 'main' is missing. Auth, rate-limit
        # and server errors must surface instead of being masked by a retry.
        if e.status != 404:
            raise
        master_ref = repo.get_branch("master").commit.sha
        log.info(f"Using 'master' branch as reference for creating {branch}.")

    log.info(f"Creating branch {branch} with reference {master_ref}.")
    repo.create_git_ref(ref=f"refs/heads/{branch}", sha=master_ref)
    log.info(f"Branch {branch} created in {repo.name}.")


def commit_file_to_github(
content: str,
repo_name: str,
Expand All @@ -105,8 +136,10 @@ def commit_file_to_github(
dry_run: bool = True,
) -> None:
"""Commit a table to a GitHub repository using the GitHub API."""
assert config.OWIDBOT_ACCESS_TOKEN, "You need to set OWIDBOT_ACCESS_TOKEN in your .env file to commit."

# Get the repository object
repo = get_repo(repo_name)
repo = get_repo(repo_name, config.OWIDBOT_ACCESS_TOKEN)
new_content_checksum = compute_git_blob_sha1(content.encode("utf-8"))

try:
Expand Down
23 changes: 17 additions & 6 deletions etl/steps/export/github/co2_data/latest/owid_co2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
import tempfile
from pathlib import Path

import git
import pandas as pd
from owid.catalog import Table
from structlog import get_logger

from apps.owidbot import github_utils as gh
from etl.helpers import PathFinder
from etl.paths import BASE_DIR

# Initialize logger.
log = get_logger()
Expand Down Expand Up @@ -211,13 +213,15 @@ def run(dest_dir: str) -> None:
#
# Save outputs.
#
# If you want to really commit the data, use `CO2_BRANCH=my-branch etlr github/co2_data --export`
if os.environ.get("CO2_BRANCH"):
dry_run = False
branch = os.environ["CO2_BRANCH"]
else:
branch = git.Repo(BASE_DIR).active_branch.name

if branch == "master":
log.warning("You are on master branch, using dry mode.")
dry_run = True
branch = "master"
else:
log.info(f"Committing files to branch {branch}")
# Load DRY_RUN from env or use False as default.
dry_run = bool(int(os.environ.get("DRY_RUN", 0)))

# Uncomment to inspect changes.
# from etl.data_helpers.misc import compare_tables
Expand All @@ -233,6 +237,8 @@ def run(dest_dir: str) -> None:

prepare_and_save_outputs(tb, codebook=codebook, temp_dir_path=temp_dir_path)

gh.create_branch_if_not_exists(repo_name="co2-data", branch=branch, dry_run=dry_run)

# Commit csv files to the repos.
for file_name in ["owid-co2-data.csv", "owid-co2-codebook.csv", "README.md"]:
with (temp_dir_path / file_name).open("r") as file_content:
Expand All @@ -244,3 +250,8 @@ def run(dest_dir: str) -> None:
branch=branch,
dry_run=dry_run,
)

if not dry_run:
log.info(
f"Files committed successfully to branch {branch}. Create a PR here https://github.com/owid/co2-data/compare/master...{branch}."
)

0 comments on commit 361880d

Please sign in to comment.