Require authentication for dry run function and run gcloud auth when not logged in
scholtzan committed Mar 5, 2024
1 parent 63d1e3f commit 19c024c
Showing 4 changed files with 65 additions and 17 deletions.
47 changes: 37 additions & 10 deletions .circleci/workflows.yml
@@ -120,6 +120,15 @@ jobs:
- << pipeline.parameters.deploy >>
steps:
- checkout
+      - &skip_forked_pr
+        run:
+          name: Early return if this build is from a forked PR
+          command: |
+            if [ -n "$CIRCLE_PR_NUMBER" ]; then
+              echo "Cannot pass creds to forked PRs," \
+                "so marking this step successful"
+              circleci-agent step halt
+            fi
- *restore_venv_cache
- *build
- run:
@@ -129,6 +138,9 @@ jobs:
# since those tests take the longest to run and running those tests
# in parallel speeds up CI
command: |
+            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
+            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
PATH="venv/bin:$PATH" script/entrypoint --black --flake8 \
--isort --mypy-ignore-missing-imports --pydocstyle \
-m "not (routine or sql or integration)" \
@@ -177,6 +189,7 @@ jobs:
condition: *validate-sql-or-routines
steps:
- checkout
+      - *skip_forked_pr
- *restore_venv_cache
- *build
- *attach_generated_sql
@@ -203,6 +216,9 @@
else
PATHS="sql/bigquery-etl-integration-test"
fi
+            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
+            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
echo $PATHS
PATH="venv/bin:$PATH" script/bqetl dryrun --validate-schemas $PATHS
# yamllint enable rule:line-length
@@ -239,13 +255,17 @@ jobs:
condition: *validate-sql
steps:
- checkout
+      - *skip_forked_pr
- *restore_venv_cache
- *build
- *attach_generated_sql
- *copy_staged_sql
- run:
name: Verify that metadata files are valid
command: |
+            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
+            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
# TODO: Add check here to make sure all queries have metadata.yaml
PATH="venv/bin:$PATH" script/bqetl query validate \
--respect-dryrun-skip
@@ -260,15 +280,7 @@
condition: *validate-bqetl
steps:
- checkout
-      - &skip_forked_pr
-        run:
-          name: Early return if this build is from a forked PR
-          command: |
-            if [ -n "$CIRCLE_PR_NUMBER" ]; then
-              echo "Cannot pass creds to forked PRs," \
-                "so marking this step successful"
-              circleci-agent step halt
-            fi
+      - *skip_forked_pr
- *restore_venv_cache
- *build
- run:
@@ -289,6 +301,7 @@
condition: *validate-sql
steps:
- checkout
+      - *skip_forked_pr
- *restore_venv_cache
- *build
- *attach_generated_sql
@@ -301,6 +314,9 @@
- run:
name: Generate DAGs
command: |
+            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
+            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
mkdir -p /tmp/workspace/generated-sql/dags
PATH="venv/bin:$PATH" script/bqetl dag generate --output-dir=/tmp/workspace/generated-sql/dags
# this task is overwriting the content produced by generate-sql;
@@ -407,13 +423,17 @@ jobs:
condition: *validate-sql-or-routines
steps:
- checkout
+      - *skip_forked_pr
- *restore_venv_cache
- *build
- *attach_generated_sql
- *copy_staged_sql
- run:
name: Validate views
-          command: PATH="venv/bin:$PATH" script/bqetl view validate
+          command: |
+            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
+            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
+            PATH="venv/bin:$PATH" script/bqetl view validate
- unless:
condition: *validate-sql-or-routines
steps:
@@ -453,11 +473,15 @@ jobs:
condition: *validate-sql-or-routines
steps:
- checkout
+      - *skip_forked_pr
- *restore_venv_cache
- *build
- run:
name: Generate SQL content
command: |
+            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
+            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
# Check if the generated-sql branch can simply be re-used and SQL generation can be skipped.
git clone -b generated-sql [email protected]:mozilla/bigquery-etl ~/remote-generated-sql
cd ~/remote-generated-sql
@@ -824,6 +848,7 @@ jobs:
condition: *validate-sql-or-routines
steps:
- checkout
+      - *skip_forked_pr
- run:
name: Switch to main branch
command: |
@@ -838,6 +863,8 @@
name: Generate SQL content
command: |
export PATH="venv/bin:$PATH"
+            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
+            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
# Check if the generated-sql branch can simply be re-used and SQL generation can be skipped.
# There is a delay between pushing changes to main and pushing a new generated-sql branch,
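Note on the pattern above: every touched job now writes the service-account key stored in the CircleCI environment variable GCLOUD_SERVICE_KEY to /tmp/gcp.json and points GOOGLE_APPLICATION_CREDENTIALS at that file, which is how google-auth discovers Application Default Credentials. A minimal sketch of what the Python tooling then sees (illustrative only, not part of this commit):

    import os

    import google.auth

    # The CI step has already written $GCLOUD_SERVICE_KEY to this path;
    # google-auth finds it through the environment variable.
    os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "/tmp/gcp.json")

    # google.auth.default() resolves Application Default Credentials:
    # GOOGLE_APPLICATION_CREDENTIALS first, then gcloud's
    # application-default credentials, then the metadata server.
    credentials, project = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    print(f"Resolved credentials for project: {project}")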
5 changes: 0 additions & 5 deletions bigquery_etl/cli/dryrun.py
@@ -12,7 +12,6 @@
import rich_click as click
from google.cloud import bigquery

-from ..cli.utils import is_authenticated
from ..config import ConfigLoader
from ..dryrun import DryRun

@@ -96,10 +95,6 @@ def dryrun(
print("Skipping dry run because no queries matched")
sys.exit(0)

-    if not use_cloud_function and not is_authenticated():
-        click.echo("Not authenticated to GCP. Run `gcloud auth login` to login.")
-        sys.exit(1)
-
sql_file_valid = partial(
_sql_file_valid, use_cloud_function, project, respect_skip, validate_schemas
)
7 changes: 6 additions & 1 deletion bigquery_etl/cli/utils.py
@@ -3,6 +3,7 @@
import fnmatch
import os
import re
+import subprocess
from fnmatch import fnmatchcase
from glob import glob
from pathlib import Path
@@ -44,7 +45,11 @@ def is_authenticated():
try:
bigquery.Client(project="")
except DefaultCredentialsError:
-        return False
+        try:
+            subprocess.run(["gcloud", "auth", "application-default", "login"])
+        except Exception as e:
+            print(f"Could not log in to GCP: {e}")
+            return False
return True


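Putting the hunk above together, is_authenticated() after this commit reads roughly as follows (reconstructed from the diff; imports abbreviated to the ones this function needs, and the docstring is assumed):

    import subprocess

    from google.auth.exceptions import DefaultCredentialsError
    from google.cloud import bigquery


    def is_authenticated():
        """Check if the user is authenticated to GCP."""
        try:
            # Instantiating a client forces credential resolution
            # without issuing any API requests.
            bigquery.Client(project="")
        except DefaultCredentialsError:
            # No application default credentials; start an interactive login.
            try:
                subprocess.run(["gcloud", "auth", "application-default", "login"])
            except Exception as e:
                print(f"Could not log in to GCP: {e}")
                return False
        return True

Note that a successful gcloud invocation falls through to return True without re-checking credentials; the next client call is what actually exercises them.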
23 changes: 22 additions & 1 deletion bigquery_etl/dryrun.py
@@ -21,7 +21,10 @@
from urllib.request import Request, urlopen

import click
+import google.auth
+from google.auth.transport.requests import Request as GoogleAuthRequest
from google.cloud import bigquery
+from google.oauth2.id_token import fetch_id_token

from .config import ConfigLoader
from .metadata.parse_metadata import Metadata
@@ -69,6 +72,11 @@ def __init__(
except FileNotFoundError:
self.metadata = None

+        from bigquery_etl.cli.utils import is_authenticated
+
+        if not is_authenticated():
+            print("Authentication to GCP required for dry runs.")
+
@staticmethod
def skipped_files(sql_dir=ConfigLoader.get("default", "sql_dir")) -> Set[str]:
"""Return files skipped by dry run."""
@@ -160,10 +168,23 @@ def dry_run_result(self):
dataset = basename(dirname(dirname(self.sqlfile)))
try:
if self.use_cloud_function:
+                auth_req = GoogleAuthRequest()
+                creds, _ = google.auth.default(
+                    scopes=["https://www.googleapis.com/auth/cloud-platform"]
+                )
+                creds.refresh(auth_req)
+                if hasattr(creds, "id_token"):
+                    id_token = creds.id_token
+                else:
+                    id_token = fetch_id_token(auth_req, self.dry_run_url)
+
r = urlopen(
Request(
self.dry_run_url,
-                        headers={"Content-Type": "application/json"},
+                        headers={
+                            "Content-Type": "application/json",
+                            "Authorization": f"Bearer {id_token}",
+                        },
data=json.dumps(
{
"dataset": dataset,
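The use_cloud_function branch above now attaches an OIDC identity token to the dry-run request. A self-contained sketch of the same flow, with a placeholder audience URL (bigquery-etl takes the real dry-run URL from its configuration):

    import google.auth
    from google.auth.transport.requests import Request as GoogleAuthRequest
    from google.oauth2.id_token import fetch_id_token

    # Placeholder; in bigquery-etl this is the dry-run cloud function URL.
    DRY_RUN_URL = "https://example.cloudfunctions.net/bigquery-etl-dryrun"

    auth_req = GoogleAuthRequest()
    creds, _ = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    creds.refresh(auth_req)

    # User credentials (e.g. from "gcloud auth application-default login")
    # usually carry an id_token after refresh; service accounts need one
    # minted explicitly for the target audience.
    if hasattr(creds, "id_token"):
        id_token = creds.id_token
    else:
        id_token = fetch_id_token(auth_req, DRY_RUN_URL)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {id_token}",
    }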
