Skip to content

Commit

Permalink
Merge branch 'master' of github.com:owid/etl into improve-natural-dis…
Browse files Browse the repository at this point in the history
…asters-explorer
  • Loading branch information
pabloarosado committed May 22, 2024
2 parents 4fd0508 + 27f23f8 commit 8a1453c
Show file tree
Hide file tree
Showing 39 changed files with 1,859 additions and 150 deletions.
16 changes: 8 additions & 8 deletions apps/owidbot/chart_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from sqlmodel import Session
from structlog import get_logger

from apps.staging_sync.cli import _get_container_name, _get_engine_for_env, _modified_chart_ids_by_admin
from apps.staging_sync.cli import _modified_chart_ids_by_admin
from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified
from etl import config
from apps.wizard.utils.env import OWID_ENV, OWIDEnv, get_container_name

from . import github_utils as gh_utils

Expand Down Expand Up @@ -44,7 +44,7 @@ def create_check_run(repo_name: str, branch: str, charts_df: pd.DataFrame, dry_r


def run(branch: str, charts_df: pd.DataFrame) -> str:
container_name = _get_container_name(branch) if branch else "dry-run"
container_name = get_container_name(branch) if branch else "dry-run"

chart_diff = format_chart_diff(charts_df)

Expand All @@ -55,7 +55,7 @@ def run(branch: str, charts_df: pd.DataFrame) -> str:

body = f"""
<details open>
<summary>{status} <a href="http://{container_name}/etl/wizard/Chart%20Diff"><b>chart-diff</b></a>: </summary>
<summary><a href="http://{container_name}/etl/wizard/Chart%20Diff"><b>chart-diff</b></a>: {status}</summary>
{chart_diff}
</details>
""".strip()
Expand All @@ -64,13 +64,13 @@ def run(branch: str, charts_df: pd.DataFrame) -> str:


def call_chart_diff(branch: str) -> pd.DataFrame:
source_engine = _get_engine_for_env(branch)
source_engine = OWIDEnv.from_staging(branch).get_engine()

if config.DB_IS_PRODUCTION:
target_engine = _get_engine_for_env(config.ENV_FILE)
if OWID_ENV.env_type_id == "production":
target_engine = OWID_ENV.get_engine()
else:
log.warning("ENV file doesn't connect to production DB, comparing against staging-site-master")
target_engine = _get_engine_for_env("staging-site-master")
target_engine = OWIDEnv.from_staging("master").get_engine()

df = []
with Session(source_engine) as source_session:
Expand Down
4 changes: 2 additions & 2 deletions apps/owidbot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from rich_click.rich_command import RichCommand

from apps.owidbot import chart_diff, data_diff, grapher
from apps.staging_sync.cli import _get_container_name
from apps.wizard.utils.env import get_container_name

from . import github_utils as gh_utils

Expand Down Expand Up @@ -122,7 +122,7 @@ def services_from_comment(comment: Any) -> Dict[str, str]:


def create_comment_body(branch: str, services: Dict[str, str], start_time: float):
container_name = _get_container_name(branch) if branch else "dry-run"
container_name = get_container_name(branch) if branch else "dry-run"

body = f"""
<b>Quick links (staging server)</b>:
Expand Down
10 changes: 6 additions & 4 deletions apps/staging_sync/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ def main():
placeholder="my-branch",
help="Branch name of PR that created the staging server (with existing `staging-site-mybranch` server) or the name of staging server.",
)
target = st.text_input("Target", value="live", help="Using `live` uses DB from local `.env` file as target.")
target = st.text_input(
"Target", value="production", help="Using `production` uses DB from local `.env` file as target."
)
approve_revisions = st.checkbox(
"Automatically approve chart revisions for edited charts",
value=False,
Expand All @@ -57,17 +59,17 @@ def main():
dry_run = st.checkbox("Dry run", value=True)

# Live uses `.env` file which points to the live database in production
if target == "live":
if target == "production":
target_env = ".env"
else:
target_env = target

# Button to show text
if st.button("Sync charts", help="This can take a while."):
if target == "live":
if target == "production":
assert (
config.DB_IS_PRODUCTION
), "If target = live, then chart-sync must be run in production with .env pointing to live DB."
), "If target = production, then chart-sync must be run in production with .env pointing to live DB."

if not _is_valid_config(source, target_env):
return
Expand Down
71 changes: 9 additions & 62 deletions apps/staging_sync/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@
import datetime as dt
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Union
from typing import Any, Dict, List, Optional, Set

import click
import pandas as pd
import pytz
import requests
import structlog
from dotenv import dotenv_values
from rich import print
from rich_click.rich_command import RichCommand
from slack_sdk import WebClient
Expand All @@ -18,10 +17,11 @@

from apps.staging_sync.admin_api import AdminAPI
from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified
from apps.wizard.utils.env import OWIDEnv, get_container_name
from etl import config
from etl import grapher_model as gm
from etl.datadiff import _dict_diff
from etl.db import Engine, get_engine, read_sql
from etl.db import read_sql

log = structlog.get_logger()

Expand Down Expand Up @@ -138,11 +138,8 @@ def cli(
source = _get_git_branch_from_commit_sha(source)
log.info("staging_sync.use_branch", branch=source)

_validate_env(source)
_validate_env(target)

source_engine = _get_engine_for_env(source)
target_engine = _get_engine_for_env(target)
source_engine = OWIDEnv.from_staging_or_env_file(source).get_engine()
target_engine = OWIDEnv.from_staging_or_env_file(target).get_engine()

staging_created_at = _get_staging_created_at(source, staging_created_at) # type: ignore

Expand Down Expand Up @@ -405,7 +402,7 @@ def _notify_slack_chart_update(chart_id: int, source: str, diff: ChartDiffModifi

message = f"""
:warning: *ETL chart-sync: Unapproved Chart Update* from `{source}`
<http://{_get_container_name(source)}/admin/charts/{chart_id}/edit|View Staging Chart> | <https://admin.owid.io/admin/charts/{chart_id}/edit|View Admin Chart>
<http://{get_container_name(source)}/admin/charts/{chart_id}/edit|View Staging Chart> | <https://admin.owid.io/admin/charts/{chart_id}/edit|View Admin Chart>
*Staging Edited*: {str(diff.source_chart.updatedAt)} UTC
*Production Edited*: {str(diff.target_chart.updatedAt)} UTC
```
Expand All @@ -424,7 +421,7 @@ def _notify_slack_chart_update(chart_id: int, source: str, diff: ChartDiffModifi
def _notify_slack_chart_create(source_chart_id: int, target_chart_id: int, source: str, dry_run: bool) -> None:
message = f"""
:warning: *ETL chart-sync: Unapproved New Chart* from `{source}`
<http://{_get_container_name(source)}/admin/charts/{source_chart_id}/edit|View Staging Chart> | <https://admin.owid.io/admin/charts/{target_chart_id}/edit|View Admin Chart>
<http://{get_container_name(source)}/admin/charts/{source_chart_id}/edit|View Staging Chart> | <https://admin.owid.io/admin/charts/{target_chart_id}/edit|View Admin Chart>
""".strip()

print(message)
Expand Down Expand Up @@ -455,9 +452,9 @@ def _matches_include_exclude(chart: gm.Chart, session: Session, include: Optiona
return True


def _get_staging_created_at(source: Path, staging_created_at: Optional[str]) -> dt.datetime:
def _get_staging_created_at(source: str, staging_created_at: Optional[str]) -> dt.datetime:
if staging_created_at is None:
if not _is_env(source):
if not Path(source).exists():
return _get_git_branch_creation_date(str(source).replace("staging-site-", ""))
else:
log.warning(
Expand All @@ -469,56 +466,6 @@ def _get_staging_created_at(source: Path, staging_created_at: Optional[str]) ->
return pd.to_datetime(staging_created_at)


def _is_env(env: Union[str, Path]) -> bool:
return Path(env).exists()


def _normalise_branch(branch_name):
return re.sub(r"[\/\._]", "-", branch_name)


def _get_container_name(branch_name):
normalized_branch = _normalise_branch(branch_name)

# Strip staging-site- prefix to add it back later
normalized_branch = normalized_branch.replace("staging-site-", "")

# Ensure the container name is less than 63 characters
container_name = f"staging-site-{normalized_branch[:50]}"
# Remove trailing hyphens
return container_name.rstrip("-")


def _validate_env(env: Union[str, Path]) -> None:
# if `env` is a path, it must exist (otherwise we'd confuse it with a staging server name)s
if str(env).startswith(".") and "env" in str(env) and not Path(env).exists():
raise click.BadParameter(f"File {env} does not exist")


def _get_engine_for_env(env: Union[Path, str]) -> Engine:
# env exists as a path
if _is_env(Path(env)):
config = dotenv_values(str(env))
# env could be server name
else:
staging_name = str(env)

# add staging-site- prefix
if not staging_name.startswith("staging-site-"):
staging_name = "staging-site-" + staging_name

# generate config for staging server
config = {
"DB_USER": "owid",
"DB_NAME": "owid",
"DB_PASS": "",
"DB_PORT": "3306",
"DB_HOST": _get_container_name(staging_name),
}

return get_engine(config)


def _prune_chart_config(config: Dict[str, Any]) -> Dict[str, Any]:
config = copy.deepcopy(config)
config = {k: v for k, v in config.items() if k not in ("version",)}
Expand Down
36 changes: 14 additions & 22 deletions apps/wizard/pages/chart_diff/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from st_pages import add_indentation
from structlog import get_logger

from apps.staging_sync.cli import _get_engine_for_env, _modified_chart_ids_by_admin, _validate_env
from apps.staging_sync.cli import _modified_chart_ids_by_admin
from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified
from apps.wizard.pages.chart_diff.config_diff import st_show_diff
from apps.wizard.utils import chart_html, set_states
from apps.wizard.utils.env import OWID_ENV
from apps.wizard.utils.env import OWID_ENV, OWIDEnv
from etl import config

log = get_logger()
Expand Down Expand Up @@ -41,19 +41,17 @@
########################################
# LOAD ENVS
########################################
# TODO: simplify this
SOURCE_ENV = config.DB_HOST # "staging-site-streamlit-chart-approval"
SOURCE_API = f"https://api-staging.owid.io/{SOURCE_ENV}/v1/indicators/"
SOURCE = OWID_ENV
assert OWID_ENV.env_type_id != "production", "Your .env points to production DB, please use a staging environment."

if config.DB_IS_PRODUCTION:
TARGET_ENV = config.ENV_FILE
TARGET_API = "https://api.ourworldindata.org/v1/indicators/"
# Try to compare against production DB if possible, otherwise compare against staging-site-master
if config.ENV_FILE_PROD:
TARGET = OWIDEnv.from_env_file(config.ENV_FILE_PROD)
else:
warning_msg = "ENV file doesn't connect to production DB, comparing against staging-site-master"
log.warning(warning_msg)
st.warning(warning_msg)
TARGET_ENV = "staging-site-master"
TARGET_API = f"https://api-staging.owid.io/{TARGET_ENV}/v1/indicators/"
TARGET = OWIDEnv.from_staging("master")


########################################
Expand Down Expand Up @@ -178,7 +176,7 @@ def compare_charts(
# Only one chart: new chart
if target_chart is None:
st.markdown(f"New version ┃ _{pretty_date(source_chart)}_")
chart_html(source_chart.config, base_url=SOURCE_ENV, base_api_url=SOURCE_API)
chart_html(source_chart.config, owid_env=SOURCE)
# Two charts, actual diff
else:
# Create two columns for the iframes
Expand All @@ -188,32 +186,26 @@ def compare_charts(
if not prod_is_newer:
with col1:
st.markdown(f"Production ┃ _{pretty_date(target_chart)}_")
chart_html(target_chart.config, base_url=TARGET_ENV, base_api_url=TARGET_API)
chart_html(target_chart.config, owid_env=TARGET)
with col2:
st.markdown(f":green[New version ┃ _{pretty_date(source_chart)}_]")
chart_html(source_chart.config, base_url=SOURCE_ENV, base_api_url=SOURCE_API)
chart_html(source_chart.config, owid_env=SOURCE)
# Conflict with live
else:
with col1:
st.markdown(
f":red[Production ┃ _{pretty_date(target_chart)}_] ⚠️",
help="The chart in production was modified after creating the staging server. Please resolve the conflict by integrating the latest changes from production into staging.",
)
chart_html(target_chart.config, base_url=TARGET_ENV, base_api_url=TARGET_API)
chart_html(target_chart.config, owid_env=TARGET)
with col2:
st.markdown(f"New version ┃ _{pretty_date(source_chart)}_")
chart_html(source_chart.config, base_url=SOURCE_ENV, base_api_url=SOURCE_API)
chart_html(source_chart.config, owid_env=SOURCE)


@st.cache_resource
def get_engines() -> tuple[Engine, Engine]:
_validate_env(SOURCE_ENV)
_validate_env(TARGET_ENV)

source_engine = _get_engine_for_env(SOURCE_ENV)
target_engine = _get_engine_for_env(TARGET_ENV)

return source_engine, target_engine
return SOURCE.get_engine(), TARGET.get_engine()


def show_help_text():
Expand Down
9 changes: 5 additions & 4 deletions apps/wizard/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

from apps.wizard.config import PAGES_BY_ALIAS
from apps.wizard.utils.defaults import load_wizard_defaults, update_wizard_defaults_from_form
from apps.wizard.utils.env import OWIDEnv
from apps.wizard.utils.step_form import StepForm
from etl import config
from etl.db import get_connection
Expand Down Expand Up @@ -613,10 +614,10 @@ def bugsnag_handler(exception: Exception) -> None:
error_util.handle_uncaught_app_exception = bugsnag_handler # type: ignore


def chart_html(chart_config: Dict[str, Any], base_url, base_api_url, height=500, **kwargs):
chart_config["bakedGrapherURL"] = f"http://{base_url}/grapher"
chart_config["adminBaseUrl"] = f"http://{base_url}"
chart_config["dataApiUrl"] = base_api_url
def chart_html(chart_config: Dict[str, Any], owid_env: OWIDEnv, height=500, **kwargs):
chart_config["bakedGrapherURL"] = f"{owid_env.base_site}/grapher"
chart_config["adminBaseUrl"] = owid_env.base_site
chart_config["dataApiUrl"] = owid_env.indicators_url

HTML = f"""
<!DOCTYPE html>
Expand Down
Loading

0 comments on commit 8a1453c

Please sign in to comment.