Skip to content

Commit

Permalink
✨ chart-diff improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
Marigold committed May 22, 2024
1 parent 5a8460d commit 2acac2e
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 35 deletions.
17 changes: 9 additions & 8 deletions apps/owidbot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,8 @@ def cli(
repo = gh_utils.get_repo(repo_name)
pr = gh_utils.get_pr(repo, branch)

comment = gh_utils.get_comment_from_pr(pr)

# prefill services from existing PR comment
if comment:
services_body = services_from_comment(comment)
else:
services_body = {}

# recalculate services
services_body = {}
for service in services:
if service == "data-diff":
services_body["data-diff"] = data_diff.run(include)
Expand All @@ -84,6 +77,14 @@ def cli(
else:
raise AssertionError("Invalid service")

# get existing comment (do this as late as possible to avoid race conditions)
comment = gh_utils.get_comment_from_pr(pr)

# fill in services from existing comment if not run via --services to avoid deleting them
if comment:
for service, content in services_from_comment(comment).items():
services_body.setdefault(service, content)

body = create_comment_body(branch, services_body, start_time)

if dry_run:
Expand Down
3 changes: 2 additions & 1 deletion apps/step_update/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,8 @@ def _update_snapshot_step(
metadata = SnapshotMeta.load_from_yaml(step_dvc_file)
# Update version and date accessed.
metadata.version = step_version_new # type: ignore
metadata.origin.date_accessed = step_version_new # type: ignore
if metadata.origin:
metadata.origin.date_accessed = step_version_new # type: ignore
# Write metadata to new file.
step_dvc_file_new.write_text(metadata.to_yaml())

Expand Down
72 changes: 52 additions & 20 deletions apps/wizard/pages/chart_diff/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from apps.staging_sync.cli import _modified_chart_ids_by_admin
from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified
from apps.wizard.pages.chart_diff.config_diff import st_show_diff
from apps.wizard.utils import chart_html, set_states
from apps.wizard.utils import Pagination, chart_html, set_states
from apps.wizard.utils.env import OWID_ENV, OWIDEnv
from etl import config

Expand Down Expand Up @@ -53,13 +53,15 @@
st.warning(warning_msg)
TARGET = OWIDEnv.from_staging("master")

CHART_PER_PAGE = 10


########################################
# FUNCTIONS
########################################


def get_chart_diffs(source_engine, target_engine):
def get_chart_diffs(source_engine, target_engine) -> dict[int, ChartDiffModified]:
with Session(source_engine) as source_session:
with Session(target_engine) as target_session:
# Get IDs from modified charts
Expand All @@ -72,6 +74,19 @@ def get_chart_diffs(source_engine, target_engine):
)
for chart_id in chart_ids
}

# TODO: parallelize it, doesn't work with current version of SQLALchemy
# from concurrent.futures import ThreadPoolExecutor, as_completed
# with ThreadPoolExecutor(max_workers=5) as executor:
# chart_diffs_futures = {
# chart_id: executor.submit(ChartDiffModified.from_chart_id, chart_id, source_session, target_session)
# for chart_id in chart_ids
# }
# chart_diffs = {}
# for chart_id, future in chart_diffs_futures.items():

# chart_diffs[chart_id] = future.result()

return chart_diffs


Expand Down Expand Up @@ -118,13 +133,16 @@ def refresh_on_click(source_session=source_session, target_session=None):

# Actually show stuff
with st.expander(label, not diff.approved):
col1, col2 = st.columns([1, 3])

# Refresh
st.button(
"🔄 Refresh",
key=f"refresh-btn-{diff.chart_id}",
on_click=lambda s=source_session, t=target_session: refresh_on_click(s, t),
help="Get the latest version of the chart from the staging server.",
)
with col2:
st.button(
"🔄 Refresh",
key=f"refresh-btn-{diff.chart_id}",
on_click=lambda s=source_session, t=target_session: refresh_on_click(s, t),
help="Get the latest version of the chart from the staging server.",
)

options = ["Pending", "Approve"]
options = {
Expand All @@ -133,16 +151,17 @@ def refresh_on_click(source_session=source_session, target_session=None):
# "Reject": "red",
}
option_names = list(options.keys())
st.radio(
label="Approve or reject chart",
key=f"radio-{diff.chart_id}",
options=option_names,
horizontal=True,
format_func=lambda x: f":{options.get(x)}-background[{x}]",
index=option_names.index("Approve") if diff.approved else option_names.index("Pending"),
on_change=lambda diff=diff, session=source_session: tgl_on_change(diff, session),
# label_visibility="collapsed",
)
with col1:
st.radio(
label="Approve or reject chart",
key=f"radio-{diff.chart_id}",
options=option_names,
horizontal=True,
format_func=lambda x: f":{options.get(x)}-background[{x}]",
index=option_names.index("Approve") if diff.approved else option_names.index("Pending"),
on_change=lambda diff=diff, session=source_session: tgl_on_change(diff, session),
# label_visibility="collapsed",
)

if diff.is_modified:
tab1, tab2 = st.tabs(["Charts", "Config diff"])
Expand Down Expand Up @@ -304,16 +323,29 @@ def main():
if chart_diffs_modified:
st.header("Modified charts")
st.markdown(f"{len(chart_diffs_modified)} charts modified in [{OWID_ENV.name}]({OWID_ENV.site})")
for chart_diff in chart_diffs_modified:

modified_charts_pagination = Pagination(
chart_diffs_modified, items_per_page=CHART_PER_PAGE, pagination_key="pagination_modified"
)
modified_charts_pagination.show_controls()

for chart_diff in modified_charts_pagination.get_page_items():
st_show(chart_diff, source_session, target_session)
else:
st.warning(
"No chart modifications found in the staging environment. Try unchecking the 'Hide approved charts' toggle in case there are hidden ones."
)

if chart_diffs_new:
st.header("New charts")
st.markdown(f"{len(chart_diffs_new)} new charts in [{OWID_ENV.name}]({OWID_ENV.site})")
for chart_diff in chart_diffs_new:

new_charts_pagination = Pagination(
chart_diffs_new, items_per_page=CHART_PER_PAGE, pagination_key="pagination_new"
)
new_charts_pagination.show_controls()

for chart_diff in new_charts_pagination.get_page_items():
st_show(chart_diff, source_session, target_session)
else:
st.warning(
Expand Down
48 changes: 48 additions & 0 deletions apps/wizard/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,3 +647,51 @@ def chart_html(chart_config: Dict[str, Any], owid_env: OWIDEnv, height=500, **kw
"""

components.html(HTML, height=height, **kwargs)


class Pagination:
def __init__(self, items: list[Any], items_per_page: int, pagination_key: str):
self.items = items
self.items_per_page = items_per_page
self.pagination_key = pagination_key

# Initialize session state for the current page
if self.pagination_key not in st.session_state:
self.page = 1

@property
def page(self):
return st.session_state[self.pagination_key]

@page.setter
def page(self, value):
st.session_state[self.pagination_key] = value

@property
def total_pages(self) -> int:
return (len(self.items) - 1) // self.items_per_page + 1

def get_page_items(self) -> list[Any]:
page = self.page
start_idx = (page - 1) * self.items_per_page
end_idx = start_idx + self.items_per_page
return self.items[start_idx:end_idx]

def show_controls(self) -> None:
# Pagination controls
col1, col2, col3 = st.columns(3)

with col1:
if self.page > 1:
if st.button("Previous"):
self.page -= 1
st.rerun()

with col3:
if self.page < self.total_pages:
if st.button("Next"):
self.page += 1
st.rerun()

with col2:
st.write(f"Page {self.page} of {self.total_pages}")
4 changes: 2 additions & 2 deletions etl/chart_revision/v3/indicator_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ def update_chart_config_map(
) -> Dict[str, Any]:
"""Update map config."""
log.info("variable_update: updating map config")
# Proceed only if chart uses map
if config["hasMapTab"]:
# Proceed only if chart uses map and has `map` field
if config["hasMapTab"] and "map" in config:
log.info("variable_update: chart uses map")
# Get map.columnSlug
map_var_id = config["map"].get(
Expand Down
32 changes: 28 additions & 4 deletions etl/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,18 @@
import os
import pwd
from os import environ as env
from typing import Optional

import bugsnag
import git
import pandas as pd
import structlog
from dotenv import load_dotenv

from etl.paths import BASE_DIR

log = structlog.get_logger()

ENV_FILE = env.get("ENV_FILE", BASE_DIR / ".env")


Expand Down Expand Up @@ -76,17 +81,36 @@ def load_env():
assert DATA_API_ENV == "production", "DATA_API_ENV must be set to production when publishing to live_grapher"


# if STAGING is used, override ENV values
if env.get("STAGING"):
def load_STAGING() -> Optional[str]:
# if STAGING is used, override ENV values
STAGING = env.get("STAGING")

# ENV_FILE takes precedence over STAGING
if STAGING and ENV_FILE != BASE_DIR / ".env":
log.warning("Both ENV_FILE and STAGING is set, STAGING will be ignored.")
return None
# if STAGING=1, use branch name
elif STAGING == "1":
branch_name = git.Repo(BASE_DIR).active_branch.name
if branch_name == "master":
log.warning("You're on master branch, using local env instead of STAGING=master")
return None
else:
return branch_name
else:
return None


STAGING = load_STAGING()

# if STAGING is used, override ENV values
if STAGING is not None:
DB_USER = "owid"
DB_NAME = "owid"
DB_PASS = ""
DB_PORT = 3306
DB_HOST = f"staging-site-{STAGING}"
DATA_API_ENV = f"staging-site-{STAGING}"
else:
STAGING = None


# if running against live, use s3://owid-api, otherwise use s3://owid-api-staging
Expand Down
4 changes: 4 additions & 0 deletions etl/datadiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,11 @@ def _table_metadata_dict(tab: Table) -> Dict[str, Any]:

def _column_metadata_dict(meta: VariableMeta) -> Dict[str, Any]:
d = meta.to_dict()
# remove noise
d.pop("processing_log", None)
for source in d.get("sources", []):
source.pop("date_accessed", None)
source.pop("publication_date", None)
return d


Expand Down

0 comments on commit 2acac2e

Please sign in to comment.