Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ chart-diff improvements #2680

Merged
merged 1 commit into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions apps/owidbot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,8 @@ def cli(
repo = gh_utils.get_repo(repo_name)
pr = gh_utils.get_pr(repo, branch)

comment = gh_utils.get_comment_from_pr(pr)

# prefill services from existing PR comment
if comment:
services_body = services_from_comment(comment)
else:
services_body = {}

# recalculate services
services_body = {}
for service in services:
if service == "data-diff":
services_body["data-diff"] = data_diff.run(include)
Expand All @@ -84,6 +77,14 @@ def cli(
else:
raise AssertionError("Invalid service")

# get existing comment (do this as late as possible to avoid race conditions)
comment = gh_utils.get_comment_from_pr(pr)

# fill in services from existing comment if not run via --services to avoid deleting them
if comment:
for service, content in services_from_comment(comment).items():
services_body.setdefault(service, content)

body = create_comment_body(branch, services_body, start_time)

if dry_run:
Expand Down
3 changes: 2 additions & 1 deletion apps/step_update/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,8 @@ def _update_snapshot_step(
metadata = SnapshotMeta.load_from_yaml(step_dvc_file)
# Update version and date accessed.
metadata.version = step_version_new # type: ignore
metadata.origin.date_accessed = step_version_new # type: ignore
if metadata.origin:
metadata.origin.date_accessed = step_version_new # type: ignore
# Write metadata to new file.
step_dvc_file_new.write_text(metadata.to_yaml())

Expand Down
72 changes: 52 additions & 20 deletions apps/wizard/pages/chart_diff/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from apps.staging_sync.cli import _modified_chart_ids_by_admin
from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified
from apps.wizard.pages.chart_diff.config_diff import st_show_diff
from apps.wizard.utils import chart_html, set_states
from apps.wizard.utils import Pagination, chart_html, set_states
from apps.wizard.utils.env import OWID_ENV, OWIDEnv
from etl import config

Expand Down Expand Up @@ -53,13 +53,15 @@
st.warning(warning_msg)
TARGET = OWIDEnv.from_staging("master")

CHART_PER_PAGE = 10


########################################
# FUNCTIONS
########################################


def get_chart_diffs(source_engine, target_engine):
def get_chart_diffs(source_engine, target_engine) -> dict[int, ChartDiffModified]:
with Session(source_engine) as source_session:
with Session(target_engine) as target_session:
# Get IDs from modified charts
Expand All @@ -72,6 +74,19 @@ def get_chart_diffs(source_engine, target_engine):
)
for chart_id in chart_ids
}

# TODO: parallelize it, doesn't work with current version of SQLALchemy
# from concurrent.futures import ThreadPoolExecutor, as_completed
# with ThreadPoolExecutor(max_workers=5) as executor:
# chart_diffs_futures = {
# chart_id: executor.submit(ChartDiffModified.from_chart_id, chart_id, source_session, target_session)
# for chart_id in chart_ids
# }
# chart_diffs = {}
# for chart_id, future in chart_diffs_futures.items():

# chart_diffs[chart_id] = future.result()

return chart_diffs


Expand Down Expand Up @@ -118,13 +133,16 @@ def refresh_on_click(source_session=source_session, target_session=None):

# Actually show stuff
with st.expander(label, not diff.approved):
col1, col2 = st.columns([1, 3])

# Refresh
st.button(
"🔄 Refresh",
key=f"refresh-btn-{diff.chart_id}",
on_click=lambda s=source_session, t=target_session: refresh_on_click(s, t),
help="Get the latest version of the chart from the staging server.",
)
with col2:
st.button(
"🔄 Refresh",
key=f"refresh-btn-{diff.chart_id}",
on_click=lambda s=source_session, t=target_session: refresh_on_click(s, t),
help="Get the latest version of the chart from the staging server.",
)

options = ["Pending", "Approve"]
options = {
Expand All @@ -133,16 +151,17 @@ def refresh_on_click(source_session=source_session, target_session=None):
# "Reject": "red",
}
option_names = list(options.keys())
st.radio(
label="Approve or reject chart",
key=f"radio-{diff.chart_id}",
options=option_names,
horizontal=True,
format_func=lambda x: f":{options.get(x)}-background[{x}]",
index=option_names.index("Approve") if diff.approved else option_names.index("Pending"),
on_change=lambda diff=diff, session=source_session: tgl_on_change(diff, session),
# label_visibility="collapsed",
)
with col1:
st.radio(
label="Approve or reject chart",
key=f"radio-{diff.chart_id}",
options=option_names,
horizontal=True,
format_func=lambda x: f":{options.get(x)}-background[{x}]",
index=option_names.index("Approve") if diff.approved else option_names.index("Pending"),
on_change=lambda diff=diff, session=source_session: tgl_on_change(diff, session),
# label_visibility="collapsed",
)

if diff.is_modified:
tab1, tab2 = st.tabs(["Charts", "Config diff"])
Expand Down Expand Up @@ -304,16 +323,29 @@ def main():
if chart_diffs_modified:
st.header("Modified charts")
st.markdown(f"{len(chart_diffs_modified)} charts modified in [{OWID_ENV.name}]({OWID_ENV.site})")
for chart_diff in chart_diffs_modified:

modified_charts_pagination = Pagination(
chart_diffs_modified, items_per_page=CHART_PER_PAGE, pagination_key="pagination_modified"
)
modified_charts_pagination.show_controls()
Comment on lines +326 to +330
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

beautiful


for chart_diff in modified_charts_pagination.get_page_items():
st_show(chart_diff, source_session, target_session)
else:
st.warning(
"No chart modifications found in the staging environment. Try unchecking the 'Hide approved charts' toggle in case there are hidden ones."
)

if chart_diffs_new:
st.header("New charts")
st.markdown(f"{len(chart_diffs_new)} new charts in [{OWID_ENV.name}]({OWID_ENV.site})")
for chart_diff in chart_diffs_new:

new_charts_pagination = Pagination(
chart_diffs_new, items_per_page=CHART_PER_PAGE, pagination_key="pagination_new"
)
new_charts_pagination.show_controls()

for chart_diff in new_charts_pagination.get_page_items():
st_show(chart_diff, source_session, target_session)
else:
st.warning(
Expand Down
48 changes: 48 additions & 0 deletions apps/wizard/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,3 +647,51 @@ def chart_html(chart_config: Dict[str, Any], owid_env: OWIDEnv, height=500, **kw
"""

components.html(HTML, height=height, **kwargs)


class Pagination:
def __init__(self, items: list[Any], items_per_page: int, pagination_key: str):
self.items = items
self.items_per_page = items_per_page
self.pagination_key = pagination_key

# Initialize session state for the current page
if self.pagination_key not in st.session_state:
self.page = 1

@property
def page(self):
return st.session_state[self.pagination_key]

@page.setter
def page(self, value):
st.session_state[self.pagination_key] = value

@property
def total_pages(self) -> int:
return (len(self.items) - 1) // self.items_per_page + 1

def get_page_items(self) -> list[Any]:
page = self.page
start_idx = (page - 1) * self.items_per_page
end_idx = start_idx + self.items_per_page
return self.items[start_idx:end_idx]

def show_controls(self) -> None:
# Pagination controls
col1, col2, col3 = st.columns(3)

with col1:
if self.page > 1:
if st.button("Previous"):
self.page -= 1
st.rerun()

with col3:
if self.page < self.total_pages:
if st.button("Next"):
self.page += 1
st.rerun()

with col2:
st.write(f"Page {self.page} of {self.total_pages}")
1 change: 1 addition & 0 deletions apps/wizard/utils/env.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Tools to handle OWID environment."""

import re
from dataclasses import dataclass, fields
from pathlib import Path
Expand Down
4 changes: 2 additions & 2 deletions etl/chart_revision/v3/indicator_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ def update_chart_config_map(
) -> Dict[str, Any]:
"""Update map config."""
log.info("variable_update: updating map config")
# Proceed only if chart uses map
if config["hasMapTab"]:
# Proceed only if chart uses map and has `map` field
if config["hasMapTab"] and "map" in config:
log.info("variable_update: chart uses map")
# Get map.columnSlug
map_var_id = config["map"].get(
Expand Down
32 changes: 28 additions & 4 deletions etl/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,18 @@
import os
import pwd
from os import environ as env
from typing import Optional

import bugsnag
import git
import pandas as pd
import structlog
from dotenv import load_dotenv

from etl.paths import BASE_DIR

log = structlog.get_logger()

ENV_FILE = env.get("ENV_FILE", BASE_DIR / ".env")


Expand Down Expand Up @@ -76,17 +81,36 @@ def load_env():
assert DATA_API_ENV == "production", "DATA_API_ENV must be set to production when publishing to live_grapher"


# if STAGING is used, override ENV values
if env.get("STAGING"):
def load_STAGING() -> Optional[str]:
# if STAGING is used, override ENV values
STAGING = env.get("STAGING")

# ENV_FILE takes precedence over STAGING
if STAGING and ENV_FILE != BASE_DIR / ".env":
log.warning("Both ENV_FILE and STAGING is set, STAGING will be ignored.")
return None
# if STAGING=1, use branch name
elif STAGING == "1":
branch_name = git.Repo(BASE_DIR).active_branch.name
if branch_name == "master":
log.warning("You're on master branch, using local env instead of STAGING=master")
return None
else:
return branch_name
else:
return STAGING


STAGING = load_STAGING()

# if STAGING is used, override ENV values
if STAGING is not None:
DB_USER = "owid"
DB_NAME = "owid"
DB_PASS = ""
DB_PORT = 3306
DB_HOST = f"staging-site-{STAGING}"
DATA_API_ENV = f"staging-site-{STAGING}"
else:
STAGING = None


# if running against live, use s3://owid-api, otherwise use s3://owid-api-staging
Expand Down
4 changes: 4 additions & 0 deletions etl/datadiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,11 @@ def _table_metadata_dict(tab: Table) -> Dict[str, Any]:

def _column_metadata_dict(meta: VariableMeta) -> Dict[str, Any]:
d = meta.to_dict()
# remove noise
d.pop("processing_log", None)
for source in d.get("sources", []):
source.pop("date_accessed", None)
source.pop("publication_date", None)
return d


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from owid.catalog import Dataset, Table
from structlog import get_logger

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

log = get_logger()
Expand Down
Loading