diff --git a/apps/owidbot/cli.py b/apps/owidbot/cli.py index f3bf1c6d7b3..044a1166500 100644 --- a/apps/owidbot/cli.py +++ b/apps/owidbot/cli.py @@ -59,15 +59,8 @@ def cli( repo = gh_utils.get_repo(repo_name) pr = gh_utils.get_pr(repo, branch) - comment = gh_utils.get_comment_from_pr(pr) - - # prefill services from existing PR comment - if comment: - services_body = services_from_comment(comment) - else: - services_body = {} - # recalculate services + services_body = {} for service in services: if service == "data-diff": services_body["data-diff"] = data_diff.run(include) @@ -84,6 +77,14 @@ def cli( else: raise AssertionError("Invalid service") + # get existing comment (do this as late as possible to avoid race conditions) + comment = gh_utils.get_comment_from_pr(pr) + + # fill in services from existing comment if not run via --services to avoid deleting them + if comment: + for service, content in services_from_comment(comment).items(): + services_body.setdefault(service, content) + body = create_comment_body(branch, services_body, start_time) if dry_run: diff --git a/apps/step_update/cli.py b/apps/step_update/cli.py index f4f3e116cae..cf008a7358d 100644 --- a/apps/step_update/cli.py +++ b/apps/step_update/cli.py @@ -287,7 +287,8 @@ def _update_snapshot_step( metadata = SnapshotMeta.load_from_yaml(step_dvc_file) # Update version and date accessed. metadata.version = step_version_new # type: ignore - metadata.origin.date_accessed = step_version_new # type: ignore + if metadata.origin: + metadata.origin.date_accessed = step_version_new # type: ignore # Write metadata to new file. step_dvc_file_new.write_text(metadata.to_yaml()) diff --git a/apps/wizard/pages/chart_diff/app.py b/apps/wizard/pages/chart_diff/app.py index 4d2f20325c6..d7c54a544a9 100644 --- a/apps/wizard/pages/chart_diff/app.py +++ b/apps/wizard/pages/chart_diff/app.py @@ -10,7 +10,7 @@ from apps.staging_sync.cli import _modified_chart_ids_by_admin from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified from apps.wizard.pages.chart_diff.config_diff import st_show_diff -from apps.wizard.utils import chart_html, set_states +from apps.wizard.utils import Pagination, chart_html, set_states from apps.wizard.utils.env import OWID_ENV, OWIDEnv from etl import config @@ -53,13 +53,15 @@ st.warning(warning_msg) TARGET = OWIDEnv.from_staging("master") +CHART_PER_PAGE = 10 + ######################################## # FUNCTIONS ######################################## -def get_chart_diffs(source_engine, target_engine): +def get_chart_diffs(source_engine, target_engine) -> dict[int, ChartDiffModified]: with Session(source_engine) as source_session: with Session(target_engine) as target_session: # Get IDs from modified charts @@ -72,6 +74,19 @@ def get_chart_diffs(source_engine, target_engine): ) for chart_id in chart_ids } + + # TODO: parallelize it, doesn't work with current version of SQLALchemy + # from concurrent.futures import ThreadPoolExecutor, as_completed + # with ThreadPoolExecutor(max_workers=5) as executor: + # chart_diffs_futures = { + # chart_id: executor.submit(ChartDiffModified.from_chart_id, chart_id, source_session, target_session) + # for chart_id in chart_ids + # } + # chart_diffs = {} + # for chart_id, future in chart_diffs_futures.items(): + + # chart_diffs[chart_id] = future.result() + return chart_diffs @@ -118,13 +133,16 @@ def refresh_on_click(source_session=source_session, target_session=None): # Actually show stuff with st.expander(label, not diff.approved): + col1, col2 = st.columns([1, 3]) + # Refresh - st.button( - "🔄 Refresh", - key=f"refresh-btn-{diff.chart_id}", - on_click=lambda s=source_session, t=target_session: refresh_on_click(s, t), - help="Get the latest version of the chart from the staging server.", - ) + with col2: + st.button( + "🔄 Refresh", + key=f"refresh-btn-{diff.chart_id}", + on_click=lambda s=source_session, t=target_session: refresh_on_click(s, t), + help="Get the latest version of the chart from the staging server.", + ) options = ["Pending", "Approve"] options = { @@ -133,16 +151,17 @@ def refresh_on_click(source_session=source_session, target_session=None): # "Reject": "red", } option_names = list(options.keys()) - st.radio( - label="Approve or reject chart", - key=f"radio-{diff.chart_id}", - options=option_names, - horizontal=True, - format_func=lambda x: f":{options.get(x)}-background[{x}]", - index=option_names.index("Approve") if diff.approved else option_names.index("Pending"), - on_change=lambda diff=diff, session=source_session: tgl_on_change(diff, session), - # label_visibility="collapsed", - ) + with col1: + st.radio( + label="Approve or reject chart", + key=f"radio-{diff.chart_id}", + options=option_names, + horizontal=True, + format_func=lambda x: f":{options.get(x)}-background[{x}]", + index=option_names.index("Approve") if diff.approved else option_names.index("Pending"), + on_change=lambda diff=diff, session=source_session: tgl_on_change(diff, session), + # label_visibility="collapsed", + ) if diff.is_modified: tab1, tab2 = st.tabs(["Charts", "Config diff"]) @@ -304,16 +323,29 @@ def main(): if chart_diffs_modified: st.header("Modified charts") st.markdown(f"{len(chart_diffs_modified)} charts modified in [{OWID_ENV.name}]({OWID_ENV.site})") - for chart_diff in chart_diffs_modified: + + modified_charts_pagination = Pagination( + chart_diffs_modified, items_per_page=CHART_PER_PAGE, pagination_key="pagination_modified" + ) + modified_charts_pagination.show_controls() + + for chart_diff in modified_charts_pagination.get_page_items(): st_show(chart_diff, source_session, target_session) else: st.warning( "No chart modifications found in the staging environment. Try unchecking the 'Hide approved charts' toggle in case there are hidden ones." ) + if chart_diffs_new: st.header("New charts") st.markdown(f"{len(chart_diffs_new)} new charts in [{OWID_ENV.name}]({OWID_ENV.site})") - for chart_diff in chart_diffs_new: + + new_charts_pagination = Pagination( + chart_diffs_new, items_per_page=CHART_PER_PAGE, pagination_key="pagination_new" + ) + new_charts_pagination.show_controls() + + for chart_diff in new_charts_pagination.get_page_items(): st_show(chart_diff, source_session, target_session) else: st.warning( diff --git a/apps/wizard/utils/__init__.py b/apps/wizard/utils/__init__.py index 41ee46c0f7b..ed845c6c42d 100644 --- a/apps/wizard/utils/__init__.py +++ b/apps/wizard/utils/__init__.py @@ -647,3 +647,51 @@ def chart_html(chart_config: Dict[str, Any], owid_env: OWIDEnv, height=500, **kw """ components.html(HTML, height=height, **kwargs) + + +class Pagination: + def __init__(self, items: list[Any], items_per_page: int, pagination_key: str): + self.items = items + self.items_per_page = items_per_page + self.pagination_key = pagination_key + + # Initialize session state for the current page + if self.pagination_key not in st.session_state: + self.page = 1 + + @property + def page(self): + return st.session_state[self.pagination_key] + + @page.setter + def page(self, value): + st.session_state[self.pagination_key] = value + + @property + def total_pages(self) -> int: + return (len(self.items) - 1) // self.items_per_page + 1 + + def get_page_items(self) -> list[Any]: + page = self.page + start_idx = (page - 1) * self.items_per_page + end_idx = start_idx + self.items_per_page + return self.items[start_idx:end_idx] + + def show_controls(self) -> None: + # Pagination controls + col1, col2, col3 = st.columns(3) + + with col1: + if self.page > 1: + if st.button("Previous"): + self.page -= 1 + st.rerun() + + with col3: + if self.page < self.total_pages: + if st.button("Next"): + self.page += 1 + st.rerun() + + with col2: + st.write(f"Page {self.page} of {self.total_pages}") diff --git a/apps/wizard/utils/env.py b/apps/wizard/utils/env.py index 3ed65a3d1bb..d98a1e75e70 100644 --- a/apps/wizard/utils/env.py +++ b/apps/wizard/utils/env.py @@ -1,4 +1,5 @@ """Tools to handle OWID environment.""" + import re from dataclasses import dataclass, fields from pathlib import Path diff --git a/etl/chart_revision/v3/indicator_update.py b/etl/chart_revision/v3/indicator_update.py index b54e185e1b8..bd3e282467d 100644 --- a/etl/chart_revision/v3/indicator_update.py +++ b/etl/chart_revision/v3/indicator_update.py @@ -108,8 +108,8 @@ def update_chart_config_map( ) -> Dict[str, Any]: """Update map config.""" log.info("variable_update: updating map config") - # Proceed only if chart uses map - if config["hasMapTab"]: + # Proceed only if chart uses map and has `map` field + if config["hasMapTab"] and "map" in config: log.info("variable_update: chart uses map") # Get map.columnSlug map_var_id = config["map"].get( diff --git a/etl/config.py b/etl/config.py index c847e1b7b3d..aa08907c00a 100644 --- a/etl/config.py +++ b/etl/config.py @@ -10,13 +10,18 @@ import os import pwd from os import environ as env +from typing import Optional import bugsnag +import git import pandas as pd +import structlog from dotenv import load_dotenv from etl.paths import BASE_DIR +log = structlog.get_logger() + ENV_FILE = env.get("ENV_FILE", BASE_DIR / ".env") @@ -76,17 +81,36 @@ def load_env(): assert DATA_API_ENV == "production", "DATA_API_ENV must be set to production when publishing to live_grapher" -# if STAGING is used, override ENV values -if env.get("STAGING"): +def load_STAGING() -> Optional[str]: + # if STAGING is used, override ENV values STAGING = env.get("STAGING") + + # ENV_FILE takes precedence over STAGING + if STAGING and ENV_FILE != BASE_DIR / ".env": + log.warning("Both ENV_FILE and STAGING is set, STAGING will be ignored.") + return None + # if STAGING=1, use branch name + elif STAGING == "1": + branch_name = git.Repo(BASE_DIR).active_branch.name + if branch_name == "master": + log.warning("You're on master branch, using local env instead of STAGING=master") + return None + else: + return branch_name + else: + return STAGING + + +STAGING = load_STAGING() + +# if STAGING is used, override ENV values +if STAGING is not None: DB_USER = "owid" DB_NAME = "owid" DB_PASS = "" DB_PORT = 3306 DB_HOST = f"staging-site-{STAGING}" DATA_API_ENV = f"staging-site-{STAGING}" -else: - STAGING = None # if running against live, use s3://owid-api, otherwise use s3://owid-api-staging diff --git a/etl/datadiff.py b/etl/datadiff.py index 4fb50997846..a68dd9ae894 100644 --- a/etl/datadiff.py +++ b/etl/datadiff.py @@ -721,7 +721,11 @@ def _table_metadata_dict(tab: Table) -> Dict[str, Any]: def _column_metadata_dict(meta: VariableMeta) -> Dict[str, Any]: d = meta.to_dict() + # remove noise d.pop("processing_log", None) + for source in d.get("sources", []): + source.pop("date_accessed", None) + source.pop("publication_date", None) return d diff --git a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-18/funding.py b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-18/funding.py index 6069ae34990..4990eb86797 100644 --- a/etl/steps/data/garden/neglected_tropical_diseases/2024-05-18/funding.py +++ b/etl/steps/data/garden/neglected_tropical_diseases/2024-05-18/funding.py @@ -7,7 +7,6 @@ from owid.catalog import Dataset, Table from structlog import get_logger -from etl.data_helpers import geo from etl.helpers import PathFinder, create_dataset log = get_logger()