diff --git a/apps/owidbot/chart_diff.py b/apps/owidbot/chart_diff.py index 7182a2a0758..ce3a06173d2 100644 --- a/apps/owidbot/chart_diff.py +++ b/apps/owidbot/chart_diff.py @@ -2,9 +2,9 @@ from sqlmodel import Session from structlog import get_logger -from apps.staging_sync.cli import _get_container_name, _get_engine_for_env, _modified_chart_ids_by_admin +from apps.staging_sync.cli import _modified_chart_ids_by_admin from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified -from etl import config +from apps.wizard.utils.env import OWID_ENV, OWIDEnv, get_container_name from . import github_utils as gh_utils @@ -44,7 +44,7 @@ def create_check_run(repo_name: str, branch: str, charts_df: pd.DataFrame, dry_r def run(branch: str, charts_df: pd.DataFrame) -> str: - container_name = _get_container_name(branch) if branch else "dry-run" + container_name = get_container_name(branch) if branch else "dry-run" chart_diff = format_chart_diff(charts_df) @@ -55,7 +55,7 @@ def run(branch: str, charts_df: pd.DataFrame) -> str: body = f"""
-{status} chart-diff: +chart-diff: {status} {chart_diff}
""".strip() @@ -64,13 +64,13 @@ def run(branch: str, charts_df: pd.DataFrame) -> str: def call_chart_diff(branch: str) -> pd.DataFrame: - source_engine = _get_engine_for_env(branch) + source_engine = OWIDEnv.from_staging(branch).get_engine() - if config.DB_IS_PRODUCTION: - target_engine = _get_engine_for_env(config.ENV_FILE) + if OWID_ENV.env_type_id == "production": + target_engine = OWID_ENV.get_engine() else: log.warning("ENV file doesn't connect to production DB, comparing against staging-site-master") - target_engine = _get_engine_for_env("staging-site-master") + target_engine = OWIDEnv.from_staging("master").get_engine() df = [] with Session(source_engine) as source_session: diff --git a/apps/owidbot/cli.py b/apps/owidbot/cli.py index f0a907aee9f..f3bf1c6d7b3 100644 --- a/apps/owidbot/cli.py +++ b/apps/owidbot/cli.py @@ -9,7 +9,7 @@ from rich_click.rich_command import RichCommand from apps.owidbot import chart_diff, data_diff, grapher -from apps.staging_sync.cli import _get_container_name +from apps.wizard.utils.env import get_container_name from . 
import github_utils as gh_utils @@ -122,7 +122,7 @@ def services_from_comment(comment: Any) -> Dict[str, str]: def create_comment_body(branch: str, services: Dict[str, str], start_time: float): - container_name = _get_container_name(branch) if branch else "dry-run" + container_name = get_container_name(branch) if branch else "dry-run" body = f""" Quick links (staging server): diff --git a/apps/staging_sync/app.py b/apps/staging_sync/app.py index ed652fbf0af..20e76596392 100644 --- a/apps/staging_sync/app.py +++ b/apps/staging_sync/app.py @@ -48,7 +48,9 @@ def main(): placeholder="my-branch", help="Branch name of PR that created the staging server (with existing `staging-site-mybranch` server) or the name of staging server.", ) - target = st.text_input("Target", value="live", help="Using `live` uses DB from local `.env` file as target.") + target = st.text_input( + "Target", value="production", help="Using `production` uses DB from local `.env` file as target." + ) approve_revisions = st.checkbox( "Automatically approve chart revisions for edited charts", value=False, @@ -57,17 +59,17 @@ def main(): dry_run = st.checkbox("Dry run", value=True) # Live uses `.env` file which points to the live database in production - if target == "live": + if target == "production": target_env = ".env" else: target_env = target # Button to show text if st.button("Sync charts", help="This can take a while."): - if target == "live": + if target == "production": assert ( config.DB_IS_PRODUCTION - ), "If target = live, then chart-sync must be run in production with .env pointing to live DB." + ), "If target = production, then chart-sync must be run in production with .env pointing to live DB." 
if not _is_valid_config(source, target_env): return diff --git a/apps/staging_sync/cli.py b/apps/staging_sync/cli.py index f88084f1461..ddd412e190d 100644 --- a/apps/staging_sync/cli.py +++ b/apps/staging_sync/cli.py @@ -2,14 +2,13 @@ import datetime as dt import re from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Union +from typing import Any, Dict, List, Optional, Set import click import pandas as pd import pytz import requests import structlog -from dotenv import dotenv_values from rich import print from rich_click.rich_command import RichCommand from slack_sdk import WebClient @@ -18,10 +17,11 @@ from apps.staging_sync.admin_api import AdminAPI from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified +from apps.wizard.utils.env import OWIDEnv, get_container_name from etl import config from etl import grapher_model as gm from etl.datadiff import _dict_diff -from etl.db import Engine, get_engine, read_sql +from etl.db import read_sql log = structlog.get_logger() @@ -138,11 +138,8 @@ def cli( source = _get_git_branch_from_commit_sha(source) log.info("staging_sync.use_branch", branch=source) - _validate_env(source) - _validate_env(target) - - source_engine = _get_engine_for_env(source) - target_engine = _get_engine_for_env(target) + source_engine = OWIDEnv.from_staging_or_env_file(source).get_engine() + target_engine = OWIDEnv.from_staging_or_env_file(target).get_engine() staging_created_at = _get_staging_created_at(source, staging_created_at) # type: ignore @@ -405,7 +402,7 @@ def _notify_slack_chart_update(chart_id: int, source: str, diff: ChartDiffModifi message = f""" :warning: *ETL chart-sync: Unapproved Chart Update* from `{source}` - | + | *Staging Edited*: {str(diff.source_chart.updatedAt)} UTC *Production Edited*: {str(diff.target_chart.updatedAt)} UTC ``` @@ -424,7 +421,7 @@ def _notify_slack_chart_update(chart_id: int, source: str, diff: ChartDiffModifi def _notify_slack_chart_create(source_chart_id: int, 
target_chart_id: int, source: str, dry_run: bool) -> None: message = f""" :warning: *ETL chart-sync: Unapproved New Chart* from `{source}` - | + | """.strip() print(message) @@ -455,9 +452,9 @@ def _matches_include_exclude(chart: gm.Chart, session: Session, include: Optiona return True -def _get_staging_created_at(source: Path, staging_created_at: Optional[str]) -> dt.datetime: +def _get_staging_created_at(source: str, staging_created_at: Optional[str]) -> dt.datetime: if staging_created_at is None: - if not _is_env(source): + if not Path(source).exists(): return _get_git_branch_creation_date(str(source).replace("staging-site-", "")) else: log.warning( @@ -469,56 +466,6 @@ def _get_staging_created_at(source: Path, staging_created_at: Optional[str]) -> return pd.to_datetime(staging_created_at) -def _is_env(env: Union[str, Path]) -> bool: - return Path(env).exists() - - -def _normalise_branch(branch_name): - return re.sub(r"[\/\._]", "-", branch_name) - - -def _get_container_name(branch_name): - normalized_branch = _normalise_branch(branch_name) - - # Strip staging-site- prefix to add it back later - normalized_branch = normalized_branch.replace("staging-site-", "") - - # Ensure the container name is less than 63 characters - container_name = f"staging-site-{normalized_branch[:50]}" - # Remove trailing hyphens - return container_name.rstrip("-") - - -def _validate_env(env: Union[str, Path]) -> None: - # if `env` is a path, it must exist (otherwise we'd confuse it with a staging server name)s - if str(env).startswith(".") and "env" in str(env) and not Path(env).exists(): - raise click.BadParameter(f"File {env} does not exist") - - -def _get_engine_for_env(env: Union[Path, str]) -> Engine: - # env exists as a path - if _is_env(Path(env)): - config = dotenv_values(str(env)) - # env could be server name - else: - staging_name = str(env) - - # add staging-site- prefix - if not staging_name.startswith("staging-site-"): - staging_name = "staging-site-" + staging_name - - # 
generate config for staging server - config = { - "DB_USER": "owid", - "DB_NAME": "owid", - "DB_PASS": "", - "DB_PORT": "3306", - "DB_HOST": _get_container_name(staging_name), - } - - return get_engine(config) - - def _prune_chart_config(config: Dict[str, Any]) -> Dict[str, Any]: config = copy.deepcopy(config) config = {k: v for k, v in config.items() if k not in ("version",)} diff --git a/apps/wizard/pages/chart_diff/app.py b/apps/wizard/pages/chart_diff/app.py index 2f75aba8698..4d2f20325c6 100644 --- a/apps/wizard/pages/chart_diff/app.py +++ b/apps/wizard/pages/chart_diff/app.py @@ -7,11 +7,11 @@ from st_pages import add_indentation from structlog import get_logger -from apps.staging_sync.cli import _get_engine_for_env, _modified_chart_ids_by_admin, _validate_env +from apps.staging_sync.cli import _modified_chart_ids_by_admin from apps.wizard.pages.chart_diff.chart_diff import ChartDiffModified from apps.wizard.pages.chart_diff.config_diff import st_show_diff from apps.wizard.utils import chart_html, set_states -from apps.wizard.utils.env import OWID_ENV +from apps.wizard.utils.env import OWID_ENV, OWIDEnv from etl import config log = get_logger() @@ -41,19 +41,17 @@ ######################################## # LOAD ENVS ######################################## -# TODO: simplify this -SOURCE_ENV = config.DB_HOST # "staging-site-streamlit-chart-approval" -SOURCE_API = f"https://api-staging.owid.io/{SOURCE_ENV}/v1/indicators/" +SOURCE = OWID_ENV +assert OWID_ENV.env_type_id != "production", "Your .env points to production DB, please use a staging environment." 
-if config.DB_IS_PRODUCTION: - TARGET_ENV = config.ENV_FILE - TARGET_API = "https://api.ourworldindata.org/v1/indicators/" +# Try to compare against production DB if possible, otherwise compare against staging-site-master +if config.ENV_FILE_PROD: + TARGET = OWIDEnv.from_env_file(config.ENV_FILE_PROD) else: warning_msg = "ENV file doesn't connect to production DB, comparing against staging-site-master" log.warning(warning_msg) st.warning(warning_msg) - TARGET_ENV = "staging-site-master" - TARGET_API = f"https://api-staging.owid.io/{TARGET_ENV}/v1/indicators/" + TARGET = OWIDEnv.from_staging("master") ######################################## @@ -178,7 +176,7 @@ def compare_charts( # Only one chart: new chart if target_chart is None: st.markdown(f"New version ┃ _{pretty_date(source_chart)}_") - chart_html(source_chart.config, base_url=SOURCE_ENV, base_api_url=SOURCE_API) + chart_html(source_chart.config, owid_env=SOURCE) # Two charts, actual diff else: # Create two columns for the iframes @@ -188,10 +186,10 @@ def compare_charts( if not prod_is_newer: with col1: st.markdown(f"Production ┃ _{pretty_date(target_chart)}_") - chart_html(target_chart.config, base_url=TARGET_ENV, base_api_url=TARGET_API) + chart_html(target_chart.config, owid_env=TARGET) with col2: st.markdown(f":green[New version ┃ _{pretty_date(source_chart)}_]") - chart_html(source_chart.config, base_url=SOURCE_ENV, base_api_url=SOURCE_API) + chart_html(source_chart.config, owid_env=SOURCE) # Conflict with live else: with col1: @@ -199,21 +197,15 @@ def compare_charts( f":red[Production ┃ _{pretty_date(target_chart)}_] ⚠️", help="The chart in production was modified after creating the staging server. 
Please resolve the conflict by integrating the latest changes from production into staging.", ) - chart_html(target_chart.config, base_url=TARGET_ENV, base_api_url=TARGET_API) + chart_html(target_chart.config, owid_env=TARGET) with col2: st.markdown(f"New version ┃ _{pretty_date(source_chart)}_") - chart_html(source_chart.config, base_url=SOURCE_ENV, base_api_url=SOURCE_API) + chart_html(source_chart.config, owid_env=SOURCE) @st.cache_resource def get_engines() -> tuple[Engine, Engine]: - _validate_env(SOURCE_ENV) - _validate_env(TARGET_ENV) - - source_engine = _get_engine_for_env(SOURCE_ENV) - target_engine = _get_engine_for_env(TARGET_ENV) - - return source_engine, target_engine + return SOURCE.get_engine(), TARGET.get_engine() def show_help_text(): diff --git a/apps/wizard/utils/__init__.py b/apps/wizard/utils/__init__.py index d51dd12d287..41ee46c0f7b 100644 --- a/apps/wizard/utils/__init__.py +++ b/apps/wizard/utils/__init__.py @@ -28,6 +28,7 @@ from apps.wizard.config import PAGES_BY_ALIAS from apps.wizard.utils.defaults import load_wizard_defaults, update_wizard_defaults_from_form +from apps.wizard.utils.env import OWIDEnv from apps.wizard.utils.step_form import StepForm from etl import config from etl.db import get_connection @@ -613,10 +614,10 @@ def bugsnag_handler(exception: Exception) -> None: error_util.handle_uncaught_app_exception = bugsnag_handler # type: ignore -def chart_html(chart_config: Dict[str, Any], base_url, base_api_url, height=500, **kwargs): - chart_config["bakedGrapherURL"] = f"http://{base_url}/grapher" - chart_config["adminBaseUrl"] = f"http://{base_url}" - chart_config["dataApiUrl"] = base_api_url +def chart_html(chart_config: Dict[str, Any], owid_env: OWIDEnv, height=500, **kwargs): + chart_config["bakedGrapherURL"] = f"{owid_env.base_site}/grapher" + chart_config["adminBaseUrl"] = owid_env.base_site + chart_config["dataApiUrl"] = owid_env.indicators_url HTML = f""" diff --git a/apps/wizard/utils/env.py b/apps/wizard/utils/env.py 
index b162ee3c0fc..3ed65a3d1bb 100644 --- a/apps/wizard/utils/env.py +++ b/apps/wizard/utils/env.py @@ -1,77 +1,122 @@ """Tools to handle OWID environment.""" -from typing import Literal, Optional +import re +from dataclasses import dataclass, fields +from pathlib import Path +from typing import Literal, cast +from dotenv import dotenv_values from typing_extensions import Self from etl import config +from etl.db import Engine, get_engine -OWIDEnvType = Literal["live", "staging", "local", "remote-staging", "unknown"] +OWIDEnvType = Literal["production", "local", "staging", "unknown"] + + +@dataclass +class Config: + """Configuration for OWID environment which is a subset of etl.config.""" + + DB_USER: str + DB_NAME: str + DB_PASS: str + DB_PORT: str + DB_HOST: str + + @classmethod + def from_env_file(cls, env_file: str) -> Self: + env_dict = dotenv_values(env_file) + config_dict = {field.name: env_dict[field.name] for field in fields(cls)} + return cls(**config_dict) # type: ignore + + +class UnknownOWIDEnv(Exception): + pass class OWIDEnv: """OWID environment.""" env_type_id: OWIDEnvType + conf: Config def __init__( self: Self, - env_type_id: Optional[OWIDEnvType] = None, + conf: Config | None = None, ) -> None: - if env_type_id is None: - self.env_type_id = self.detect_env_type() - else: - self.env_type_id = env_type_id + self.conf = conf or cast(Config, config) + self.env_type_id = self.detect_env_type() def detect_env_type(self: Self) -> OWIDEnvType: """Detect environment type.""" - # live - if config.DB_NAME == "live_grapher": - return "live" - # staging - elif config.DB_NAME == "staging_grapher" and config.DB_USER == "staging_grapher": - return "staging" + # production + if self.conf.DB_NAME == "live_grapher": + return "production" # local - elif config.DB_NAME == "grapher" and config.DB_USER == "grapher": + elif self.conf.DB_NAME == "grapher" and self.conf.DB_USER == "grapher": return "local" # other - elif config.DB_NAME == "owid" and config.DB_USER == 
"owid": - return "remote-staging" + elif self.conf.DB_NAME == "owid" and self.conf.DB_USER == "owid": + return "staging" return "unknown" + @classmethod + def from_staging(cls, branch: str) -> Self: + """Create OWIDEnv for staging.""" + conf = Config( + DB_USER="owid", + DB_NAME="owid", + DB_PASS="", + DB_PORT="3306", + DB_HOST=get_container_name(branch), + ) + return cls(conf) + + @classmethod + def from_env_file(cls, env_file: str) -> Self: + """Create OWIDEnv from env file.""" + assert Path(env_file).exists(), f"ENV file {env_file} doesn't exist" + return cls(conf=Config.from_env_file(env_file)) + + @classmethod + def from_staging_or_env_file(cls, staging_or_env_file: str) -> Self: + """Create OWIDEnv from staging or env file.""" + if Path(staging_or_env_file).exists(): + return cls.from_env_file(staging_or_env_file) + return cls.from_staging(staging_or_env_file) + + def get_engine(self) -> Engine: + """Get engine for env.""" + return get_engine(self.conf.__dict__) + @property def site(self) -> str | None: """Get site.""" - if self.env_type_id == "live": + if self.env_type_id == "production": return "https://ourworldindata.org" - elif self.env_type_id == "staging": - return "https://staging.ourworldindata.org" elif self.env_type_id == "local": return "http://localhost:3030" - elif self.env_type_id == "remote-staging": - return f"http://{config.DB_HOST}" + elif self.env_type_id == "staging": + return f"http://{self.conf.DB_HOST}" return None @property def name(self) -> str: """Get site.""" - if self.env_type_id == "live": + if self.env_type_id == "production": return "production" - elif self.env_type_id == "staging": - return "staging" elif self.env_type_id == "local": return "local" - elif self.env_type_id == "remote-staging": - return f"{config.DB_HOST}" + elif self.env_type_id == "staging": + return f"{self.conf.DB_HOST}" raise ValueError("Unknown env_type_id") @property def base_site(self) -> str | None: """Get site.""" - if self.env_type_id == "live": + if 
self.env_type_id == "production": return "https://admin.owid.io" - elif self.env_type_id == "staging": - return "https://staging.owid.cloud" - elif self.env_type_id in ["local", "remote-staging"]: + elif self.env_type_id in ["local", "staging"]: return self.site return None @@ -83,11 +128,28 @@ def admin_site( if self.base_site: return f"{self.base_site}/admin" + @property + def api_site(self: Self) -> str: + """Get api url.""" + if self.env_type_id == "production": + return "https://api.ourworldindata.org" + elif self.env_type_id == "staging": + return f"https://api-staging.owid.io/{self.conf.DB_HOST}" + elif self.env_type_id == "local": + return "http://localhost:8000" + else: + raise UnknownOWIDEnv() + @property def chart_approval_tool_url(self: Self) -> str: """Get chart approval tool url.""" return f"{self.admin_site}/suggested-chart-revisions/review" + @property + def indicators_url(self: Self) -> str: + """Get indicators url.""" + return self.api_site + "/v1/indicators/" + def dataset_admin_site(self: Self, dataset_id: str | int) -> str: """Get dataset admin url.""" return f"{self.admin_site}/datasets/{dataset_id}/" @@ -112,4 +174,20 @@ def thumb_url(self, slug: str): return f"{self.site}/grapher/thumbnail/{slug}.png" +def _normalise_branch(branch_name): + return re.sub(r"[\/\._]", "-", branch_name) + + +def get_container_name(branch_name): + normalized_branch = _normalise_branch(branch_name) + + # Strip staging-site- prefix to add it back later + normalized_branch = normalized_branch.replace("staging-site-", "") + + # Ensure the container name is less than 63 characters + container_name = f"staging-site-{normalized_branch[:50]}" + # Remove trailing hyphens + return container_name.rstrip("-") + + OWID_ENV = OWIDEnv() diff --git a/dag/democracy.yml b/dag/democracy.yml index 9aaa682f198..050b64845a0 100644 --- a/dag/democracy.yml +++ b/dag/democracy.yml @@ -59,3 +59,19 @@ steps: - data://garden/demography/2023-03-31/population 
data://grapher/democracy/2024-05-16/fh: - data://garden/democracy/2024-03-07/fh + + # Claassen Mood (2022) + data://meadow/democracy/2024-05-22/claassen_mood: + - snapshot://democracy/2024-05-22/claassen_mood.csv + data://garden/democracy/2024-03-07/claassen_mood: + - data://meadow/democracy/2024-05-22/claassen_mood + data://grapher/democracy/2024-05-22/claassen_mood: + - data://garden/democracy/2024-03-07/claassen_mood + + # Claassen Satisfaction (2022) + data://meadow/democracy/2024-05-22/claassen_satisfaction: + - snapshot://democracy/2024-05-22/claassen_satisfaction.csv + data://garden/democracy/2024-03-07/claassen_satisfaction: + - data://meadow/democracy/2024-05-22/claassen_satisfaction + data://grapher/democracy/2024-05-22/claassen_satisfaction: + - data://garden/democracy/2024-03-07/claassen_satisfaction diff --git a/etl/config.py b/etl/config.py index 83ded5b4f05..c847e1b7b3d 100644 --- a/etl/config.py +++ b/etl/config.py @@ -60,6 +60,9 @@ def load_env(): DB_IS_PRODUCTION = DB_NAME == "live_grapher" +# Special ENV file with access to production DB (read-only), used by chart-diff +ENV_FILE_PROD = os.environ.get("ENV_FILE_PROD") + if "DATA_API_ENV" in env: DATA_API_ENV = env["DATA_API_ENV"] else: diff --git a/etl/steps/data/garden/democracy/2024-03-07/bti.countries.json b/etl/steps/data/garden/democracy/2024-03-07/bti.countries.json new file mode 100644 index 00000000000..15fe8e9c649 --- /dev/null +++ b/etl/steps/data/garden/democracy/2024-03-07/bti.countries.json @@ -0,0 +1,139 @@ +{ + "Afghanistan": "Afghanistan", + "Albania": "Albania", + "Algeria": "Algeria", + "Angola": "Angola", + "Argentina": "Argentina", + "Armenia": "Armenia", + "Azerbaijan": "Azerbaijan", + "Bahrain": "Bahrain", + "Bangladesh": "Bangladesh", + "Belarus": "Belarus", + "Benin": "Benin", + "Bhutan": "Bhutan", + "Bolivia": "Bolivia", + "Bosnia and Herzegovina": "Bosnia and Herzegovina", + "Botswana": "Botswana", + "Brazil": "Brazil", + "Bulgaria": "Bulgaria", + "Burkina Faso": "Burkina 
Faso", + "Burundi": "Burundi", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Central African Republic": "Central African Republic", + "Chad": "Chad", + "Chile": "Chile", + "China": "China", + "Colombia": "Colombia", + "Congo, Rep.": "Congo", + "Costa Rica": "Costa Rica", + "Croatia": "Croatia", + "Cuba": "Cuba", + "Czechia": "Czechia", + "C\u00f4te d'Ivoire": "Cote d'Ivoire", + "Djibouti": "Djibouti", + "Dominican Republic": "Dominican Republic", + "Ecuador": "Ecuador", + "Egypt": "Egypt", + "El Salvador": "El Salvador", + "Equatorial Guinea": "Equatorial Guinea", + "Eritrea": "Eritrea", + "Estonia": "Estonia", + "Eswatini": "Eswatini", + "Ethiopia": "Ethiopia", + "Gabon": "Gabon", + "Gambia": "Gambia", + "Georgia": "Georgia", + "Ghana": "Ghana", + "Guatemala": "Guatemala", + "Guinea": "Guinea", + "Guinea-Bissau": "Guinea-Bissau", + "Haiti": "Haiti", + "Honduras": "Honduras", + "Hungary": "Hungary", + "India": "India", + "Indonesia": "Indonesia", + "Iran": "Iran", + "Iraq": "Iraq", + "Jamaica": "Jamaica", + "Jordan": "Jordan", + "Kazakhstan": "Kazakhstan", + "Kenya": "Kenya", + "Kosovo": "Kosovo", + "Kuwait": "Kuwait", + "Kyrgyzstan": "Kyrgyzstan", + "Laos": "Laos", + "Latvia": "Latvia", + "Lebanon": "Lebanon", + "Lesotho": "Lesotho", + "Liberia": "Liberia", + "Libya": "Libya", + "Lithuania": "Lithuania", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Malaysia": "Malaysia", + "Mali": "Mali", + "Mauritania": "Mauritania", + "Mauritius": "Mauritius", + "Mexico": "Mexico", + "Moldova": "Moldova", + "Mongolia": "Mongolia", + "Montenegro": "Montenegro", + "Morocco": "Morocco", + "Mozambique": "Mozambique", + "Myanmar": "Myanmar", + "Namibia": "Namibia", + "Nepal": "Nepal", + "Nicaragua": "Nicaragua", + "Niger": "Niger", + "Nigeria": "Nigeria", + "North Korea": "North Korea", + "North Macedonia": "North Macedonia", + "Oman": "Oman", + "Pakistan": "Pakistan", + "Panama": "Panama", + "Papua New Guinea": "Papua New Guinea", + "Paraguay": "Paraguay", + 
"Peru": "Peru", + "Philippines": "Philippines", + "Poland": "Poland", + "Qatar": "Qatar", + "Romania": "Romania", + "Russia": "Russia", + "Rwanda": "Rwanda", + "Saudi Arabia": "Saudi Arabia", + "Senegal": "Senegal", + "Serbia": "Serbia", + "Sierra Leone": "Sierra Leone", + "Singapore": "Singapore", + "Slovakia": "Slovakia", + "Slovenia": "Slovenia", + "Somalia": "Somalia", + "South Africa": "South Africa", + "South Korea": "South Korea", + "South Sudan": "South Sudan", + "Sri Lanka": "Sri Lanka", + "Sudan": "Sudan", + "Syria": "Syria", + "Taiwan": "Taiwan", + "Tajikistan": "Tajikistan", + "Tanzania": "Tanzania", + "Thailand": "Thailand", + "Timor-Leste": "East Timor", + "Togo": "Togo", + "Trinidad and Tobago": "Trinidad and Tobago", + "Tunisia": "Tunisia", + "Turkmenistan": "Turkmenistan", + "Uganda": "Uganda", + "Ukraine": "Ukraine", + "United Arab Emirates": "United Arab Emirates", + "Uruguay": "Uruguay", + "Uzbekistan": "Uzbekistan", + "Venezuela": "Venezuela", + "Vietnam": "Vietnam", + "Yemen": "Yemen", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe", + "Congo, DR": "Democratic Republic of Congo", + "Tu\u0308rkiye": "Turkey" +} \ No newline at end of file diff --git a/etl/steps/data/garden/democracy/2024-03-07/bti.meta.yml b/etl/steps/data/garden/democracy/2024-03-07/bti.meta.yml new file mode 100644 index 00000000000..908916f54b4 --- /dev/null +++ b/etl/steps/data/garden/democracy/2024-03-07/bti.meta.yml @@ -0,0 +1,187 @@ +# NOTE: To learn more about the fields, hover over their names. 
+definitions: + common: + presentation: + topic_tags: + - Democracy + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + bti: + variables: + democracy_bti: + title: "Democracy score" + description_short: |- + Captures the extent of democratic features — political participation, rule of law, stable democratic institutions, political and social integration, and a capable state. It ranges from 1 to 10 (most democratic). + description_processing: |- + Values for continents have been obtained by averaging the values of the countries in the continent. + unit: "" + + regime_bti: + title: "Political regime" + description_short: |- + Identifies the political regime of a country. It distinguishes between hard-line autocracies (score 1), moderate autocracies (score 2), highly defective democracies (score 3), defective democracies (score 4), and consolidating democracies (score 5). + description_key: + - Consolidating democracies have comprehensive democratic features and minimum democratic characteristics (citizens can choose political leaders in free and fair elections and enjoy freedoms of association, expression and some further civil liberties, political power is separated, and leaders can effectively govern a state that fulfils basic functions). + - Defective democracies have minimum democratic characteristics, but limited other democratic features. + - Highly defective democracies have minimum democratic characteristics, but very limited other democratic features. + - Moderate autocracies have no minimum democratic characteristics, but possibly other broadly democratic features. + - Hard-line autocracies have no minimum democratic characteristics, and few other democratic features. 
+ - It matches the variable cat_dem_stat in Bertelsmann Transformation Index (2022), with some errors for the year 2021 — classifying some democracies as autocracies and vice versa — corrected. + unit: "" + + electfreefair_bti: + title: "Free and fair elections" + description_short: |- + Indicates the extent to which elections are free from irregularities, the rights to vote, run for office, and campaign are unrestricted, registration is easy, and campaigning and media access are fair. + description_key: + - "Score of 1: National elections, if held at all, are entirely unfree and unfair." + - "Score of 4: General elections are held, but serious irregularities during voting process and ballot count occur. The rights to vote, campaign and run for office are restricted, and elections have de facto only limited influence over who governs." + - "Score of 7: General, multi-party elections are held, conducted properly and accepted as the means of filling political posts. However, there are some constraints on the fairness of the elections with regard to registration, campaigning or media access." + - "Score of 10: There are no constraints on free and fair elections." + - "The remaining scores are intermediate categories." + unit: "" + + effective_power_bti: + title: "Effective power to govern" + description_short: |- + Indicates the extent to which no individual or group holds de facto veto power over democratically elected political leaders. + description_key: + - "Score of 1: Political decision-makers are not elected or elected bodies have no influence at all." + - "Score of 4: Democratically elected political representatives have limited power to govern. Strong veto groups are able to undermine fundamental elements of democratic procedures." + - "Score of 7: Democratically elected political representatives have considerable power to govern. However, individual power groups can set their own domains apart or enforce special-interest policies." 
+ - "Score of 10: Democratically elected political representatives have the effective power to govern. No individual or group is holding any de facto veto power." + - The remaining scores are intermediate categories. + unit: "" + + freeassoc_bti: + title: "Freedom of association" + description_short: |- + Indicates the extent to which residents and civic groups are free from government interference to associate and assemble. + description_key: + - "Score of 1: Association and assembly rights are denied. Independent civic groups do not exist or are prohibited." + - "Score of 4: Association and assembly rights are often subject to interference or government restrictions. Residents and civic groups that do not support the government often cannot exercise these rights." + - "Score of 7: Association and assembly rights are occasionally subject to interference or government restrictions, but generally there are no outright prohibitions of independent political or civic groups." + - "Score of 10: Association and assembly rights are guaranteed against interference or government restrictions. Residents and civic groups can fully exercise these rights." + - The remaining scores are intermediate categories. + unit: "" + + freeexpr_bti: + title: "Freedom of expression" + description_short: |- + Indicates the extent to which individuals, groups, and the press can express their views free from government interference. + description_key: + - "Score of 1: Freedom of expression is denied. Independent media do not exist or are prohibited." + - "Score of 4: Freedom of expression is often subject to interference or government restrictions. Distortion and manipulation shape matters of public debate." + - "Score of 7: Freedom of expression is occasionally subject to interference or government restrictions, but there are generally no incidents of blatant intrusions like outright state censorship or media shutdowns." 
+ - "Score of 10: Freedom of expression is guaranteed against interference or government restrictions. Individuals, groups and the press can fully exercise these rights." + - The remaining scores are intermediate categories. + unit: "" + + sep_power_bti: + title: "Separation of powers" + description_short: |- + Indicates the extent of mutual checks and balances on political powers. + description_key: + - "Score of 1: There is no separation of powers, neither de jure nor de facto." + - "Score of 4: The separation of powers is formally established but weak in practice. One branch, generally the executive, has largely undermined checks and balances." + - "Score of 7: The separation of powers is in place and functioning. Checks and balances are occasionally subject to interference, but a restoration of balance is sought." + - "Score of 10: There is a clear separation of powers with mutual checks and balances." + - The remaining scores are intermediate categories. + unit: "" + + civ_rights_bti: + title: "Civil rights" + description_short: |- + Indicates the extent to which civil rights are codified in law, and the state prosecutes any violations and works to prevent discrimination. + description_key: + - "Score of 1: Civil rights are systematically violated. There are no mechanisms and institutions to protect residents against violations of their rights." + - "Score of 4: Civil rights are codified by law, but even the most fundamental rights (i.e., to life, liberty and physical integrity) are violated in practice. Mechanisms and institutions to prosecute, punish and redress violations of civil rights are largely ineffective." + - "Score of 7: Civil rights are codified by law, but are not properly respected and protected. Mechanisms and institutions to prosecute, punish and redress violations of civil rights are in place, but are not consistently effective." 
+ - "Score of 10: Civil rights are codified by law and respected by all state institutions, which actively prevent discrimination. Residents are effectively protected by mechanisms and institutions established to prosecute, punish and redress violations of their rights." + - The remaining scores are intermediate categories. + unit: "" + + state_bti: + title: "Stateness" + description_short: |- + Indicates the extent to which the state has the monopoly on the use of force, provides basic public services across the country, citizens accept the state as legitimate, religious dogmas do not interfere, and citizenship is accessible. + description_key: + - It ranges from countries with poor stateness (scores of 1 to 2.49), flawed stateness (scores of 2.5 to 4.49), fair stateness (scores of 4.5 to 6.49), and sound stateness (scores of 6.5 to 8.49) to countries with excellent stateness (scores of 8.5 to 10). + unit: "" + + state_basic_bti: + title: "Basic state functions" + description_short: |- + Indicates the extent to which the state has the monopoly on the use of force and provides basic public services across the country. It ranges from 1 to 10 (most functioning). + unit: "" + + political_participation_bti: + title: "Political participation" + description_short: |- + Indicates the extent to which elections are free and fair, democratically elected leaders have the effective power to govern, and citizens have the freedoms of association and expression. + description_key: + - It ranges from countries with poor political participation (scores of 1 to 2.49), over flawed political participation (scores of 2.5 to 4.49), fair political participation (scores of 4.5 to 6.49), and sound political participation (scores of 6.5 to 8.49), to countries with excellent political participation (scores of 8.5 to 10).
+ unit: "" + + rule_of_law_bti: + title: "Rule of law" + description_short: |- + Indicates the extent to which political power is separated, the judiciary is independent, office abuse is prosecuted, and civil rights are enforced. + description_key: + - It ranges from countries with poor rule of law (scores of 1 to 2.49), over flawed rule of law (scores of 2.5 to 4.49), fair rule of law (scores of 4.5 to 6.49), and sound rule of law (scores of 6.5 to 8.49), to countries with excellent rule of law (scores of 8.5 to 10). + unit: "" + + stability_dem_inst_bti: + title: "Stability of democratic institutions" + description_short: |- + Indicates the extent to which democratic institutions are effective and efficient and all relevant actors accept them. + description_key: + - It ranges from countries with poor stability (scores of 1 to 2.49), over flawed stability (scores of 2.5 to 4.49), fair stability (scores of 4.5 to 6.49), and sound stability (scores of 6.5 to 8.49), to countries with excellent stability (scores of 8.5 to 10). + unit: "" + + pol_soc_integr_bti: + title: "Political and social integration" + description_short: |- + Indicates the extent to which the party system is stable and rooted in society, interest groups are numerous and diverse, regular citizens approve of democratic norms and procedures, and societal trust is high. + description_key: + - It ranges from countries with poor integration (scores of 1 to 2.49), over flawed integration (scores of 2.5 to 4.49), fair integration (scores of 4.5 to 6.49), and sound integration (scores of 6.5 to 8.49), to countries with excellent political and social integration (scores of 8.5 to 10).
+ unit: "" + + num_countries: + variables: + num_regime_bti: + title: |- + <% if category == '-1' %> + Number of countries with unknown regime + <% else %> + Number of << category.replace('_', ' ').replace('cracy', 'cracies').replace('archy', 'archies') >> + <% endif %> + unit: "countries" + + + num_people: + variables: + pop_regime_bti: + title: |- + <% if category == '-1' %> + Number of people living in countries with unknown regime + <% else %> + Number of people living in << category.replace('_', ' ').replace('cracy', 'cracies').replace('archy', 'archies') >> + <% endif %> + description_short: "Number of countries with available data." + unit: "people" + + + avg_pop: + variables: + democracy_bti_weighted: + title: "Democracy score (population-weighted)" + description_short: Average democratic status, weighted by population. + unit: "" diff --git a/etl/steps/data/garden/democracy/2024-03-07/bti.py b/etl/steps/data/garden/democracy/2024-03-07/bti.py new file mode 100644 index 00000000000..4a1b9b08c14 --- /dev/null +++ b/etl/steps/data/garden/democracy/2024-03-07/bti.py @@ -0,0 +1,389 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from typing import Tuple, cast + +import numpy as np +from owid.catalog import Dataset, Table +from owid.catalog.tables import concat +from shared import ( + add_population_in_dummies, + add_regions_and_global_aggregates, + expand_observations, + from_wide_to_long, + make_table_with_dummies, +) + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) +# Missing classifications of states +REGIONS = { + "Africa": {}, + "Asia": {}, + "North America": {}, + "South America": {}, + "Europe": {}, + "Oceania": {}, +} +# Year range +YEAR_MIN = 2005 +YEAR_MAX = 2023 + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. 
+ ds_meadow = paths.load_dataset("bti") + ds_regions = paths.load_dataset("regions") + ds_population = paths.load_dataset("population") + + # Read table from meadow dataset. + tb = ds_meadow["bti"].reset_index() + + # + # Process data. + # + tb = geo.harmonize_countries( + df=tb, + countries_file=paths.country_mapping_path, + ) + + # Invert the scores of regime_bti + tb["regime_bti"] = 5 - (tb["regime_bti"] - 1) + + # Sanity checks + tb = check_pol_sys(tb) + tb = check_regime(tb) + tb = tb.drop( + columns=[ + "pol_sys", + ] + ) + + ################################################## + # AGGREGATES + + # Get country-count-related data: country-averages, number of countries, ... + tb_num_countries, tb_avg_countries = get_country_data(tb, ds_regions) + + # Get population-related data: population-weighed averages, people livin in ... + tb_num_people, tb_avg_w_countries = get_population_data(tb, ds_regions, ds_population) + ################################################## + + # Add regions to main table + tb = concat([tb, tb_avg_countries], ignore_index=True) + + # + # Save outputs. + # + tables = [ + tb.format(["country", "year"]), + tb_num_countries.format(["country", "year", "category"], short_name="num_countries"), + tb_num_people.format(["country", "year", "category"], short_name="num_people"), + tb_avg_w_countries.format(["country", "year"], short_name="avg_pop"), + ] + + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=tables, check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() + + +def check_pol_sys(tb: Table) -> Table: + """Sanity-check the indicator. + + Some years looked off in the previous edition, this is a sanity check. 
+ """ + col_tmp = "pol_sys_check" + + tb.loc[ + (tb["electfreefair_bti"] >= 6) + & (tb["electfreefair_bti"].notna()) + & (tb["effective_power_bti"] >= 4) + & (tb["effective_power_bti"].notna()) + & (tb["freeassoc_bti"] >= 4) + & (tb["freeassoc_bti"].notna()) + & (tb["freeexpr_bti"] >= 4) + & (tb["freeexpr_bti"].notna()) + & (tb["sep_power_bti"] >= 4) + & (tb["sep_power_bti"].notna()) + & (tb["civ_rights_bti"] >= 4) + & (tb["civ_rights_bti"].notna()) + & (tb["state_basic_bti"] >= 3) + & (tb["state_basic_bti"].notna()), + col_tmp, + ] = 1 + + # Replace pol_sys_check = 0 if any condition is not met + tb.loc[ + (tb["electfreefair_bti"] < 6) + | (tb["effective_power_bti"] < 4) + | (tb["freeassoc_bti"] < 4) + | (tb["freeexpr_bti"] < 4) + | (tb["sep_power_bti"] < 4) + | (tb["civ_rights_bti"] < 4) + | (tb["state_basic_bti"] < 3), + col_tmp, + ] = 0 + + # print(tb[["pol_sys", "pol_sys_check"]].dropna().value_counts()) + + assert (tb["pol_sys"] == tb[col_tmp]).all(), "Miss-labelled `pol_sys`." + + tb = tb.drop(columns=[col_tmp]) + + return tb + + +def check_regime(tb: Table) -> Table: + col_tmp = "regime_bti_check" + tb.loc[(tb["pol_sys"] == 0) & (tb["democracy_bti"] >= 1) & (tb["democracy_bti"] < 4), col_tmp] = 1 + tb.loc[(tb["pol_sys"] == 0) & (tb["democracy_bti"] >= 4) & (tb["democracy_bti"] <= 10), col_tmp] = 2 + tb.loc[(tb["pol_sys"] == 1) & (tb["democracy_bti"] >= 1) & (tb["democracy_bti"] < 6), col_tmp] = 3 + tb.loc[(tb["pol_sys"] == 1) & (tb["democracy_bti"] >= 6) & (tb["democracy_bti"] < 8), col_tmp] = 4 + tb.loc[(tb["pol_sys"] == 1) & (tb["democracy_bti"] >= 8) & (tb["democracy_bti"] <= 10), col_tmp] = 5 + + tb[col_tmp] = tb[col_tmp].astype("UInt8") + + assert (tb["regime_bti"] == tb[col_tmp]).all(), "Miss-labelled `regime_bti`." + + tb = tb.drop(columns=[col_tmp]) + return tb + + +def get_country_data(tb: Table, ds_regions: Dataset) -> Tuple[Table, Table]: + """Estimate number of countries in each regime, and country-average for some indicators. 
+ + Returns two tables: + + 1) tb_num_countres: Counts countries in different regimes + regime_bti (counts) + - Number of hard-line autocracies + - Number of moderate autocracies + - Number of highly defective democracies + - Number of defective democracies + - Number of consolidating democracies + + 2) tb_avg_countries: Country-average for some indicators + - democracy_bti (country-average) + + """ + # 1/ COUNT COUNTRIES + # Keep only non-imputed data + tb_num = tb.copy() + + # Set INTs + tb_num = tb_num.astype( + { + "regime_bti": "Int64", + } + ) + tb_num = cast(Table, tb_num) + + # Define columns on which we will estimate (i) "number of countries" and (ii) "number of people living in ..." + indicators = [ + { + "name": "regime_bti", + "name_new": "num_regime_bti", + "values_expected": { + "1": "hard-line autocracy", + "2": "moderate autocracy", + "3": "highly defective democracy", + "4": "defective democracy", + "5": "consolidating democracy", + }, + "has_na": True, + }, + ] + + # Column per indicator-dimension + tb_num = make_table_with_dummies(tb_num, indicators) + + # Add regions and global aggregates + tb_num = add_regions_and_global_aggregates(tb_num, ds_regions) + tb_num = from_wide_to_long(tb_num) + + # 2/ COUNTRY-AVERAGE INDICATORS + tb_avg = tb.copy() + indicators_avg = ["democracy_bti"] + + # Keep only relevant columns + tb_avg = tb_avg.loc[:, ["year", "country"] + indicators_avg] + + # Estimate region aggregates + tb_avg = add_regions_and_global_aggregates( + tb=tb_avg, + ds_regions=ds_regions, + aggregations={k: "mean" for k in indicators_avg}, # type: ignore + aggregations_world={k: np.mean for k in indicators_avg}, # type: ignore + ) + + # Keep only certain year range + # tb_avg = tb_avg.loc[tb_avg["year"].between(YEAR_AGG_MIN, YEAR_AGG_MAX)] + + return tb_num, tb_avg + + +def get_population_data(tb: Table, ds_regions: Dataset, ds_population: Dataset) -> Tuple[Table, Table]: + """Estimate people living in each regime, and population-weighted 
averages for some indicators. + + 1) tb_num_people: People living in different regimes + regime_bti + - Number of hard-line autocracies + - Number of moderate autocracies + - Number of highly defective democracies + - Number of defective democracies + - Number of consolidating democracies + + 2) tb_avg_w_countries: Population-weighted-average for some indicators + - democracy_bti + + """ + # 1/ COUNT PEOPLE + # Keep only non-imputed data + tb_ppl = tb.copy() + + # Set INTs + tb_ppl = tb_ppl.astype( + { + "regime_bti": "Int64", + } + ) + tb_ppl = cast(Table, tb_ppl) + + indicators = [ + { + "name": "regime_bti", + "name_new": "pop_regime_bti", + "values_expected": { + "1": "hard-line autocracy", + "2": "moderate autocracy", + "4": "defective democracy", + "3": "highly defective democracy", + "5": "consolidating democracy", + }, + "has_na": True, + }, + ] + + ## Get missing years (not to miss anyone!) -- Note that this can lead to country overlaps (e.g. USSR and Latvia) + tb_ppl = expand_observations_without_duplicates(tb_ppl, ds_regions) + print(f"{tb.shape} -> {tb_ppl.shape}") + + # Column per indicator-dimension + tb_ppl = make_table_with_dummies(tb_ppl, indicators) + + # Replace USSR -> current states + # tb_ppl = replace_ussr(tb_ppl, ds_regions) + + ## Counts + tb_ppl = add_population_in_dummies(tb_ppl, ds_population) + tb_ppl = add_regions_and_global_aggregates(tb_ppl, ds_regions) + tb_ppl = from_wide_to_long(tb_ppl) + + # 2/ COUNTRY-AVERAGE INDICATORS + tb_avg = tb.copy() + indicators_avg = ["democracy_bti"] + + # Keep only relevant columns + tb_avg = tb_avg.loc[:, ["year", "country"] + indicators_avg] + + # Add population in dummies (population value replaces 1, 0 otherwise) + tb_avg = add_population_in_dummies( + tb_avg, + ds_population, + drop_population=False, + ) + + # Get region aggregates + tb_avg = add_regions_and_global_aggregates( + tb=tb_avg, + ds_regions=ds_regions, + aggregations={k: "sum" for k in indicators_avg} | {"population": "sum"}, # type: 
ignore + min_num_values_per_year=1, + ) + + # Normalize by region's population + columns_index = ["year", "country"] + columns_indicators = [col for col in tb_avg.columns if col not in columns_index + ["population"]] + tb_avg[columns_indicators] = tb_avg[columns_indicators].div(tb_avg["population"], axis=0) + tb_avg = tb_avg.drop(columns="population") + + # Keep only certain year range + # tb_avg = tb_avg.loc[tb_avg["year"].between(YEAR_AGG_MIN, YEAR_AGG_MAX)] + + tb_avg = tb_avg.rename( + columns={ + "democracy_bti": "democracy_bti_weighted", + } + ) + return tb_ppl, tb_avg + + +def expand_observations_without_duplicates(tb: Table, ds_regions: Dataset) -> Table: + # Get list of regions + tb_regions = ds_regions["regions"] + countries = set(tb_regions.loc[(tb_regions["region_type"] == "country") & ~(tb_regions["is_historical"]), "name"]) + countries |= set(tb["country"]) + + # Full expansion + tb_exp = expand_observations(tb, countries) + + # Limit years + tb_exp = tb_exp.loc[tb_exp["year"].isin(range(YEAR_MIN, YEAR_MAX + 1, 2))] + # tb_exp = tb_exp[tb_exp["year"].between(YEAR_AGG_MIN, YEAR_AGG_MAX)] + + # # Limit entries to avoid duplicates + # tb_exp = tb_exp.loc[ + # ~( + # # YUGOSLAVIA + # ((tb_exp["country"] == "Yugoslavia") & ((tb_exp["year"] > 1990) | (tb_exp["year"] < 1921))) + # | ((tb_exp["country"] == "Slovenia") & ((tb_exp["year"] >= 1921) & (tb_exp["year"] <= 1990))) + # | ((tb_exp["country"] == "North Macedonia") & ((tb_exp["year"] >= 1921) & (tb_exp["year"] <= 1990))) + # | ((tb_exp["country"] == "Croatia") & ((tb_exp["year"] >= 1921) & (tb_exp["year"] <= 1990))) + # | ((tb_exp["country"] == "Serbia and Montenegro") & ((tb_exp["year"] > 2005) | (tb_exp["year"] <= 1990))) + # ## YUG 2 + # | ((tb_exp["country"] == "Bosnia and Herzegovina") & ((tb_exp["year"] >= 1921) & (tb_exp["year"] <= 1991))) + # | ((tb_exp["country"] == "Serbia") & ((tb_exp["year"] >= 1921) & (tb_exp["year"] <= 2005))) + # | ((tb_exp["country"] == "Montenegro") & ((tb_exp["year"] 
>= 1921) & (tb_exp["year"] <= 2005))) + # | ((tb_exp["country"] == "Kosovo") & ((tb_exp["year"] >= 1921) & (tb_exp["year"] <= 2007))) + # # YEMEN + # | ((tb_exp["country"] == "Yemen Arab Republic") & ((tb_exp["year"] > 1989) | (tb_exp["year"] < 1940))) + # | ((tb_exp["country"] == "Yemen People's Republic") & ((tb_exp["year"] > 1989) | (tb_exp["year"] < 1940))) + # | ((tb_exp["country"] == "Yemen") & ((tb_exp["year"] >= 1940) & (tb_exp["year"] <= 1989))) + # # GERMANY + # | ((tb_exp["country"] == "West Germany") & ((tb_exp["year"] > 1989) | (tb_exp["year"] < 1949))) + # | ((tb_exp["country"] == "East Germany") & ((tb_exp["year"] > 1989) | (tb_exp["year"] < 1949))) + # | ((tb_exp["country"] == "Germany") & (tb_exp["year"] >= 1949) & (tb_exp["year"] <= 1989)) + # # USSR + # | ((tb_exp["country"] == "USSR") & ((tb_exp["year"] > 1991) | (tb_exp["year"] < 1941))) + # | ((tb_exp["country"] == "Uzbekistan") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Kazakhstan") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Turkmenistan") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Kyrgyzstan") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Tajikistan") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Russia") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Ukraine") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Belarus") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Moldova") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Latvia") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Lithuania") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Estonia") & (tb_exp["year"] >= 1941) 
& (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Armenia") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Georgia") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # | ((tb_exp["country"] == "Azerbaijan") & (tb_exp["year"] >= 1941) & (tb_exp["year"] <= 1991)) + # # CZECHOSLOVAKIA + # | ((tb_exp["country"] == "Czechoslovakia") & ((tb_exp["year"] > 1992) | (tb_exp["year"] < 1918))) + # | ((tb_exp["country"] == "Czechia") & ((tb_exp["year"] <= 1992) & (tb_exp["year"] >= 1918))) + # | ((tb_exp["country"] == "Slovakia") & ((tb_exp["year"] <= 1992) & (tb_exp["year"] >= 1918))) + # ), + # ] + + return tb_exp diff --git a/etl/steps/data/garden/democracy/2024-03-07/claassen_mood.countries.json b/etl/steps/data/garden/democracy/2024-03-07/claassen_mood.countries.json new file mode 100644 index 00000000000..7bb1ec01bf4 --- /dev/null +++ b/etl/steps/data/garden/democracy/2024-03-07/claassen_mood.countries.json @@ -0,0 +1,146 @@ +{ + "Albania": "Albania", + "Algeria": "Algeria", + "Andorra": "Andorra", + "Argentina": "Argentina", + "Armenia": "Armenia", + "Australia": "Australia", + "Austria": "Austria", + "Azerbaijan": "Azerbaijan", + "Bahrain": "Bahrain", + "Bangladesh": "Bangladesh", + "Belarus": "Belarus", + "Belgium": "Belgium", + "Belize": "Belize", + "Benin": "Benin", + "Bolivia": "Bolivia", + "Bosnia and Herzegovina": "Bosnia and Herzegovina", + "Botswana": "Botswana", + "Brazil": "Brazil", + "Bulgaria": "Bulgaria", + "Burkina Faso": "Burkina Faso", + "Burundi": "Burundi", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Canada": "Canada", + "Cape Verde": "Cape Verde", + "Chile": "Chile", + "China": "China", + "Colombia": "Colombia", + "Costa Rica": "Costa Rica", + "Croatia": "Croatia", + "Cyprus": "Cyprus", + "Czech Republic": "Czechia", + "Denmark": "Denmark", + "Dominican Republic": "Dominican Republic", + "Ecuador": "Ecuador", + "Egypt": "Egypt", + "El Salvador": "El Salvador", + "Estonia": "Estonia", + 
"Ethiopia": "Ethiopia", + "Finland": "Finland", + "France": "France", + "Gabon": "Gabon", + "Georgia": "Georgia", + "Germany": "Germany", + "Ghana": "Ghana", + "Greece": "Greece", + "Guatemala": "Guatemala", + "Guinea": "Guinea", + "Guyana": "Guyana", + "Haiti": "Haiti", + "Honduras": "Honduras", + "Hungary": "Hungary", + "Iceland": "Iceland", + "India": "India", + "Indonesia": "Indonesia", + "Iran": "Iran", + "Iraq": "Iraq", + "Ireland": "Ireland", + "Israel": "Israel", + "Italy": "Italy", + "Ivory Coast": "Cote d'Ivoire", + "Jamaica": "Jamaica", + "Japan": "Japan", + "Jordan": "Jordan", + "Kazakhstan": "Kazakhstan", + "Kenya": "Kenya", + "Kuwait": "Kuwait", + "Kyrgyzstan": "Kyrgyzstan", + "Latvia": "Latvia", + "Lebanon": "Lebanon", + "Lesotho": "Lesotho", + "Liberia": "Liberia", + "Libya": "Libya", + "Lithuania": "Lithuania", + "Luxembourg": "Luxembourg", + "Macedonia": "North Macedonia", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Malaysia": "Malaysia", + "Mali": "Mali", + "Malta": "Malta", + "Mauritius": "Mauritius", + "Mexico": "Mexico", + "Moldova": "Moldova", + "Mongolia": "Mongolia", + "Montenegro": "Montenegro", + "Morocco": "Morocco", + "Mozambique": "Mozambique", + "Myanmar": "Myanmar", + "Namibia": "Namibia", + "Nepal": "Nepal", + "Netherlands": "Netherlands", + "New Zealand": "New Zealand", + "Nicaragua": "Nicaragua", + "Niger": "Niger", + "Nigeria": "Nigeria", + "Norway": "Norway", + "Pakistan": "Pakistan", + "Palestine": "Palestine", + "Panama": "Panama", + "Paraguay": "Paraguay", + "Peru": "Peru", + "Philippines": "Philippines", + "Poland": "Poland", + "Portugal": "Portugal", + "Romania": "Romania", + "Russia": "Russia", + "Rwanda": "Rwanda", + "Sao Tome and Principe": "Sao Tome and Principe", + "Saudi Arabia": "Saudi Arabia", + "Senegal": "Senegal", + "Serbia": "Serbia", + "Sierra Leone": "Sierra Leone", + "Singapore": "Singapore", + "Slovakia": "Slovakia", + "Slovenia": "Slovenia", + "South Africa": "South Africa", + "South Korea": 
"South Korea", + "Spain": "Spain", + "Sri Lanka": "Sri Lanka", + "Sudan": "Sudan", + "Suriname": "Suriname", + "Swaziland": "Eswatini", + "Sweden": "Sweden", + "Switzerland": "Switzerland", + "Taiwan": "Taiwan", + "Tajikistan": "Tajikistan", + "Tanzania": "Tanzania", + "Thailand": "Thailand", + "Togo": "Togo", + "Trinidad and Tobago": "Trinidad and Tobago", + "Tunisia": "Tunisia", + "Turkey": "Turkey", + "Uganda": "Uganda", + "Ukraine": "Ukraine", + "United Kingdom": "United Kingdom", + "United States of America": "United States", + "Uruguay": "Uruguay", + "Uzbekistan": "Uzbekistan", + "Venezuela": "Venezuela", + "Vietnam": "Vietnam", + "Yemen": "Yemen", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe" +} \ No newline at end of file diff --git a/etl/steps/data/garden/democracy/2024-03-07/claassen_mood.meta.yml b/etl/steps/data/garden/democracy/2024-03-07/claassen_mood.meta.yml new file mode 100644 index 00000000000..84dac3663bb --- /dev/null +++ b/etl/steps/data/garden/democracy/2024-03-07/claassen_mood.meta.yml @@ -0,0 +1,47 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Democracy + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + claassen_mood: + variables: + democracy_support_claassen: + title: "Citizen support for democracy" + unit: "" + description_short: |- + Central estimate of the average extent to which citizens support a democratic political system and reject autocratic alternatives. It combines responses across more than one thousand nationally-representative surveys on how desirable citizens find democracy, how they evaluate undemocratic alternatives (such as a strong unelected leader, the army, or religious authorities), and how they assess democratic and autocratic political systems relative to another. 
+ description_processing: |- + The variable matches Claassen's variable `supdem`. + description_key: &descruption_key + - "Examples of survey questions include: 'Democracy may have its problems, but it is better than any other form of government. To what extent do you agree or disagree?', 'There are many ways to govern a country. Would you approve or disapprove of the following alternatives? Elections and Parliament are abolished so that the president can decide everything.', 'I will describe different political systems to you, and I want to ask you about your opinion of each one of them with regard to the country's governance. For each one would you say it is very good, good, bad, or very bad? — A democratic political systems (public freedoms, guarantees equality in political and civil rights, alternation of power, and accountability and transparency of the executive authority)'" + - Responses above the median were considered as support of democracy. Non-supportive respondents may have opposed democracy, may have given an indifferent answer, may have answered "I don't know", or may not have responded at all. + - Higher scores indicate more support. Positive scores mean that citizen support for democracy is higher than the average across all countries and years. A score of 1 means that citizen support lies one standard deviation above the average support. + + democracy_support_high_claassen: + title: "Citizen support for democracy, upper bound" + unit: "" + description_short: |- + Upper-bound estimate of the average extent to which citizens support a democratic political system and reject autocratic alternatives. It combines responses across more than one thousand nationally-representative surveys on how desirable citizens find democracy, how they evaluate undemocratic alternatives (such as a strong unelected leader, the army, or religious authorities), and how they assess democratic and autocratic political systems relative to another. 
+ description_key: *descruption_key + description_processing: |- + The variable matches Claassen's variable `supdem_u95`. + + democracy_support_low_claassen: + title: "Citizen support for democracy, lower bound" + unit: "" + description_short: |- + Lower-bound estimate of the average extent to which citizens support a democratic political system and reject autocratic alternatives. It combines responses across more than one thousand nationally-representative surveys on how desirable citizens find democracy, how they evaluate undemocratic alternatives (such as a strong unelected leader, the army, or religious authorities), and how they assess democratic and autocratic political systems relative to another. + description_processing: |- + The variable matches Claassen's variable `supdem_l95`. + description_key: *descruption_key + diff --git a/etl/steps/data/garden/democracy/2024-03-07/claassen_mood.py b/etl/steps/data/garden/democracy/2024-03-07/claassen_mood.py new file mode 100644 index 00000000000..f1464ee14ab --- /dev/null +++ b/etl/steps/data/garden/democracy/2024-03-07/claassen_mood.py @@ -0,0 +1,52 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("claassen_mood") + + # Read table from meadow dataset. + tb = ds_meadow["claassen_mood"].reset_index() + + # + # Process data.
+ # + tb = geo.harmonize_countries( + df=tb, + countries_file=paths.country_mapping_path, + ) + + # Keep relevant columns + tb = tb[["country", "year", "supdem", "supdem_u95", "supdem_l95"]] + + # Rename columns + tb = tb.rename( + columns={ + "supdem": "democracy_support_claassen", + "supdem_u95": "democracy_support_high_claassen", + "supdem_l95": "democracy_support_low_claassen", + } + ) + + # Format + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() diff --git a/etl/steps/data/garden/democracy/2024-03-07/claassen_satisfaction.countries.json b/etl/steps/data/garden/democracy/2024-03-07/claassen_satisfaction.countries.json new file mode 100644 index 00000000000..ca7c3a5df94 --- /dev/null +++ b/etl/steps/data/garden/democracy/2024-03-07/claassen_satisfaction.countries.json @@ -0,0 +1,134 @@ +{ + "Albania": "Albania", + "Algeria": "Algeria", + "Argentina": "Argentina", + "Armenia": "Armenia", + "Australia": "Australia", + "Austria": "Austria", + "Azerbaijan": "Azerbaijan", + "Bangladesh": "Bangladesh", + "Belarus": "Belarus", + "Belgium": "Belgium", + "Belize": "Belize", + "Benin": "Benin", + "Bolivia": "Bolivia", + "Bosnia and Herzegovina": "Bosnia and Herzegovina", + "Botswana": "Botswana", + "Brazil": "Brazil", + "Bulgaria": "Bulgaria", + "Burkina Faso": "Burkina Faso", + "Burundi": "Burundi", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Canada": "Canada", + "Cape Verde": "Cape Verde", + "Chile": "Chile", + "China": "China", + "Colombia": "Colombia", + "Costa Rica": "Costa Rica", + "Croatia": "Croatia", + "Cyprus": "Cyprus", + "Czech Republic": "Czechia", + "Denmark": "Denmark", + "Dominican Republic": "Dominican Republic", + "Ecuador": "Ecuador", + "Egypt": "Egypt", + "El 
Salvador": "El Salvador", + "Estonia": "Estonia", + "Finland": "Finland", + "France": "France", + "Gabon": "Gabon", + "Georgia": "Georgia", + "Germany": "Germany", + "Ghana": "Ghana", + "Greece": "Greece", + "Guatemala": "Guatemala", + "Guinea": "Guinea", + "Guyana": "Guyana", + "Haiti": "Haiti", + "Honduras": "Honduras", + "Hong Kong": "Hong Kong", + "Hungary": "Hungary", + "Iceland": "Iceland", + "India": "India", + "Indonesia": "Indonesia", + "Ireland": "Ireland", + "Israel": "Israel", + "Italy": "Italy", + "Ivory Coast": "Cote d'Ivoire", + "Jamaica": "Jamaica", + "Japan": "Japan", + "Jordan": "Jordan", + "Kazakhstan": "Kazakhstan", + "Kenya": "Kenya", + "Kosovo": "Kosovo", + "Kyrgyzstan": "Kyrgyzstan", + "Latvia": "Latvia", + "Lesotho": "Lesotho", + "Liberia": "Liberia", + "Lithuania": "Lithuania", + "Luxembourg": "Luxembourg", + "Macedonia": "North Macedonia", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Malaysia": "Malaysia", + "Mali": "Mali", + "Malta": "Malta", + "Mauritius": "Mauritius", + "Mexico": "Mexico", + "Moldova": "Moldova", + "Mongolia": "Mongolia", + "Montenegro": "Montenegro", + "Morocco": "Morocco", + "Mozambique": "Mozambique", + "Namibia": "Namibia", + "Nepal": "Nepal", + "Netherlands": "Netherlands", + "New Zealand": "New Zealand", + "Nicaragua": "Nicaragua", + "Niger": "Niger", + "Nigeria": "Nigeria", + "Norway": "Norway", + "Pakistan": "Pakistan", + "Panama": "Panama", + "Paraguay": "Paraguay", + "Peru": "Peru", + "Philippines": "Philippines", + "Poland": "Poland", + "Portugal": "Portugal", + "Romania": "Romania", + "Russia": "Russia", + "Sao Tome and Principe": "Sao Tome and Principe", + "Senegal": "Senegal", + "Serbia": "Serbia", + "Sierra Leone": "Sierra Leone", + "Singapore": "Singapore", + "Slovakia": "Slovakia", + "Slovenia": "Slovenia", + "South Africa": "South Africa", + "South Korea": "South Korea", + "Spain": "Spain", + "Sri Lanka": "Sri Lanka", + "Sudan": "Sudan", + "Suriname": "Suriname", + "Swaziland": "Eswatini", + 
"Sweden": "Sweden", + "Switzerland": "Switzerland", + "Taiwan": "Taiwan", + "Tanzania": "Tanzania", + "Thailand": "Thailand", + "Togo": "Togo", + "Trinidad and Tobago": "Trinidad and Tobago", + "Tunisia": "Tunisia", + "Turkey": "Turkey", + "Uganda": "Uganda", + "Ukraine": "Ukraine", + "United Kingdom": "United Kingdom", + "United States of America": "United States", + "Uruguay": "Uruguay", + "Uzbekistan": "Uzbekistan", + "Venezuela": "Venezuela", + "Vietnam": "Vietnam", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe" +} diff --git a/etl/steps/data/garden/democracy/2024-03-07/claassen_satisfaction.meta.yml b/etl/steps/data/garden/democracy/2024-03-07/claassen_satisfaction.meta.yml new file mode 100644 index 00000000000..40558bdd354 --- /dev/null +++ b/etl/steps/data/garden/democracy/2024-03-07/claassen_satisfaction.meta.yml @@ -0,0 +1,47 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Democracy + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + claassen_satisfaction: + variables: + democracy_satisf_claassen: + title: "Citizen satisfaction with democracy" + unit: "" + description_short: |- + Central estimate of the average extent to which citizens are satisfied with democracy in their own country. It combines responses across more than one thousand nationally-representative surveys on how citizens' satisfaction with democracy. + description_processing: |- + The variable matches Claassen's variable `satis`. + description_key: &descruption_key + - "An examples of a survey questions is: 'On the whole, how satisfied or dissatisfied are you with the way democracy works in [own country]?'" + - Responses above the median were considered as satisfaction with democracy. 
Dissatisfied respondents may have actively expressed dissatisfaction with democracy, may have given an indifferent answer, may have answered "I don't know", or may not have responded at all. + - Higher scores indicate more satisfaction. Positive scores mean that citizen satisfaction with democracy is higher than the average across all countries and years. A score of 1 means that citizen satisfaction lies one standard deviation above the average satisfaction. + + democracy_satisf_high_claassen: + title: "Citizen satisfaction with democracy, upper bound" + unit: "" + description_short: |- + Upper-bound estimate of the average extent to which citizens are satisfied with democracy in their own country. It combines responses across more than one thousand nationally-representative surveys on citizens' satisfaction with democracy. + description_key: *descruption_key + description_processing: |- + The variable matches Claassen's variable `satis_u95`. + + democracy_satisf_low_claassen: + title: "Citizen satisfaction with democracy, lower bound" + unit: "" + description_short: |- + Lower-bound estimate of the average extent to which citizens are satisfied with democracy in their own country. It combines responses across more than one thousand nationally-representative surveys on citizens' satisfaction with democracy. + description_from_producer: |- + The variable matches Claassen's variable `satis_l95`. + description_key: *descruption_key + diff --git a/etl/steps/data/garden/democracy/2024-03-07/claassen_satisfaction.py b/etl/steps/data/garden/democracy/2024-03-07/claassen_satisfaction.py new file mode 100644 index 00000000000..5278b207c66 --- /dev/null +++ b/etl/steps/data/garden/democracy/2024-03-07/claassen_satisfaction.py @@ -0,0 +1,52 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step.
+paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("claassen_satisfaction") + + # Read table from meadow dataset. + tb = ds_meadow["claassen_satisfaction"].reset_index() + + # + # Process data. + # + tb = geo.harmonize_countries( + df=tb, + countries_file=paths.country_mapping_path, + ) + + # Keep relevant columns + tb = tb[["country", "year", "satis", "satis_u95", "satis_l95"]] + + # Rename columns + tb = tb.rename( + columns={ + "satis": "democracy_satisf_claassen", + "satis_u95": "democracy_satisf_high_claassen", + "satis_l95": "democracy_satisf_low_claassen", + } + ) + + # Format + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. 
+ ds_garden.save() diff --git a/etl/steps/data/garden/democracy/2024-03-07/shared.py b/etl/steps/data/garden/democracy/2024-03-07/shared.py index 6cb06c196a8..9bcc0b6413a 100644 --- a/etl/steps/data/garden/democracy/2024-03-07/shared.py +++ b/etl/steps/data/garden/democracy/2024-03-07/shared.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple, cast +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, cast import numpy as np import pandas as pd @@ -117,12 +117,13 @@ def default_indicator_category(x): return tb_ -def expand_observations(tb: Table) -> Table: +def expand_observations(tb: Table, regions: Set | None = None) -> Table: """Expand to have a row per (year, country).""" # Add missing years for each triplet ("warcode", "campcode", "ccode") # List of countries - regions = set(tb["country"]) + if regions is None: + regions = set(tb["country"]) # List of possible years years = np.arange(tb["year"].min(), tb["year"].max() + 1) diff --git a/etl/steps/data/grapher/democracy/2024-05-21/bti.py b/etl/steps/data/grapher/democracy/2024-05-21/bti.py new file mode 100644 index 00000000000..0a27dd55eb6 --- /dev/null +++ b/etl/steps/data/grapher/democracy/2024-05-21/bti.py @@ -0,0 +1,35 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("bti") + + # + # Process data. + # + tables = [ + ds_garden["bti"], + ds_garden["num_countries"], + ds_garden["num_people"], + ds_garden["avg_pop"], + ] + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. 
+ ds_grapher = create_dataset( + dest_dir, tables=tables, check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/grapher/democracy/2024-05-22/claassen_mood.py b/etl/steps/data/grapher/democracy/2024-05-22/claassen_mood.py new file mode 100644 index 00000000000..912db45d53e --- /dev/null +++ b/etl/steps/data/grapher/democracy/2024-05-22/claassen_mood.py @@ -0,0 +1,32 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("claassen_mood") + + # Read table from garden dataset. + tb = ds_garden["claassen_mood"] + + # + # Process data. + # + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/grapher/democracy/2024-05-22/claassen_satisfaction.py b/etl/steps/data/grapher/democracy/2024-05-22/claassen_satisfaction.py new file mode 100644 index 00000000000..bd7de976780 --- /dev/null +++ b/etl/steps/data/grapher/democracy/2024-05-22/claassen_satisfaction.py @@ -0,0 +1,32 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("claassen_satisfaction") + + # Read table from garden dataset. + tb = ds_garden["claassen_satisfaction"] + + # + # Process data. 
+ # + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/meadow/democracy/2024-05-21/bti.py b/etl/steps/data/meadow/democracy/2024-05-21/bti.py new file mode 100644 index 00000000000..99f7aff0aec --- /dev/null +++ b/etl/steps/data/meadow/democracy/2024-05-21/bti.py @@ -0,0 +1,96 @@ +"""Load a snapshot and create a meadow dataset.""" + +from owid.catalog.tables import Table, concat + +from etl.helpers import PathFinder, create_dataset +from etl.snapshot import Snapshot + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) +# YEAR COVERAGE +YEAR_MIN = 2006 +YEAR_MAX = 2024 + +# COLUMNS (relevant, renamings) +COLUMNS = { + "Regions:\n1 | East-Central and Southeast Europe\n2 | Latin America and the Caribbean\n3 | West and Central Africa\n4 | Middle East and North Africa\n5 | Southern and Eastern Africa\n6 | Eastern Europe, Caucasus and Central Asia\n7 | Asia and Oceania": "country", + " SI | Democracy Status": "democracy_bti", + " Q1 | Stateness": "state_bti", + " Q2 | Political Participation": "political_participation_bti", + " Q3 | Rule of Law": "rule_of_law_bti", + " Q4 | Stability of Democratic Institutions": "stability_dem_inst_bti", + " Q5 | Political and Social Integration": "pol_soc_integr_bti", + " Category.1": "regime_bti", + " Q2_1 | Free and fair elections": "electfreefair_bti", + " Q2_2 | Effective power to govern": "effective_power_bti", + " Q2_3 | Association / assembly rights": "freeassoc_bti", + " Q2_4 | Freedom of expression": "freeexpr_bti", + " Q3_1 | Separation of powers": "sep_power_bti", + " Q3_4 | Civil rights": "civ_rights_bti", + " Failed State": "state_basic_bti", + " Democracy/Autocracy": "pol_sys", +} +COLUMNS_INDEX = ["country", "year"] 
+ + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("bti.xlsx") + + # Load data from snapshot. + tb = load_data(snap) + + # + # Process data. + # + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() + + +def load_data(snap: Snapshot) -> Table: + """Load data from snapshot and return a Table.""" + tbs = [] + for year in range(2006, YEAR_MAX + 1, 2): + # Read + tb_ = snap.read(sheet_name=f"BTI {year}") + # Column check + columns_missing = set(COLUMNS) - set(tb_.columns) + if columns_missing: + raise ValueError(f"Columns missing in snapshot: {columns_missing}") + # Column selection & renaming + tb_ = tb_.rename(columns=COLUMNS)[COLUMNS.values()] + # Add year (year of observation = year of report - 1) + tb_["year"] = year - 1 + tbs.append(tb_) + + # Concatenate + tb = concat(tbs) + + # Replace '-' -> NA + columns = [col for col in COLUMNS.values() if col not in COLUMNS_INDEX] + tb[columns] = tb[columns].replace("-", float("nan")) + + # Map + tb["pol_sys"] = tb["pol_sys"].replace( + { + "Aut.": 0, + "Dem.": 1, + } + ) + + # Set dtypes + tb[columns] = tb[columns].astype(float) + + return tb diff --git a/etl/steps/data/meadow/democracy/2024-05-22/claassen_mood.py b/etl/steps/data/meadow/democracy/2024-05-22/claassen_mood.py new file mode 100644 index 00000000000..b825dec8628 --- /dev/null +++ b/etl/steps/data/meadow/democracy/2024-05-22/claassen_mood.py @@ -0,0 +1,32 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. 
+paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("claassen_mood.csv") + + # Load data from snapshot. + tb = snap.read() + + # + # Process data. + # + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() diff --git a/etl/steps/data/meadow/democracy/2024-05-22/claassen_satisfaction.py b/etl/steps/data/meadow/democracy/2024-05-22/claassen_satisfaction.py new file mode 100644 index 00000000000..081bc45eabf --- /dev/null +++ b/etl/steps/data/meadow/democracy/2024-05-22/claassen_satisfaction.py @@ -0,0 +1,32 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("claassen_satisfaction.csv") + + # Load data from snapshot. + tb = snap.read() + + # + # Process data. + # + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. 
+ ds_meadow.save() diff --git a/schemas/definitions.json b/schemas/definitions.json index 8d80da4bbcd..ef11806e59f 100644 --- a/schemas/definitions.json +++ b/schemas/definitions.json @@ -745,9 +745,18 @@ "description": "Unit to use in charts instead of the indicator's `unit`." }, "numDecimalPlaces": { - "type": "integer", - "description": "Number of decimal places to show in charts (and in the table tab).", - "minimum": 0 + "anyOf": [ + { + "type": "integer", + "description": "Number of decimal places to show in charts (and in the table tab).", + "minimum": 0 + }, + { + "type": "string", + "description": "Number of decimal places to show in charts (and in the table tab), allowing templating syntax.", + "pattern": "<%" + } + ] }, "zeroDay": { "type": "string", diff --git a/snapshots/climate/latest/weekly_wildfires.csv.dvc b/snapshots/climate/latest/weekly_wildfires.csv.dvc index b04e3ca01a6..dc667ecc52e 100644 --- a/snapshots/climate/latest/weekly_wildfires.csv.dvc +++ b/snapshots/climate/latest/weekly_wildfires.csv.dvc @@ -9,12 +9,12 @@ meta: citation_full: Global Wildfire Information System attribution_short: GWIS url_main: https://gwis.jrc.ec.europa.eu/apps/gwis.statistics/seasonaltrend - date_accessed: 2024-05-20 - date_published: 2024-05-20 + date_accessed: 2024-05-22 + date_published: 2024-05-22 license: name: CC BY 4.0 url: https://gwis.jrc.ec.europa.eu/about-gwis/data-license outs: - - md5: 788a705ae131667d2fe13a99e5ed8bbc - size: 11820781 + - md5: 47f06d201c0e7f8d77014f6058d89732 + size: 11820948 path: weekly_wildfires.csv diff --git a/snapshots/democracy/2024-05-21/bti.py b/snapshots/democracy/2024-05-21/bti.py new file mode 100644 index 00000000000..66c9c4cf23d --- /dev/null +++ b/snapshots/democracy/2024-05-21/bti.py @@ -0,0 +1,24 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. 
+SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"democracy/{SNAPSHOT_VERSION}/bti.xlsx") + + # Download data from source, add file to DVC and upload to S3. + snap.create_snapshot(upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/democracy/2024-05-21/bti.xlsx.dvc b/snapshots/democracy/2024-05-21/bti.xlsx.dvc new file mode 100644 index 00000000000..cf0b2ca6430 --- /dev/null +++ b/snapshots/democracy/2024-05-21/bti.xlsx.dvc @@ -0,0 +1,35 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Bertelsmann Transformation Index, Scores + description: |- + Throughout the world, democracy and a market economy have become powerful frameworks in which social sustainability can prosper. Successful processes of reform can be observed in every region of the globe. There are, however, no guarantees of success; many countries undergoing transformation face stagnation and power struggles or violence and even state failure. Good governance is pivotal to reform policies that work. What are the key decisions? What are the lessons to be learned from past experiences? What strategies are likely to succeed? Under which conditions? The BTI 2024 puts development and transformation policies to the test. + + Advocating reforms aimed at supporting the development of a constitutional democracy and a socially responsible market economy, the BTI provides the framework for an exchange of good practices among agents of reform. The BTI publishes two rankings, the Status Index and the Governance Index, both of which are based on in-depth assessments of 137 countries. 
The Status Index ranks the countries according to the state of their democracy and market economy, while the Governance Index ranks them according to their respective leadership’s performance. Distributed among the dimensions of democracy, market economy and governance, a total of 17 criteria are subdivided into 49 questions. + + BTI countries are selected according to the following criteria: They have yet to achieve a fully consolidated democracy and market economy, have populations of more than one million, and are recognized as sovereign states. + + The Transformation Index project is managed by the Bertelsmann Stiftung. + date_published: "2024" + + # Citation + producer: Bertelsmann Transformation Index + citation_full: |- + Bertelsmann Stiftung. 2024. Bertelsmann Transformation Index 2024. + + # Files + url_main: https://bti-project.org/ + url_download: https://bti-project.org/fileadmin/api/content/en/downloads/data/BTI_2006-2024_Scores.xlsx + date_accessed: 2024-05-21 + + # License + license: + name: Bertelsmann Stiftung 2024 + url: https://bti-project.org/en/imprint + +outs: + - md5: 0bf4d3fa2b940a3312abf8f69f4411de + size: 1117266 + path: bti.xlsx diff --git a/snapshots/democracy/2024-05-22/claassen.py b/snapshots/democracy/2024-05-22/claassen.py new file mode 100644 index 00000000000..af81cf2472c --- /dev/null +++ b/snapshots/democracy/2024-05-22/claassen.py @@ -0,0 +1,25 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + for suffix in ["mood", "satisfaction"]: + # Create a new snapshot. + snap = Snapshot(f"democracy/{SNAPSHOT_VERSION}/claassen_{suffix}.csv") + + # Download data from source, add file to DVC and upload to S3. 
+ snap.create_snapshot(upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/democracy/2024-05-22/claassen_mood.csv.dvc b/snapshots/democracy/2024-05-22/claassen_mood.csv.dvc new file mode 100644 index 00000000000..c3ea2c66088 --- /dev/null +++ b/snapshots/democracy/2024-05-22/claassen_mood.csv.dvc @@ -0,0 +1,32 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Democratic Mood + description: |- + Democratic mood measures the extent to which the public of a given country supports a democratic political system and opposes any autocratic alternatives. In contrast to satisfaction with democracy, democratic mood captures principled support for democracy. It is measured by applying a Bayesian latent variable model to aggregated survey data from a wide variety of cross-national survey projects. The latest update provides estimates for 141 countries, with estimates beginning in 1988 (for some cases) and ranging until 2020. + + The Bayesian model is developed and described in this article (http://doi.org/10.1017/pan.2018.32). The mood estimates are extended and applied in several additional articles (https://doi.org/10.1111/ajps.12452, https://doi.org/10.1017/S0003055419000558, https://doi.org/10.1177/00104140211036042). This choropleth shows democratic mood in 2020. + + date_published: "2022-06-01" + + # Citation + producer: Claassen + citation_full: |- + Claassen C. Estimating Smooth Country–Year Panels of Public Opinion. Political Analysis. 2019;27(1):1-20. 
doi:10.1017/pan.2018.32 + + # Files + url_main: http://chrisclaassen.com/data.html + url_download: http://chrisclaassen.com/docs/mood_est_v5.csv + date_accessed: 2024-05-22 + + # License + license: + name: CC BY 4.0 + url: https://creativecommons.org/licenses/by/4.0/ + +outs: + - md5: 1d35e793bdbb1c4c589b71d0c30dbc5b + size: 295725 + path: claassen_mood.csv diff --git a/snapshots/democracy/2024-05-22/claassen_satisfaction.csv.dvc b/snapshots/democracy/2024-05-22/claassen_satisfaction.csv.dvc new file mode 100644 index 00000000000..c837aacc703 --- /dev/null +++ b/snapshots/democracy/2024-05-22/claassen_satisfaction.csv.dvc @@ -0,0 +1,36 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Democratic Satisfaction + description: |- + Democratic satisfaction measures the extent to which the public of a given country is satisfied with the “way democracy works” in their country. Some scholars treat satisfaction as akin to democratic support; others use it as a summary measure of political support; yet others regard it as capturing an instrumental or performance-based appraisal of the regime. Whatever the interpretation, democratic satisfaction is widely used in cross-national opinion research. + + It is measured by applying a Bayesian latent variable model to aggregated survey data from a wide variety of cross-national survey projects, covering 132 countries from as early as 1973 until 2020. + + The Bayesian model is the same as that used to measure democratic mood (http://doi.org/10.1017/pan.2018.32). Earlier satisfaction estimates are used in this article. This choropleth shows democratic satisfaction in 2020. + + + + date_published: "2022-06-01" + + # Citation + producer: Claassen + citation_full: |- + Claassen C. Estimating Smooth Country–Year Panels of Public Opinion. Political Analysis. 2019;27(1):1-20. 
doi:10.1017/pan.2018.32 + + # Files + url_main: http://chrisclaassen.com/data.html + url_download: http://chrisclaassen.com/docs/satis_est_v2.csv + date_accessed: 2024-05-22 + + # License + license: + name: CC BY 4.0 + url: https://creativecommons.org/licenses/by/4.0/ + +outs: + - md5: cabb2209a1e93ec333e5cf5575f5f989 + size: 303689 + path: claassen_satisfaction.csv diff --git a/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc b/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc index 3a1fc80c561..75bc190ac24 100644 --- a/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc +++ b/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc @@ -13,7 +13,7 @@ meta: HMD provides an online STMF visualization toolkit (https://mpidr.shinyapps.io/stmortality). url: https://www.mortality.org/Data/STMF source_data_url: https://www.mortality.org/File/GetDocument/Public/STMF/Outputs/stmf.csv - date_accessed: 2024-05-21 + date_accessed: 2024-05-22 publication_date: 2024-05-13 publication_year: 2024 published_by: |- diff --git a/snapshots/excess_mortality/latest/wmd.csv.dvc b/snapshots/excess_mortality/latest/wmd.csv.dvc index c8180eadd81..e5077cd0ff6 100644 --- a/snapshots/excess_mortality/latest/wmd.csv.dvc +++ b/snapshots/excess_mortality/latest/wmd.csv.dvc @@ -13,7 +13,7 @@ meta: Published paper available at https://elifesciences.org/articles/69336. 
url: https://github.com/akarlinsky/world_mortality/ source_data_url: https://raw.githubusercontent.com/akarlinsky/world_mortality/main/world_mortality.csv - date_accessed: 2024-05-21 + date_accessed: 2024-05-22 publication_date: '2021-06-30' publication_year: 2021 published_by: |- diff --git a/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc b/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc index 222cc576126..ec64ae9e7b0 100644 --- a/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc +++ b/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc @@ -7,7 +7,7 @@ meta: For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic. url: https://github.com/dkobak/excess-mortality source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-per-year.csv - date_accessed: 2024-05-21 + date_accessed: 2024-05-22 publication_date: '2021-06-30' publication_year: 2021 published_by: |- diff --git a/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc b/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc index 787e507e83f..5dda8879345 100644 --- a/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc +++ b/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc @@ -6,7 +6,7 @@ meta: For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic. 
url: https://github.com/dkobak/excess-mortality source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-stmf.csv - date_accessed: 2024-05-21 + date_accessed: 2024-05-22 publication_date: '2021-06-30' publication_year: 2021 published_by: |- diff --git a/snapshots/who/latest/fluid.csv.dvc b/snapshots/who/latest/fluid.csv.dvc index 1bfa1b68c2d..6db49c86cf1 100644 --- a/snapshots/who/latest/fluid.csv.dvc +++ b/snapshots/who/latest/fluid.csv.dvc @@ -16,6 +16,6 @@ meta: The platform accommodates both qualitative and quantitative data which facilitates the tracking of global trends, spread, intensity, and impact of influenza. These data are made freely available to health policy makers in order to assist them in making informed decisions regarding the management of influenza. wdir: ../../../data/snapshots/who/latest outs: - - md5: 4541064d3d85e1308c6cd0b86fa0d644 - size: 152344933 + - md5: 9e6eb2916b71c3bcaf65b4abfb44a219 + size: 152638352 path: fluid.csv diff --git a/snapshots/who/latest/flunet.csv.dvc b/snapshots/who/latest/flunet.csv.dvc index 835553ae5a7..fdca1a1e6ea 100644 --- a/snapshots/who/latest/flunet.csv.dvc +++ b/snapshots/who/latest/flunet.csv.dvc @@ -16,6 +16,6 @@ meta: The data are provided remotely by National Influenza Centres (NICs) of the Global Influenza Surveillance and Response System (GISRS) and other national influenza reference laboratories collaborating actively with GISRS, or are uploaded from WHO regional databases. 
wdir: ../../../data/snapshots/who/latest outs: - - md5: 50f52adbbeabdf6ab09958f2358c9d83 - size: 26016578 + - md5: 58e585a80dda08088dd0eb614a18969f + size: 26018535 path: flunet.csv diff --git a/tests/apps/wizard/utils/test_env.py b/tests/apps/wizard/utils/test_env.py new file mode 100644 index 00000000000..c4585064bab --- /dev/null +++ b/tests/apps/wizard/utils/test_env.py @@ -0,0 +1,56 @@ +from apps.wizard.utils.env import Config, OWIDEnv, get_container_name + + +def test_get_container_name(): + assert get_container_name("branch") == "staging-site-branch" + assert get_container_name("feature/x") == "staging-site-feature-x" + assert get_container_name("do_not-do/this") == "staging-site-do-not-do-this" + + +def test_OWIDEnv_staging(): + env = OWIDEnv.from_staging("branch") + assert env.env_type_id == "staging" + assert env.site == "http://staging-site-branch" + assert env.name == "staging-site-branch" + assert env.base_site == "http://staging-site-branch" + assert env.admin_site == "http://staging-site-branch/admin" + assert env.api_site == "https://api-staging.owid.io/staging-site-branch" + assert env.indicators_url == "https://api-staging.owid.io/staging-site-branch/v1/indicators/" + + +def test_OWIDEnv_production(): + env = OWIDEnv( + Config( + DB_USER="user", + DB_NAME="live_grapher", + DB_PASS="xxx", + DB_PORT="3306", + DB_HOST="prod-db.owid.io", + ) + ) + assert env.env_type_id == "production" + assert env.site == "https://ourworldindata.org" + assert env.name == "production" + assert env.base_site == "https://admin.owid.io" + assert env.admin_site == "https://admin.owid.io/admin" + assert env.api_site == "https://api.ourworldindata.org" + assert env.indicators_url == "https://api.ourworldindata.org/v1/indicators/" + + +def test_OWIDEnv_local(): + env = OWIDEnv( + Config( + DB_USER="grapher", + DB_NAME="grapher", + DB_PASS="xxx", + DB_PORT="3306", + DB_HOST="127.0.0.1", + ) + ) + assert env.env_type_id == "local" + assert env.site == "http://localhost:3030" + 
assert env.name == "local" + assert env.base_site == "http://localhost:3030" + assert env.admin_site == "http://localhost:3030/admin" + assert env.api_site == "http://localhost:8000" + assert env.indicators_url == "http://localhost:8000/v1/indicators/"