diff --git a/dag/health.yml b/dag/health.yml index 579b3544a16..fdc4bae7c96 100644 --- a/dag/health.yml +++ b/dag/health.yml @@ -962,3 +962,10 @@ steps: - data-private://meadow/antibiotics/2024-12-02/total_pathogen_bloodstream_amr data-private://grapher/antibiotics/2024-12-02/total_pathogen_bloodstream_amr: - data-private://garden/antibiotics/2024-12-02/total_pathogen_bloodstream_amr + # WHO GLASS Enrolment + data://meadow/antibiotics/2024-12-03/glass_enrolment: + - snapshot://antibiotics/2024-12-03/glass_enrolment.xlsx + data://garden/antibiotics/2024-12-03/glass_enrolment: + - data://meadow/antibiotics/2024-12-03/glass_enrolment + data://grapher/antibiotics/2024-12-03/glass_enrolment: + - data://garden/antibiotics/2024-12-03/glass_enrolment diff --git a/etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.countries.json b/etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.countries.json new file mode 100644 index 00000000000..ad9139b95a6 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.countries.json @@ -0,0 +1,199 @@ +{ + "Afghanistan": "Afghanistan", + "Albania": "Albania", + "Algeria": "Algeria", + "Andorra": "Andorra", + "Angola": "Angola", + "Antigua and Barbuda": "Antigua and Barbuda", + "Argentina": "Argentina", + "Armenia": "Armenia", + "Australia": "Australia", + "Austria": "Austria", + "Azerbaijan": "Azerbaijan", + "Bahamas": "Bahamas", + "Bahrain": "Bahrain", + "Bangladesh": "Bangladesh", + "Barbados": "Barbados", + "Belarus": "Belarus", + "Belgium": "Belgium", + "Belize": "Belize", + "Benin": "Benin", + "Bhutan": "Bhutan", + "Bolivia (Plurinational State of)": "Bolivia", + "Bosnia and Herzegovina": "Bosnia and Herzegovina", + "Botswana": "Botswana", + "Brazil": "Brazil", + "Brunei Darussalam": "Brunei", + "Bulgaria": "Bulgaria", + "Burkina Faso": "Burkina Faso", + "Burundi": "Burundi", + "Cabo Verde": "Cape Verde", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Canada": "Canada", + "Central 
African Republic": "Central African Republic", + "Chad": "Chad", + "Chile": "Chile", + "China": "China", + "Colombia": "Colombia", + "Cook Islands": "Cook Islands", + "Costa Rica": "Costa Rica", + "Croatia": "Croatia", + "Cuba": "Cuba", + "Cyprus": "Cyprus", + "Czech Republic": "Czechia", + "C\u00f4te d\u2019Ivoire": "Cote d'Ivoire", + "Democratic Republic of the Congo": "Democratic Republic of Congo", + "Denmark": "Denmark", + "Djibouti": "Djibouti", + "Dominica": "Dominica", + "Dominican Republic": "Dominican Republic", + "Ecuador": "Ecuador", + "Egypt": "Egypt", + "El Salvador": "El Salvador", + "Equatorial Guinea": "Equatorial Guinea", + "Eritrea": "Eritrea", + "Estonia": "Estonia", + "Eswatini": "Eswatini", + "Ethiopia": "Ethiopia", + "Fiji": "Fiji", + "Finland": "Finland", + "France": "France", + "Gabon": "Gabon", + "Gambia": "Gambia", + "Georgia": "Georgia", + "Germany": "Germany", + "Ghana": "Ghana", + "Greece": "Greece", + "Grenada": "Grenada", + "Guatemala": "Guatemala", + "Guinea": "Guinea", + "Guinea-Bissau": "Guinea-Bissau", + "Guyana": "Guyana", + "Haiti": "Haiti", + "Honduras": "Honduras", + "Hungary": "Hungary", + "Iceland": "Iceland", + "India": "India", + "Indonesia": "Indonesia", + "Iraq": "Iraq", + "Ireland": "Ireland", + "Israel": "Israel", + "Italy": "Italy", + "Jamaica": "Jamaica", + "Japan": "Japan", + "Jordan": "Jordan", + "Kazakhstan": "Kazakhstan", + "Kenya": "Kenya", + "Kiribati": "Kiribati", + "Kosovo": "Kosovo", + "Kuwait": "Kuwait", + "Kyrgyzstan": "Kyrgyzstan", + "Latvia": "Latvia", + "Lebanon": "Lebanon", + "Lesotho": "Lesotho", + "Liberia": "Liberia", + "Libya": "Libya", + "Lithuania": "Lithuania", + "Luxembourg": "Luxembourg", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Malaysia": "Malaysia", + "Maldives": "Maldives", + "Mali": "Mali", + "Malta": "Malta", + "Marshall Islands": "Marshall Islands", + "Mauritania": "Mauritania", + "Mauritius": "Mauritius", + "Mexico": "Mexico", + "Micronesia (Federated States of)": 
"Micronesia (country)", + "Monaco": "Monaco", + "Mongolia": "Mongolia", + "Montenegro": "Montenegro", + "Morocco": "Morocco", + "Mozambique": "Mozambique", + "Myanmar": "Myanmar", + "Namibia": "Namibia", + "Nauru": "Nauru", + "Nepal": "Nepal", + "New Zealand": "New Zealand", + "Nicaragua": "Nicaragua", + "Nigeria": "Nigeria", + "Niue": "Niue", + "North Macedonia": "North Macedonia", + "Norway": "Norway", + "Oman": "Oman", + "Pakistan": "Pakistan", + "Palau": "Palau", + "Palestine": "Palestine", + "Panama": "Panama", + "Papua New Guinea": "Papua New Guinea", + "Paraguay": "Paraguay", + "Peru": "Peru", + "Poland": "Poland", + "Portugal": "Portugal", + "Qatar": "Qatar", + "Republic of Korea": "South Korea", + "Republic of Moldova": "Moldova", + "Romania": "Romania", + "Russian Federation": "Russia", + "Rwanda": "Rwanda", + "Saint Kitts and Nevis": "Saint Kitts and Nevis", + "Saint Lucia": "Saint Lucia", + "Saint Vincent and the Grenadines": "Saint Vincent and the Grenadines", + "Samoa": "Samoa", + "San Marino": "San Marino", + "Sao Tome and Principe": "Sao Tome and Principe", + "Saudi Arabia": "Saudi Arabia", + "Senegal": "Senegal", + "Seychelles": "Seychelles", + "Sierra Leone": "Sierra Leone", + "Singapore": "Singapore", + "Slovakia": "Slovakia", + "Slovenia": "Slovenia", + "Solomon Islands": "Solomon Islands", + "Somalia": "Somalia", + "South Africa": "South Africa", + "South Sudan": "South Sudan", + "Spain": "Spain", + "Sri Lanka": "Sri Lanka", + "Suriname": "Suriname", + "Sweden": "Sweden", + "Switzerland": "Switzerland", + "Syrian Arab Republic": "Syria", + "Tajikistan": "Tajikistan", + "Thailand": "Thailand", + "Timor-Leste": "East Timor", + "Togo": "Togo", + "Tonga": "Tonga", + "Trinidad and Tobago": "Trinidad and Tobago", + "Tunisia": "Tunisia", + "Turkmenistan": "Turkmenistan", + "Tuvalu": "Tuvalu", + "Uganda": "Uganda", + "Ukraine": "Ukraine", + "United Arab Emirates": "United Arab Emirates", + "United Kingdom of Great Britain and Northern Ireland": "United 
Kingdom", + "United Republic of Tanzania": "Tanzania", + "United States of America": "United States", + "Uruguay": "Uruguay", + "Uzbekistan": "Uzbekistan", + "Vanuatu": "Vanuatu", + "Venezuela (Bolivarian Republic of)": "Venezuela", + "Viet Nam": "Vietnam", + "Yemen": "Yemen", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe", + "Comoros ": "Comoros", + "Congo ": "Congo", + "Democratic People\u2019s Republic of Korea": "North Korea", + "Hong Kong SAR (China)": "Hong Kong", + "Iran (Islamic Republic)": "Iran", + "Lao People\u2019s Democratic Republic": "Laos", + "Netherlands ": "Netherlands", + "Niger ": "Niger", + "Philippines ": "Philippines", + "Serbia ": "Serbia", + "Sudan ": "Sudan", + "T\u00fcrkiye": "Turkey" +} \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.meta.yml b/etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.meta.yml new file mode 100644 index 00000000000..d830f8aaebc --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.meta.yml @@ -0,0 +1,20 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Antibiotics + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + +tables: + glass_enrolment: + variables: + enrolment: + title: Which countries have enrolled in the Global Antimicrobial Resistance Surveillance System (GLASS)? + description_short: "Global Antimicrobial Resistance Surveillance System participation status of each country." 
+        unit: ""
+        short_unit: ""
diff --git a/etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.py b/etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.py
new file mode 100644
index 00000000000..06c4fc342e6
--- /dev/null
+++ b/etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.py
@@ -0,0 +1,72 @@
+"""Load a meadow dataset and create a garden dataset."""
+
+import numpy as np
+import pandas as pd
+from owid.catalog import Table
+
+from etl.data_helpers import geo
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load meadow dataset.
+    ds_meadow = paths.load_dataset("glass_enrolment")
+
+    # Read table from meadow dataset.
+    tb = ds_meadow.read("glass_enrolment")
+
+    #
+    # Process data.
+    #
+    tb = geo.harmonize_countries(tb, countries_file=paths.country_mapping_path)
+    origins = tb["amc"].metadata.origins
+
+    # Make data meaningful.
+    tb = tb[["country", "year", "amr", "amc"]]
+    # Check there are no unexpected values; each column should contain only Y and NA
+    assert len(tb["amr"].unique()) == 2, "amr column should have only two unique values"
+    assert len(tb["amc"].unique()) == 2, "amc column should have only two unique values"
+    tb = combine_data(tb)
+    tb = tb.drop(columns=["amr", "amc"])
+    tb["enrolment"].metadata.origins = origins
+
+    tb = tb.format(["country", "year"])
+
+    #
+    # Save outputs.
+    #
+    # Create a new garden dataset with the same metadata as the meadow dataset.
+    ds_garden = create_dataset(
+        dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
+    )
+
+    # Save changes in the new garden dataset.
+    ds_garden.save()
+
+
+def combine_data(tb: Table) -> Table:
+    """Combine the amr and amc columns into a single categorical enrolment column."""
+    # Replace missing values with "" so the comparisons below yield plain booleans
+    tb["amr"] = tb["amr"].fillna("")
+    tb["amc"] = tb["amc"].fillna("")
+    conditions = [
+        (tb["amr"] == "Y") & (tb["amc"] == "Y"),  # Both AMR and AMC
+        (tb["amr"] == "Y") & (tb["amc"] != "Y"),  # AMR only
+        (tb["amr"] != "Y") & (tb["amc"] == "Y"),  # AMC only
+        (tb["amr"] != "Y") & (tb["amc"] != "Y"),  # Neither
+    ]
+
+    # Define corresponding outputs (same order as the conditions above)
+    choices = ["Both", "AMR only", "AMC only", "Neither"]
+
+    # Apply row-wise conditions; the four conditions are exhaustive, so the default is never used
+    tb["enrolment"] = np.select(conditions, choices, default=pd.NA)
+    assert all(tb["enrolment"].notna()), "There should be no missing values in the enrolment column"
+
+    return tb
diff --git a/etl/steps/data/grapher/antibiotics/2024-12-03/glass_enrolment.py b/etl/steps/data/grapher/antibiotics/2024-12-03/glass_enrolment.py
new file mode 100644
index 00000000000..3f1604a2896
--- /dev/null
+++ b/etl/steps/data/grapher/antibiotics/2024-12-03/glass_enrolment.py
@@ -0,0 +1,28 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load garden dataset.
+    ds_garden = paths.load_dataset("glass_enrolment")
+
+    # Read table from garden dataset.
+    tb = ds_garden.read("glass_enrolment", reset_index=False)
+
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset with the same metadata as the garden dataset.
+    ds_grapher = create_dataset(
+        dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata
+    )
+
+    # Save changes in the new grapher dataset.
+    ds_grapher.save()
diff --git a/etl/steps/data/meadow/antibiotics/2024-12-03/glass_enrolment.py b/etl/steps/data/meadow/antibiotics/2024-12-03/glass_enrolment.py
new file mode 100644
index 00000000000..9c94caea506
--- /dev/null
+++ b/etl/steps/data/meadow/antibiotics/2024-12-03/glass_enrolment.py
@@ -0,0 +1,38 @@
+"""Load a snapshot and create a meadow dataset."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Retrieve snapshot.
+    snap = paths.load_snapshot("glass_enrolment.xlsx")
+
+    # Load data from snapshot.
+    tb = snap.read()
+    # Drop the rows that have no value in the "Code" column
+    tb = tb.dropna(subset=["Code"])
+
+    # Check the number of countries (compare the length itself, not the length of a boolean mask)
+    assert len(tb["Country"]) == 197, f"Expected 197 countries, found {len(tb['Country'])}"
+    # Rename columns
+    tb = tb.drop(columns=["Country"]).rename(columns={"Label": "country"})
+    tb["year"] = snap.metadata.origin.date_published.split("-")[0]
+    # Process data.
+    #
+    # Ensure all columns are snake-case, set an appropriate index, and sort conveniently.
+    tb = tb.format(["country", "year"])
+
+    #
+    # Save outputs.
+    #
+    # Create a new meadow dataset with the same metadata as the snapshot.
+    ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata)
+
+    # Save changes in the new meadow dataset.
+    ds_meadow.save()
diff --git a/snapshots/antibiotics/2024-12-03/glass_enrolment.py b/snapshots/antibiotics/2024-12-03/glass_enrolment.py
new file mode 100644
index 00000000000..9c31f08a086
--- /dev/null
+++ b/snapshots/antibiotics/2024-12-03/glass_enrolment.py
@@ -0,0 +1,30 @@
+"""Script to create a snapshot of dataset.
+
+The data should be available here: https://www.who.int/initiatives/glass/country-participation
+
+But if it is out of date (e.g. not in sync with the image on the page above), then contact glass@who.int to access the latest data.
+"""
+
+from pathlib import Path
+
+import click
+
+from etl.snapshot import Snapshot
+
+# Version for current snapshot dataset.
+SNAPSHOT_VERSION = Path(__file__).parent.name
+
+
+@click.command()
+@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
+@click.option("--path-to-file", "-f", prompt=True, type=str, help="Path to local data file.")
+def main(path_to_file: str, upload: bool) -> None:
+    # Create a new snapshot.
+    snap = Snapshot(f"antibiotics/{SNAPSHOT_VERSION}/glass_enrolment.xlsx")
+
+    # Copy local data file to snapshots data folder, add file to DVC and upload to S3.
+    snap.create_snapshot(filename=path_to_file, upload=upload)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/snapshots/antibiotics/2024-12-03/glass_enrolment.xlsx.dvc b/snapshots/antibiotics/2024-12-03/glass_enrolment.xlsx.dvc
new file mode 100644
index 00000000000..a124b134985
--- /dev/null
+++ b/snapshots/antibiotics/2024-12-03/glass_enrolment.xlsx.dvc
@@ -0,0 +1,29 @@
+# Learn more at:
+# http://docs.owid.io/projects/etl/architecture/metadata/reference/
+meta:
+  origin:
+    # Data product / Snapshot
+    title: GLASS Country Participation
+    description: |-
+      The list of countries that are enrolled in the WHO's Global Antimicrobial Resistance and Use Surveillance System (GLASS). Countries can be enrolled in GLASS to collect and share data on antimicrobial resistance (AMR) and/or antimicrobial consumption (AMC), in line with the GLASS-AMR and GLASS-AMC methodologies, respectively.
+    date_published: "2024-12-03"
+
+    # Citation
+    producer: World Health Organization
+    citation_full: |-
+      GLASS Country Participation (2024). Global Antimicrobial Resistance and Use Surveillance System (GLASS), World Health Organization.
+ attribution_short: WHO + + # Files + url_main: https://www.who.int/initiatives/glass/country-participation + date_accessed: 2024-12-03 + + # License + license: + name: © 2024 WHO + url: https://www.who.int/about/policies/terms-of-use + +outs: + - md5: d10cca0830352c8145c9feb4958b4120 + size: 88534 + path: glass_enrolment.xlsx