Skip to content

Commit

Permalink
📊 antibiotics: who glass enrolled countries (#3683)
Browse files Browse the repository at this point in the history
* 📊 who glass enrolled countries

* adding who glass enrolment

* format dag
  • Loading branch information
spoonerf authored Dec 4, 2024
1 parent c5a1045 commit e349737
Show file tree
Hide file tree
Showing 8 changed files with 423 additions and 0 deletions.
7 changes: 7 additions & 0 deletions dag/health.yml
Original file line number Diff line number Diff line change
Expand Up @@ -962,3 +962,10 @@ steps:
- data-private://meadow/antibiotics/2024-12-02/total_pathogen_bloodstream_amr
data-private://grapher/antibiotics/2024-12-02/total_pathogen_bloodstream_amr:
- data-private://garden/antibiotics/2024-12-02/total_pathogen_bloodstream_amr
# WHO GLASS Enrolment
# Each step key below is followed by the list of steps it depends on, forming
# the chain: snapshot (xlsx) -> meadow -> garden -> grapher.
data://meadow/antibiotics/2024-12-03/glass_enrolment:
- snapshot://antibiotics/2024-12-03/glass_enrolment.xlsx
data://garden/antibiotics/2024-12-03/glass_enrolment:
- data://meadow/antibiotics/2024-12-03/glass_enrolment
data://grapher/antibiotics/2024-12-03/glass_enrolment:
- data://garden/antibiotics/2024-12-03/glass_enrolment
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
{
"Afghanistan": "Afghanistan",
"Albania": "Albania",
"Algeria": "Algeria",
"Andorra": "Andorra",
"Angola": "Angola",
"Antigua and Barbuda": "Antigua and Barbuda",
"Argentina": "Argentina",
"Armenia": "Armenia",
"Australia": "Australia",
"Austria": "Austria",
"Azerbaijan": "Azerbaijan",
"Bahamas": "Bahamas",
"Bahrain": "Bahrain",
"Bangladesh": "Bangladesh",
"Barbados": "Barbados",
"Belarus": "Belarus",
"Belgium": "Belgium",
"Belize": "Belize",
"Benin": "Benin",
"Bhutan": "Bhutan",
"Bolivia (Plurinational State of)": "Bolivia",
"Bosnia and Herzegovina": "Bosnia and Herzegovina",
"Botswana": "Botswana",
"Brazil": "Brazil",
"Brunei Darussalam": "Brunei",
"Bulgaria": "Bulgaria",
"Burkina Faso": "Burkina Faso",
"Burundi": "Burundi",
"Cabo Verde": "Cape Verde",
"Cambodia": "Cambodia",
"Cameroon": "Cameroon",
"Canada": "Canada",
"Central African Republic": "Central African Republic",
"Chad": "Chad",
"Chile": "Chile",
"China": "China",
"Colombia": "Colombia",
"Cook Islands": "Cook Islands",
"Costa Rica": "Costa Rica",
"Croatia": "Croatia",
"Cuba": "Cuba",
"Cyprus": "Cyprus",
"Czech Republic": "Czechia",
"C\u00f4te d\u2019Ivoire": "Cote d'Ivoire",
"Democratic Republic of the Congo": "Democratic Republic of Congo",
"Denmark": "Denmark",
"Djibouti": "Djibouti",
"Dominica": "Dominica",
"Dominican Republic": "Dominican Republic",
"Ecuador": "Ecuador",
"Egypt": "Egypt",
"El Salvador": "El Salvador",
"Equatorial Guinea": "Equatorial Guinea",
"Eritrea": "Eritrea",
"Estonia": "Estonia",
"Eswatini": "Eswatini",
"Ethiopia": "Ethiopia",
"Fiji": "Fiji",
"Finland": "Finland",
"France": "France",
"Gabon": "Gabon",
"Gambia": "Gambia",
"Georgia": "Georgia",
"Germany": "Germany",
"Ghana": "Ghana",
"Greece": "Greece",
"Grenada": "Grenada",
"Guatemala": "Guatemala",
"Guinea": "Guinea",
"Guinea-Bissau": "Guinea-Bissau",
"Guyana": "Guyana",
"Haiti": "Haiti",
"Honduras": "Honduras",
"Hungary": "Hungary",
"Iceland": "Iceland",
"India": "India",
"Indonesia": "Indonesia",
"Iraq": "Iraq",
"Ireland": "Ireland",
"Israel": "Israel",
"Italy": "Italy",
"Jamaica": "Jamaica",
"Japan": "Japan",
"Jordan": "Jordan",
"Kazakhstan": "Kazakhstan",
"Kenya": "Kenya",
"Kiribati": "Kiribati",
"Kosovo": "Kosovo",
"Kuwait": "Kuwait",
"Kyrgyzstan": "Kyrgyzstan",
"Latvia": "Latvia",
"Lebanon": "Lebanon",
"Lesotho": "Lesotho",
"Liberia": "Liberia",
"Libya": "Libya",
"Lithuania": "Lithuania",
"Luxembourg": "Luxembourg",
"Madagascar": "Madagascar",
"Malawi": "Malawi",
"Malaysia": "Malaysia",
"Maldives": "Maldives",
"Mali": "Mali",
"Malta": "Malta",
"Marshall Islands": "Marshall Islands",
"Mauritania": "Mauritania",
"Mauritius": "Mauritius",
"Mexico": "Mexico",
"Micronesia (Federated States of)": "Micronesia (country)",
"Monaco": "Monaco",
"Mongolia": "Mongolia",
"Montenegro": "Montenegro",
"Morocco": "Morocco",
"Mozambique": "Mozambique",
"Myanmar": "Myanmar",
"Namibia": "Namibia",
"Nauru": "Nauru",
"Nepal": "Nepal",
"New Zealand": "New Zealand",
"Nicaragua": "Nicaragua",
"Nigeria": "Nigeria",
"Niue": "Niue",
"North Macedonia": "North Macedonia",
"Norway": "Norway",
"Oman": "Oman",
"Pakistan": "Pakistan",
"Palau": "Palau",
"Palestine": "Palestine",
"Panama": "Panama",
"Papua New Guinea": "Papua New Guinea",
"Paraguay": "Paraguay",
"Peru": "Peru",
"Poland": "Poland",
"Portugal": "Portugal",
"Qatar": "Qatar",
"Republic of Korea": "South Korea",
"Republic of Moldova": "Moldova",
"Romania": "Romania",
"Russian Federation": "Russia",
"Rwanda": "Rwanda",
"Saint Kitts and Nevis": "Saint Kitts and Nevis",
"Saint Lucia": "Saint Lucia",
"Saint Vincent and the Grenadines": "Saint Vincent and the Grenadines",
"Samoa": "Samoa",
"San Marino": "San Marino",
"Sao Tome and Principe": "Sao Tome and Principe",
"Saudi Arabia": "Saudi Arabia",
"Senegal": "Senegal",
"Seychelles": "Seychelles",
"Sierra Leone": "Sierra Leone",
"Singapore": "Singapore",
"Slovakia": "Slovakia",
"Slovenia": "Slovenia",
"Solomon Islands": "Solomon Islands",
"Somalia": "Somalia",
"South Africa": "South Africa",
"South Sudan": "South Sudan",
"Spain": "Spain",
"Sri Lanka": "Sri Lanka",
"Suriname": "Suriname",
"Sweden": "Sweden",
"Switzerland": "Switzerland",
"Syrian Arab Republic": "Syria",
"Tajikistan": "Tajikistan",
"Thailand": "Thailand",
"Timor-Leste": "East Timor",
"Togo": "Togo",
"Tonga": "Tonga",
"Trinidad and Tobago": "Trinidad and Tobago",
"Tunisia": "Tunisia",
"Turkmenistan": "Turkmenistan",
"Tuvalu": "Tuvalu",
"Uganda": "Uganda",
"Ukraine": "Ukraine",
"United Arab Emirates": "United Arab Emirates",
"United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
"United Republic of Tanzania": "Tanzania",
"United States of America": "United States",
"Uruguay": "Uruguay",
"Uzbekistan": "Uzbekistan",
"Vanuatu": "Vanuatu",
"Venezuela (Bolivarian Republic of)": "Venezuela",
"Viet Nam": "Vietnam",
"Yemen": "Yemen",
"Zambia": "Zambia",
"Zimbabwe": "Zimbabwe",
"Comoros ": "Comoros",
"Congo ": "Congo",
"Democratic People\u2019s Republic of Korea": "North Korea",
"Hong Kong SAR (China)": "Hong Kong",
"Iran (Islamic Republic)": "Iran",
"Lao People\u2019s Democratic Republic": "Laos",
"Netherlands ": "Netherlands",
"Niger ": "Niger",
"Philippines ": "Philippines",
"Serbia ": "Serbia",
"Sudan ": "Sudan",
"T\u00fcrkiye": "Turkey"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# NOTE: To learn more about the fields, hover over their names.
# Shared definitions; the topic tag listed here is "Antibiotics".
definitions:
common:
presentation:
topic_tags:
- Antibiotics

# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
# Update period of the dataset, expressed in days.
update_period_days: 365

tables:
glass_enrolment:
variables:
# Single indicator of the table: the enrolment status of each country.
enrolment:
title: Which countries have enrolled in the Global Antimicrobial Resistance Surveillance System (GLASS)?
description_short: "Global Antimicrobial Resistance Surveillance System participation status of each country."
# Units are left empty - presumably because the indicator holds category
# labels rather than a measured quantity (confirm against the garden step).
unit: ""
short_unit: ""
72 changes: 72 additions & 0 deletions etl/steps/data/garden/antibiotics/2024-12-03/glass_enrolment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Load a meadow dataset and create a garden dataset."""

import numpy as np
import pandas as pd
from owid.catalog import Table

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Build the garden dataset of WHO GLASS enrolment status per country.

    Loads the meadow table, harmonizes country names, validates the raw
    ``amr`` and ``amc`` flags and merges them into one ``enrolment`` column.

    Args:
        dest_dir: Destination directory handed over to ``create_dataset``.

    Raises:
        AssertionError: If ``amr`` or ``amc`` does not hold exactly two
            distinct values, or holds anything other than "Y" and missing
            values.
    """
    #
    # Load inputs.
    #
    # Load meadow dataset.
    ds_meadow = paths.load_dataset("glass_enrolment")

    # Read table from meadow dataset.
    tb = ds_meadow.read("glass_enrolment")

    #
    # Process data.
    #
    tb = geo.harmonize_countries(tb, countries_file=paths.country_mapping_path)
    # Keep the origins of a source column so they can be attached to the
    # derived "enrolment" column further down.
    origins = tb["amc"].metadata.origins

    # Make data meaningful.
    tb = tb[["country", "year", "amr", "amc"]]
    # Check there's no weird values, it should be only Y and NA.
    # Counting distinct values alone would accept e.g. "Y " plus NA, which the
    # combination step would then silently treat as "not enrolled", so the
    # actual values are verified as well.
    for column in ("amr", "amc"):
        assert len(tb[column].unique()) == 2, f"{column} column should have only two unique values"
        unexpected = set(tb[column].dropna().unique()) - {"Y"}
        assert not unexpected, f"{column} column has unexpected values: {sorted(unexpected)}"

    tb = combine_data(tb)
    tb = tb.drop(columns=["amr", "amc"])
    tb["enrolment"].metadata.origins = origins

    tb = tb.format(["country", "year"])

    #
    # Save outputs.
    #
    # Create a new garden dataset with the same metadata as the meadow dataset.
    ds_garden = create_dataset(
        dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
    )

    # Save changes in the new garden dataset.
    ds_garden.save()


def combine_data(tb: Table) -> Table:
    """Combine the amr and amc columns into a single enrolment column.

    Args:
        tb: Table with ``amr`` and ``amc`` columns, where "Y" marks enrolment
            and any other value (including missing) marks no enrolment.

    Returns:
        A copy of ``tb`` with missing ``amr``/``amc`` values replaced by ""
        and a new ``enrolment`` column holding one of "Both", "AMR only",
        "AMC only" or "Neither". The table passed in is left untouched.

    Raises:
        AssertionError: If any row ends up without an enrolment label.
    """
    # Work on a copy: the caller may pass a column slice of another table, and
    # overwriting columns in place would be a hidden side effect.
    tb = tb.copy()

    # Missing flags become empty strings so the comparisons below yield plain
    # booleans instead of missing values.
    tb["amr"] = tb["amr"].fillna("")
    tb["amc"] = tb["amc"].fillna("")

    has_amr = tb["amr"] == "Y"
    has_amc = tb["amc"] == "Y"

    # Define conditions
    conditions = [
        has_amr & has_amc,  # Both AMR and AMC
        has_amr & ~has_amc,  # AMR only
        ~has_amr & has_amc,  # AMC only
        ~has_amr & ~has_amc,  # Neither
    ]

    # Define corresponding outputs
    choices = ["Both", "AMR only", "AMC only", "Neither"]

    # Apply row-wise conditions
    tb["enrolment"] = np.select(conditions, choices, default=pd.NA)
    assert tb["enrolment"].notna().all(), "There should be no missing values in the enrolment column"

    return tb
28 changes: 28 additions & 0 deletions etl/steps/data/grapher/antibiotics/2024-12-03/glass_enrolment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Load a garden dataset and create a grapher dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Create the grapher dataset for GLASS enrolment from the garden dataset.

    The garden table is passed through unchanged; only the dataset wrapper is
    rebuilt for grapher.

    Args:
        dest_dir: Destination directory handed over to ``create_dataset``.
    """
    # Garden dataset this step depends on.
    garden_dataset = paths.load_dataset("glass_enrolment")

    # Read the table with its index kept as stored (reset_index=False).
    enrolment_table = garden_dataset.read("glass_enrolment", reset_index=False)

    # Build the grapher dataset, falling back to the garden dataset's metadata.
    grapher_dataset = create_dataset(
        dest_dir,
        tables=[enrolment_table],
        check_variables_metadata=True,
        default_metadata=garden_dataset.metadata,
    )

    # Persist the new grapher dataset.
    grapher_dataset.save()
38 changes: 38 additions & 0 deletions etl/steps/data/meadow/antibiotics/2024-12-03/glass_enrolment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Load a snapshot and create a meadow dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Load the GLASS enrolment snapshot and create the meadow dataset.

    Args:
        dest_dir: Destination directory handed over to ``create_dataset``.

    Raises:
        AssertionError: If the table does not contain exactly 197 rows after
            dropping rows without a ``Code`` value.
    """
    #
    # Load inputs.
    #
    # Retrieve snapshot.
    snap = paths.load_snapshot("glass_enrolment.xlsx")

    # Load data from snapshot.
    tb = snap.read()
    # Drop the rows that have no value in the "Code" column
    # (presumably rows that are not country entries - confirm against the file).
    tb = tb.dropna(subset=["Code"])

    # Check the number of countries. The comparison has to be applied to the
    # length itself: len(tb["Country"] == 197) measures a boolean Series and is
    # truthy for any non-empty table, so it never checked anything.
    n_countries = len(tb["Country"])
    assert n_countries == 197, f"Expected 197 countries, found {n_countries}"

    # Rename columns: "Label" is kept as the country name, "Country" is dropped.
    tb = tb.drop(columns=["Country"]).rename(columns={"Label": "country"})
    # The year is the first component of the snapshot's publication date.
    tb["year"] = snap.metadata.origin.date_published.split("-")[0]

    #
    # Process data.
    #
    # Ensure all columns are snake-case, set an appropriate index, and sort conveniently.
    tb = tb.format(["country", "year"])

    #
    # Save outputs.
    #
    # Create a new meadow dataset with the same metadata as the snapshot.
    ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata)

    # Save changes in the new meadow dataset.
    ds_meadow.save()
Loading

0 comments on commit e349737

Please sign in to comment.