Skip to content

Commit

Permalink
adding attributable deaths and non-attributable deaths
Browse files Browse the repository at this point in the history
  • Loading branch information
spoonerf committed Dec 4, 2024
1 parent 1aacf8b commit 71e493b
Show file tree
Hide file tree
Showing 8 changed files with 232 additions and 0 deletions.
9 changes: 9 additions & 0 deletions dag/health.yml
Original file line number Diff line number Diff line change
Expand Up @@ -969,9 +969,18 @@ steps:
- data://meadow/antibiotics/2024-12-03/glass_enrolment
data://grapher/antibiotics/2024-12-03/glass_enrolment:
- data://garden/antibiotics/2024-12-03/glass_enrolment
# MICROBE - total deaths by pathogen
data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens:
- snapshot-private://antibiotics/2024-12-04/microbe_total_pathogens.csv
data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens:
- data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens
data-private://grapher/antibiotics/2024-12-04/microbe_total_pathogens:
- data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens
# MICROBE - total deaths by pathogen attributable to antimicrobial resistance (AMR)
data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens_amr:
- snapshot-private://antibiotics/2024-12-04/microbe_total_pathogens_amr.csv
data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens_amr:
- data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens_amr
- data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens
data-private://grapher/antibiotics/2024-12-04/microbe_total_pathogens_amr:
- data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens_amr
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"Global": "World"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
presentation:
topic_tags:
- Antibiotics

# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365

tables:
microbe_total_pathogens_amr:
variables:
amr_attributable_deaths:
title: Total global deaths from infections attributed to AMR, by pathogen
unit: deaths
description_short: Estimated number of deaths from infections that are attributed to antimicrobial resistance.
presentation:
title_public: Total global deaths from infections attributed to AMR, by pathogen
display:
roundingMode: significantFigures
numSignificantFigures: 3
non_amr_attributable_deaths:
title: Total global deaths from infections not attributed to AMR, by pathogen
unit: deaths
description_short: Estimated number of deaths from infections that are not attributed to antimicrobial resistance.
presentation:
title_public: Total global deaths from infections not attributed to AMR, by pathogen
display:
roundingMode: significantFigures
numSignificantFigures: 3
total_deaths:
title: Total global deaths from infections
unit: deaths
description_short: Estimated number of deaths from infections.
presentation:
title_public: Total global deaths from infections
display:
roundingMode: significantFigures
numSignificantFigures: 3
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Load a meadow dataset and create a garden dataset."""

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Build the table of AMR-attributable, non-attributable and total deaths by pathogen.

    Reads the AMR-attributable deaths table and the total deaths table, joins
    them per pathogen, derives the non-attributable deaths as the difference
    between the two, and saves the result as a new dataset in `dest_dir`.
    """
    #
    # Load inputs.
    #
    # Load the two datasets this step depends on.
    ds_amr = paths.load_dataset("microbe_total_pathogens_amr")
    ds_pathogens = paths.load_dataset("microbe_total_pathogens")

    # From each table, discard the "upper" and "lower" columns and give the
    # "value" column a descriptive name.
    tb_amr = ds_amr.read("microbe_total_pathogens_amr")
    tb_amr = tb_amr.drop(columns=["upper", "lower"])
    tb_amr = tb_amr.rename(columns={"value": "amr_attributable_deaths"})

    tb_pathogens = ds_pathogens.read("microbe_total_pathogens")
    tb_pathogens = tb_pathogens.drop(columns=["upper", "lower"])
    tb_pathogens = tb_pathogens.rename(columns={"value": "total_deaths"})

    #
    # Process data.
    #
    # Only the AMR table is harmonized here.
    # NOTE(review): the totals table is presumably already harmonized upstream - confirm.
    tb_amr = geo.harmonize_countries(df=tb_amr, countries_file=paths.country_mapping_path)

    # Right merge: keep every row of the totals table, matched or not.
    merge_keys = ["country", "year", "pathogen", "pathogen_type"]
    tb = tb_amr.merge(tb_pathogens, on=merge_keys, how="right")

    # Rows of the totals table without an AMR match come out of the merge as
    # missing values; they are counted as zero attributable deaths.
    tb["amr_attributable_deaths"] = tb["amr_attributable_deaths"].fillna(0)
    tb["non_amr_attributable_deaths"] = tb["total_deaths"] - tb["amr_attributable_deaths"]

    # Replace the "country" column with the pathogen name.
    # NOTE(review): presumably so that each pathogen is plotted as its own entity - confirm.
    tb = tb.drop(columns=["country", "pathogen_type"])
    tb = tb.rename(columns={"pathogen": "country"})

    tb = tb.format(["country", "year"])

    #
    # Save outputs.
    #
    # Create the new dataset, defaulting to the metadata of the AMR dataset.
    ds_garden = create_dataset(
        dest_dir,
        tables=[tb],
        check_variables_metadata=True,
        default_metadata=ds_amr.metadata,
    )

    # Save changes in the new garden dataset.
    ds_garden.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Load a garden dataset and create a grapher dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Copy the `microbe_total_pathogens_amr` table into a new dataset in `dest_dir`.

    The table is read with its index kept in place and written out unchanged,
    using the source dataset's metadata as the default metadata.
    """
    #
    # Load inputs.
    #
    source_dataset = paths.load_dataset("microbe_total_pathogens_amr")

    # `reset_index=False` keeps the index that the table was stored with.
    table = source_dataset.read("microbe_total_pathogens_amr", reset_index=False)

    #
    # Save outputs.
    #
    grapher_dataset = create_dataset(
        dest_dir,
        tables=[table],
        check_variables_metadata=True,
        default_metadata=source_dataset.metadata,
    )
    grapher_dataset.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Load a snapshot and create a meadow dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Load the `microbe_total_pathogens_amr.csv` snapshot and create a meadow dataset.

    The snapshot is expected to hold a single value in each of several
    descriptive columns. Those columns are checked and then dropped, the
    remaining identifier columns are renamed, and the table is saved as a new
    dataset in `dest_dir`.

    Raises:
        AssertionError: if any of the descriptive columns contains a value
            other than the expected one.
    """
    #
    # Load inputs.
    #
    # Retrieve snapshot.
    snap = paths.load_snapshot("microbe_total_pathogens_amr.csv")

    # Load data from snapshot.
    tb = snap.read()

    # Columns that must hold one constant value. The same mapping drives both
    # the sanity checks and the list of columns to drop, so the two cannot
    # drift apart.
    expected_constants = {
        "Age": "All Ages",
        "Sex": "Both sexes",
        "Measure": "Deaths",
        "Metric": "Number",
        "Counterfactual": "Attributable",
        "Infectious syndrome": "All infectious syndromes",
    }
    for column, expected_value in expected_constants.items():
        assert all(
            tb[column] == expected_value
        ), f"Unexpected value in column '{column}': every row should be '{expected_value}'."

    #
    # Process data.
    #
    # The checked columns carry no information once validated.
    tb = tb.drop(columns=list(expected_constants))
    tb = tb.rename(columns={"Location": "country", "Year": "year", "Pathogen": "pathogen"})
    # Ensure all columns are snake-case, set an appropriate index, and sort conveniently.
    tb = tb.format(["country", "year", "pathogen"])

    #
    # Save outputs.
    #
    # Create a new meadow dataset with the same metadata as the snapshot.
    ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata)

    # Save changes in the new meadow dataset.
    ds_meadow.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Learn more at:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
meta:
origin:
# Data product / Snapshot
title: Total deaths by pathogen attributable to antimicrobial resistance
description: |-
The MICROBE (Measuring Infectious Causes and Resistance Outcomes for Burden Estimation) tool visualizes the fatal and nonfatal health outcomes of infections, pathogens, and antimicrobial resistance across different countries and regions. The tool shows a novel estimation method, [published in The Lancet](https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(24)01867-1/fulltext), including the burden of infections and their underlying pathogens, as well as the burden of pathogens that are both susceptible and resistant to antibiotics. This tool is useful for understanding the burden of these outcomes, as well as illustrating how they nest together. The tabs explore different health outcomes by geography, age and sex. All tabs include a bar visualization for comparison, as well as a map view for a global perspective.
date_published: "2024-09-28"

# Citation
producer: Institute for Health Metrics and Evaluation (IHME); University of Oxford
citation_full: |-
Institute for Health Metrics and Evaluation (IHME), University of Oxford. MICROBE. Seattle, WA: IHME, University of Washington, 2024. Available from [https://vizhub.healthdata.org/microbe](https://vizhub.healthdata.org/microbe)
attribution_short: MICROBE

# Files
url_main: https://vizhub.healthdata.org/microbe/
date_accessed: 2024-12-04

# License
license:
name: IHME's Free-of-Charge Non-commercial User Agreement
url: https://www.healthdata.org/Data-tools-practices/data-practices/ihme-free-charge-non-commercial-user-agreement


is_public: false
outs:
- md5: 95bd7ca4c721a4e5113fd54ee598dad3
size: 3989
path: microbe_total_pathogens_amr.csv
25 changes: 25 additions & 0 deletions snapshots/antibiotics/2024-12-04/microbe_total_pathogens_amr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Script to create a snapshot of dataset."""

from pathlib import Path

import click

from etl.snapshot import Snapshot

# Version for current snapshot dataset.
SNAPSHOT_VERSION = Path(__file__).parent.name


# Command-line entry point: registers a local data file as the
# `microbe_total_pathogens_amr.csv` snapshot for the current version.
# Documented with comments rather than a docstring on purpose: click shows a
# command's docstring as its `--help` text.
@click.command()
@click.option(
    "--upload/--skip-upload",
    default=True,
    type=bool,
    help="Upload dataset to Snapshot",
)
@click.option(
    "--path-to-file",
    "-f",
    prompt=True,
    type=str,
    help="Path to local data file.",
)
def main(path_to_file: str, upload: bool) -> None:
    # The snapshot is identified by namespace, version and file name.
    snapshot_uri = f"antibiotics/{SNAPSHOT_VERSION}/microbe_total_pathogens_amr.csv"
    snapshot = Snapshot(snapshot_uri)

    # Hand the local file to the snapshot; `upload` is forwarded unchanged.
    snapshot.create_snapshot(filename=path_to_file, upload=upload)


if __name__ == "__main__":
    main()

0 comments on commit 71e493b

Please sign in to comment.