From 9ec43578a62359221b0d9ab288e382841ddb0699 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 4 Dec 2024 12:50:42 +0000 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=93=8A=20microbe=20total=20deaths=20f?= =?UTF-8?q?rom=20pathogend?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From 1aacf8bfa12fae94636a200433a47c38f7e07d29 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 4 Dec 2024 13:04:28 +0000 Subject: [PATCH 2/3] adding total deaths from pathogens --- dag/health.yml | 6 +++ .../total_pathogen_bloodstream.meta.yml | 18 ++++---- .../microbe_total_pathogens.countries.json | 3 ++ .../microbe_total_pathogens.meta.yml | 46 +++++++++++++++++++ .../2024-12-04/microbe_total_pathogens.py | 35 ++++++++++++++ .../2024-12-04/microbe_total_pathogens.py | 28 +++++++++++ .../2024-12-04/microbe_total_pathogens.py | 39 ++++++++++++++++ .../microbe_total_pathogens.csv.dvc | 27 +++++++++++ .../2024-12-04/microbe_total_pathogens.py | 37 +++++++++++++++ 9 files changed, 230 insertions(+), 9 deletions(-) create mode 100644 etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.countries.json create mode 100644 etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.meta.yml create mode 100644 etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.py create mode 100644 etl/steps/data/grapher/antibiotics/2024-12-04/microbe_total_pathogens.py create mode 100644 etl/steps/data/meadow/antibiotics/2024-12-04/microbe_total_pathogens.py create mode 100644 snapshots/antibiotics/2024-12-04/microbe_total_pathogens.csv.dvc create mode 100644 snapshots/antibiotics/2024-12-04/microbe_total_pathogens.py diff --git a/dag/health.yml b/dag/health.yml index fdc4bae7c96..3f0f58e2039 100644 --- a/dag/health.yml +++ b/dag/health.yml @@ -969,3 +969,9 @@ steps: - data://meadow/antibiotics/2024-12-03/glass_enrolment data://grapher/antibiotics/2024-12-03/glass_enrolment: - data://garden/antibiotics/2024-12-03/glass_enrolment + 
data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens: + - snapshot-private://antibiotics/2024-12-04/microbe_total_pathogens.csv + data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens: + - data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens + data-private://grapher/antibiotics/2024-12-04/microbe_total_pathogens: + - data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens diff --git a/etl/steps/data/garden/antibiotics/2024-12-02/total_pathogen_bloodstream.meta.yml b/etl/steps/data/garden/antibiotics/2024-12-02/total_pathogen_bloodstream.meta.yml index 60ee224f93b..04044bb693f 100644 --- a/etl/steps/data/garden/antibiotics/2024-12-02/total_pathogen_bloodstream.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-12-02/total_pathogen_bloodstream.meta.yml @@ -15,31 +15,31 @@ tables: total_pathogen_bloodstream: variables: value: - title: Total deaths from << pathogen >> infections + title: Total deaths from << pathogen >> bloodstream infections unit: deaths - description_short: Estimated number of deaths << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. + description_short: Estimated number of deaths from << pathogen >> bloodstream infections. << pathogen >> is a {definitions.pathogen_type}. presentation: - title_public: Total deaths from << pathogen >> infections + title_public: Total deaths from << pathogen >> bloodstream infections display: roundingMode: significantFigures numSignificantFigures: 3 name: << pathogen >> upper: - title: Upper bound of total deaths from << pathogen >> infections + title: Upper bound of total deaths from << pathogen >> bloodstream infections unit: deaths - description_short: Estimated number of deaths << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. + description_short: Estimated number of deaths from << pathogen >> bloodstream infections. << pathogen >> is a {definitions.pathogen_type}. 
presentation: - title_public: Upper bound of total deaths from << pathogen >> infections + title_public: Upper bound of total deaths from << pathogen >> bloodstream infections display: roundingMode: significantFigures numSignificantFigures: 3 name: << pathogen >> lower: - title: Lower bound of total deaths from << pathogen >> infections + title: Lower bound of total deaths from << pathogen >> bloodstream infections unit: deaths - description_short: Estimated number of deaths << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. + description_short: Estimated number of deaths from << pathogen >> bloodstream infections. << pathogen >> is a {definitions.pathogen_type}. presentation: - title_public: Lower bound of total deaths from << pathogen >> infections + title_public: Lower bound of total deaths from << pathogen >> bloodstream infections display: roundingMode: significantFigures numSignificantFigures: 3 diff --git a/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.countries.json b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.countries.json new file mode 100644 index 00000000000..a7f8eced782 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.countries.json @@ -0,0 +1,3 @@ +{ + "Global": "World" +} \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.meta.yml b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.meta.yml new file mode 100644 index 00000000000..e29b06ed86b --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.meta.yml @@ -0,0 +1,46 @@ +# NOTE: To learn more about the fields, hover over their names. 
+definitions: + common: + presentation: + topic_tags: + - Antibiotics + pathogen_type: <% if pathogen_type == "Fungi" %>fungus<% elif pathogen_type == "Viruses" %>virus<% else %><< pathogen_type.lower() >><% endif %> + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + +tables: + microbe_total_pathogens: + variables: + value: + title: Total deaths from << pathogen >> infections + unit: deaths + description_short: Estimated number of deaths from << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. + presentation: + title_public: Total deaths from << pathogen >> infections + display: + roundingMode: significantFigures + numSignificantFigures: 3 + name: << pathogen >> + upper: + title: Upper bound of total deaths from << pathogen >> infections + unit: deaths + description_short: Estimated number of deaths from << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. + presentation: + title_public: Upper bound of total deaths from << pathogen >> infections + display: + roundingMode: significantFigures + numSignificantFigures: 3 + name: << pathogen >> + lower: + title: Lower bound of total deaths from << pathogen >> infections + unit: deaths + description_short: Estimated number of deaths from << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. 
+ presentation: + title_public: Lower bound of total deaths from << pathogen >> infections + display: + roundingMode: significantFigures + numSignificantFigures: 3 + name: << pathogen >> diff --git a/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.py b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.py new file mode 100644 index 00000000000..b6883c23419 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens.py @@ -0,0 +1,35 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("microbe_total_pathogens") + + # Read table from meadow dataset. + tb = ds_meadow.read("microbe_total_pathogens") + + # + # Process data. + # + tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + tb = tb.format(["country", "year", "pathogen", "pathogen_type"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() diff --git a/etl/steps/data/grapher/antibiotics/2024-12-04/microbe_total_pathogens.py b/etl/steps/data/grapher/antibiotics/2024-12-04/microbe_total_pathogens.py new file mode 100644 index 00000000000..898f3506d2b --- /dev/null +++ b/etl/steps/data/grapher/antibiotics/2024-12-04/microbe_total_pathogens.py @@ -0,0 +1,28 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. 
+paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("microbe_total_pathogens") + + # Read table from garden dataset. + tb = ds_garden.read("microbe_total_pathogens", reset_index=False) + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/meadow/antibiotics/2024-12-04/microbe_total_pathogens.py b/etl/steps/data/meadow/antibiotics/2024-12-04/microbe_total_pathogens.py new file mode 100644 index 00000000000..f03016ef003 --- /dev/null +++ b/etl/steps/data/meadow/antibiotics/2024-12-04/microbe_total_pathogens.py @@ -0,0 +1,39 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("microbe_total_pathogens.csv") + + # Load data from snapshot. + tb = snap.read() + assert all(tb["Age"] == "All Ages") + assert all(tb["Sex"] == "Both sexes") + assert all(tb["Measure"] == "Deaths") + assert all(tb["Metric"] == "Number") + assert all(tb["Counterfactual"] == "Total") + assert all(tb["Infectious syndrome"] == "All infectious syndromes") + + # + # Process data. + tb = tb.drop(columns=["Age", "Sex", "Measure", "Metric", "Infectious syndrome", "Counterfactual"]) + tb = tb.rename(columns={"Location": "country", "Year": "year", "Pathogen": "pathogen"}) + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year", "pathogen"]) + + # + # Save outputs. 
+ # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() diff --git a/snapshots/antibiotics/2024-12-04/microbe_total_pathogens.csv.dvc b/snapshots/antibiotics/2024-12-04/microbe_total_pathogens.csv.dvc new file mode 100644 index 00000000000..e904e5d6873 --- /dev/null +++ b/snapshots/antibiotics/2024-12-04/microbe_total_pathogens.csv.dvc @@ -0,0 +1,27 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Total deaths by pathogen + description: |- + The MICROBE (Measuring Infectious Causes and Resistance Outcomes for Burden Estimation) tool visualizes the fatal and nonfatal health outcomes of infections, pathogens, and antimicrobial resistance across different countries and regions. The tool shows a novel estimation method, [published in The Lancet](https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(24)01867-1/fulltext), including the burden of infections and their underlying pathogens, as well as, the burden of pathogens that are both susceptible and resistant to antibiotics. This tool is useful for understanding the burden of these outcomes, as well as illustrating how they nest together. The tabs explore different health outcomes by geography, age and sex. All tabs include a bar visualization for comparison, as well as a map view for a global perspective. + date_published: "2024-09-28" + # Citation + producer: Institute for Health Metrics and Evaluation (IHME); University of Oxford + citation_full: |- + Institute for Health Metrics and Evaluation (IHME), University of Oxford. MICROBE. Seattle, WA: IHME, University of Washington, 2024. 
Available from [https://vizhub.healthdata.org/microbe](https://vizhub.healthdata.org/microbe) + attribution_short: MICROBE + # Files + url_main: https://vizhub.healthdata.org/microbe/ + date_accessed: 2024-12-04 + + # License + license: + name: IHME's Free-of-Charge Non-commercial User Agreement + url: https://www.healthdata.org/Data-tools-practices/data-practices/ihme-free-charge-non-commercial-user-agreement + is_public: false +outs: + - md5: bff21259d44b1ab5a61ecf675285d13b + size: 10960 + path: microbe_total_pathogens.csv diff --git a/snapshots/antibiotics/2024-12-04/microbe_total_pathogens.py b/snapshots/antibiotics/2024-12-04/microbe_total_pathogens.py new file mode 100644 index 00000000000..754aaafe897 --- /dev/null +++ b/snapshots/antibiotics/2024-12-04/microbe_total_pathogens.py @@ -0,0 +1,37 @@ +"""Script to create a snapshot of dataset. + +To download the data visit: https://vizhub.healthdata.org/microbe/ + +- Select the 'Pathogens' tab. +- Infectious syndrome: 'All infectious syndromes' +- Location: 'Global' +- Age: 'All ages' +- Sex: 'Both' +- Measure: 'Deaths' +- Metric: 'Number' + +""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +@click.option("--path-to-file", "-f", prompt=True, type=str, help="Path to local data file.") +def main(path_to_file: str, upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"antibiotics/{SNAPSHOT_VERSION}/microbe_total_pathogens.csv") + + # Copy local data file to snapshots data folder, add file to DVC and upload to S3. 
+ snap.create_snapshot(filename=path_to_file, upload=upload) + + +if __name__ == "__main__": + main() From 71e493b0d3d7d56cc5ab8d9f16310a699e270f31 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 4 Dec 2024 13:47:21 +0000 Subject: [PATCH 3/3] adding attributable deaths and non-attributable deaths --- dag/health.yml | 9 +++ ...microbe_total_pathogens_amr.countries.json | 3 + .../microbe_total_pathogens_amr.meta.yml | 42 ++++++++++++++ .../2024-12-04/microbe_total_pathogens_amr.py | 55 +++++++++++++++++++ .../2024-12-04/microbe_total_pathogens_amr.py | 28 ++++++++++ .../2024-12-04/microbe_total_pathogens_amr.py | 39 +++++++++++++ .../microbe_total_pathogens_amr.csv.dvc | 31 +++++++++++ .../2024-12-04/microbe_total_pathogens_amr.py | 25 +++++++++ 8 files changed, 232 insertions(+) create mode 100644 etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.countries.json create mode 100644 etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.meta.yml create mode 100644 etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.py create mode 100644 etl/steps/data/grapher/antibiotics/2024-12-04/microbe_total_pathogens_amr.py create mode 100644 etl/steps/data/meadow/antibiotics/2024-12-04/microbe_total_pathogens_amr.py create mode 100644 snapshots/antibiotics/2024-12-04/microbe_total_pathogens_amr.csv.dvc create mode 100644 snapshots/antibiotics/2024-12-04/microbe_total_pathogens_amr.py diff --git a/dag/health.yml b/dag/health.yml index 3f0f58e2039..aa6af23ae00 100644 --- a/dag/health.yml +++ b/dag/health.yml @@ -969,9 +969,18 @@ steps: - data://meadow/antibiotics/2024-12-03/glass_enrolment data://grapher/antibiotics/2024-12-03/glass_enrolment: - data://garden/antibiotics/2024-12-03/glass_enrolment + # MICROBE - total deaths by pathogen data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens: - snapshot-private://antibiotics/2024-12-04/microbe_total_pathogens.csv 
data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens: - data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens data-private://grapher/antibiotics/2024-12-04/microbe_total_pathogens: - data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens + # MICROBE - total deaths by pathogen and amr resistance + data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens_amr: + - snapshot-private://antibiotics/2024-12-04/microbe_total_pathogens_amr.csv + data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens_amr: + - data-private://meadow/antibiotics/2024-12-04/microbe_total_pathogens_amr + - data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens + data-private://grapher/antibiotics/2024-12-04/microbe_total_pathogens_amr: + - data-private://garden/antibiotics/2024-12-04/microbe_total_pathogens_amr diff --git a/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.countries.json b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.countries.json new file mode 100644 index 00000000000..a7f8eced782 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.countries.json @@ -0,0 +1,3 @@ +{ + "Global": "World" +} \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.meta.yml b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.meta.yml new file mode 100644 index 00000000000..2edb137309c --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.meta.yml @@ -0,0 +1,42 @@ +# NOTE: To learn more about the fields, hover over their names. 
+definitions: + common: + presentation: + topic_tags: + - Antibiotics + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + +tables: + microbe_total_pathogens_amr: + variables: + amr_attributable_deaths: + title: Total deaths from infections attributed to AMR, by pathogen + unit: deaths + description_short: Estimated number of deaths from infections that are attributed to antimicrobial resistance. + presentation: + title_public: Total deaths from infections attributed to AMR, by pathogen + display: + roundingMode: significantFigures + numSignificantFigures: 3 + non_amr_attributable_deaths: + title: Total global deaths from infections not attributed to AMR, by pathogen + unit: deaths + description_short: Estimated number of deaths from infections that are not attributed to antimicrobial resistance. + presentation: + title_public: Total global deaths from infections not attributed to AMR, by pathogen + display: + roundingMode: significantFigures + numSignificantFigures: 3 + total_deaths: + title: Total global deaths from infections + unit: deaths + description_short: Estimated number of deaths from infections. + presentation: + title_public: Total global deaths from infections + display: + roundingMode: significantFigures + numSignificantFigures: 3 diff --git a/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.py b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.py new file mode 100644 index 00000000000..ca8e9998c57 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-12-04/microbe_total_pathogens_amr.py @@ -0,0 +1,55 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. 
+ # + # Load meadow dataset. + ds_meadow = paths.load_dataset("microbe_total_pathogens_amr") + ds_total = paths.load_dataset("microbe_total_pathogens") + + # Read table from meadow dataset. + tb = ( + ds_meadow.read("microbe_total_pathogens_amr") + .drop(columns=["upper", "lower"]) + .rename(columns={"value": "amr_attributable_deaths"}) + ) + tb_total = ( + ds_total.read("microbe_total_pathogens") + .drop(columns=["upper", "lower"]) + .rename(columns={"value": "total_deaths"}) + ) + # + # Process data. + # + tb = geo.harmonize_countries( + df=tb, + countries_file=paths.country_mapping_path, + ) + + tb = tb.merge(tb_total, on=["country", "year", "pathogen", "pathogen_type"], how="right") + + tb["amr_attributable_deaths"] = tb["amr_attributable_deaths"].fillna(0) + tb["non_amr_attributable_deaths"] = tb["total_deaths"] - tb["amr_attributable_deaths"] + # Process data. + tb = tb.drop(columns=["country", "pathogen_type"]).rename(columns={"pathogen": "country"}) + + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() diff --git a/etl/steps/data/grapher/antibiotics/2024-12-04/microbe_total_pathogens_amr.py b/etl/steps/data/grapher/antibiotics/2024-12-04/microbe_total_pathogens_amr.py new file mode 100644 index 00000000000..2a5df838f2e --- /dev/null +++ b/etl/steps/data/grapher/antibiotics/2024-12-04/microbe_total_pathogens_amr.py @@ -0,0 +1,28 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. 
+ ds_garden = paths.load_dataset("microbe_total_pathogens_amr") + + # Read table from garden dataset. + tb = ds_garden.read("microbe_total_pathogens_amr", reset_index=False) + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/meadow/antibiotics/2024-12-04/microbe_total_pathogens_amr.py b/etl/steps/data/meadow/antibiotics/2024-12-04/microbe_total_pathogens_amr.py new file mode 100644 index 00000000000..0e88870141e --- /dev/null +++ b/etl/steps/data/meadow/antibiotics/2024-12-04/microbe_total_pathogens_amr.py @@ -0,0 +1,39 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("microbe_total_pathogens_amr.csv") + + # Load data from snapshot. + tb = snap.read() + assert all(tb["Age"] == "All Ages") + assert all(tb["Sex"] == "Both sexes") + assert all(tb["Measure"] == "Deaths") + assert all(tb["Metric"] == "Number") + assert all(tb["Counterfactual"] == "Attributable") + assert all(tb["Infectious syndrome"] == "All infectious syndromes") + + # + # Process data. + tb = tb.drop(columns=["Age", "Sex", "Measure", "Metric", "Infectious syndrome", "Counterfactual"]) + tb = tb.rename(columns={"Location": "country", "Year": "year", "Pathogen": "pathogen"}) + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year", "pathogen"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. 
+ ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() diff --git a/snapshots/antibiotics/2024-12-04/microbe_total_pathogens_amr.csv.dvc b/snapshots/antibiotics/2024-12-04/microbe_total_pathogens_amr.csv.dvc new file mode 100644 index 00000000000..77cd02a9cf8 --- /dev/null +++ b/snapshots/antibiotics/2024-12-04/microbe_total_pathogens_amr.csv.dvc @@ -0,0 +1,31 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Total deaths by pathogen attributable to antimicrobial resistance + description: |- + The MICROBE (Measuring Infectious Causes and Resistance Outcomes for Burden Estimation) tool visualizes the fatal and nonfatal health outcomes of infections, pathogens, and antimicrobial resistance across different countries and regions. The tool shows a novel estimation method, [published in The Lancet](https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(24)01867-1/fulltext), including the burden of infections and their underlying pathogens, as well as, the burden of pathogens that are both susceptible and resistant to antibiotics. This tool is useful for understanding the burden of these outcomes, as well as illustrating how they nest together. The tabs explore different health outcomes by geography, age and sex. All tabs include a bar visualization for comparison, as well as a map view for a global perspective. + date_published: "2024-09-28" + + # Citation + producer: Institute for Health Metrics and Evaluation (IHME); University of Oxford + citation_full: |- + Institute for Health Metrics and Evaluation (IHME), University of Oxford. MICROBE. Seattle, WA: IHME, University of Washington, 2024. 
Available from [https://vizhub.healthdata.org/microbe](https://vizhub.healthdata.org/microbe) + attribution_short: MICROBE + + # Files + url_main: https://vizhub.healthdata.org/microbe/ + date_accessed: 2024-12-04 + + # License + license: + name: IHME's Free-of-Charge Non-commercial User Agreement + url: https://www.healthdata.org/Data-tools-practices/data-practices/ihme-free-charge-non-commercial-user-agreement + + + is_public: false +outs: + - md5: 95bd7ca4c721a4e5113fd54ee598dad3 + size: 3989 + path: microbe_total_pathogens_amr.csv diff --git a/snapshots/antibiotics/2024-12-04/microbe_total_pathogens_amr.py b/snapshots/antibiotics/2024-12-04/microbe_total_pathogens_amr.py new file mode 100644 index 00000000000..7b387e0ef02 --- /dev/null +++ b/snapshots/antibiotics/2024-12-04/microbe_total_pathogens_amr.py @@ -0,0 +1,25 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +@click.option("--path-to-file", "-f", prompt=True, type=str, help="Path to local data file.") +def main(path_to_file: str, upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"antibiotics/{SNAPSHOT_VERSION}/microbe_total_pathogens_amr.csv") + + # Copy local data file to snapshots data folder, add file to DVC and upload to S3. + snap.create_snapshot(filename=path_to_file, upload=upload) + + +if __name__ == "__main__": + main()