diff --git a/dag/health.yml b/dag/health.yml index 79ea360a9d7..268ca8d9c25 100644 --- a/dag/health.yml +++ b/dag/health.yml @@ -926,3 +926,32 @@ steps: - data://meadow/antibiotics/2024-11-15/testing_coverage data://grapher/antibiotics/2024-11-15/testing_coverage: - data://garden/antibiotics/2024-11-15/testing_coverage + + # IHME Neonatal all infectious syndromes + data-private://meadow/antibiotics/2024-11-20/microbe: + - snapshot-private://antibiotics/2024-11-20/microbe.zip + data-private://garden/antibiotics/2024-11-20/microbe: + - data-private://meadow/antibiotics/2024-11-20/microbe + data-private://grapher/antibiotics/2024-11-20/microbe: + - data-private://garden/antibiotics/2024-11-20/microbe + # IHME Neonatal bloodstream infections by pathogen + data-private://meadow/antibiotics/2024-11-20/pathogen_bloodstream: + - snapshot-private://antibiotics/2024-11-20/pathogen_bloodstream.csv + data-private://garden/antibiotics/2024-11-20/pathogen_bloodstream: + - data-private://meadow/antibiotics/2024-11-20/pathogen_bloodstream + data-private://grapher/antibiotics/2024-11-20/pathogen_bloodstream: + - data-private://garden/antibiotics/2024-11-20/pathogen_bloodstream + # IHME Neonatal bloodstream infections by resistance + data-private://meadow/antibiotics/2024-11-20/bloodstream_amr: + - snapshot-private://antibiotics/2024-11-20/bloodstream_amr.csv + data-private://garden/antibiotics/2024-11-20/bloodstream_amr: + - data-private://meadow/antibiotics/2024-11-20/bloodstream_amr + data-private://grapher/antibiotics/2024-11-20/bloodstream_amr: + - data-private://garden/antibiotics/2024-11-20/bloodstream_amr + # IHME Neonatal infections by syndrome + data-private://meadow/antibiotics/2024-11-24/total_syndrome: + - snapshot-private://antibiotics/2024-11-24/total_syndrome.csv + data-private://garden/antibiotics/2024-11-24/total_syndrome: + - data-private://meadow/antibiotics/2024-11-24/total_syndrome + data-private://grapher/antibiotics/2024-11-24/total_syndrome: + - 
data-private://garden/antibiotics/2024-11-24/total_syndrome diff --git a/etl/steps/data/garden/antibiotics/2024-11-20/bloodstream_amr.countries.json b/etl/steps/data/garden/antibiotics/2024-11-20/bloodstream_amr.countries.json new file mode 100644 index 00000000000..a7f8eced782 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-20/bloodstream_amr.countries.json @@ -0,0 +1,3 @@ +{ + "Global": "World" +} \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-11-20/bloodstream_amr.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-20/bloodstream_amr.meta.yml new file mode 100644 index 00000000000..5ae8c20ec36 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-20/bloodstream_amr.meta.yml @@ -0,0 +1,74 @@ +definitions: + common: + presentation: + topic_tags: + - Antibiotics + display: + numSignificantFigures: 3 + + pathogen_type: + <%- if pathogen_type == "Fungi" -%> + fungus + <%- elif pathogen_type == "Viruses" -%> + virus + <%- else -%> + << pathogen_type.lower() >> + <%- endif -%> + counterfactual: + <% if counterfactual == "Associated" %> + where the death was associated with antibiotic resistance + <% elif counterfactual == "Susceptible" %> + where the bacteria causing the death were susceptible to antibiotics + <% elif counterfactual == "Untested" %> + where the pathogen was not tested for antibiotic resistance + <% endif %> + + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + bloodstream_amr: + variables: + value: + title: Neonatal deaths from << pathogen >> infections {definitions.counterfactual} + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << pathogen >> infections, {definitions.counterfactual}. << pathogen >> is a {definitions.pathogen_type}. 
+ display: + name: << pathogen >> - << counterfactual >> + upper: + title: Upper bound of neonatal deaths from << pathogen >> infections {definitions.counterfactual} + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << pathogen >> infections, {definitions.counterfactual}. << pathogen >> is a {definitions.pathogen_type}. + display: + name: << pathogen >> - << counterfactual >> + lower: + title: Lower bound of neonatal deaths from << pathogen >> infections {definitions.counterfactual} + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << pathogen >> infections, {definitions.counterfactual}. << pathogen >> is a {definitions.pathogen_type}. + display: + name: << pathogen >> - << counterfactual >> + amr_entity: + variables: + value: + title: Global neonatal deaths from bloodstream infections, by pathogen {definitions.counterfactual} + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from bloodstream infections, {definitions.counterfactual}. + display: + name: << counterfactual >> + upper: + title: Upper bound of global neonatal deaths from bloodstream infections, by pathogen {definitions.counterfactual} + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from bloodstream infections, {definitions.counterfactual}. + display: + name: << counterfactual >> + lower: + title: Lower bound of global neonatal deaths from bloodstream infections, by pathogen {definitions.counterfactual} + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from bloodstream infections, {definitions.counterfactual}. 
+ display: + name: << counterfactual >> \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-11-20/bloodstream_amr.py b/etl/steps/data/garden/antibiotics/2024-11-20/bloodstream_amr.py new file mode 100644 index 00000000000..4c676495983 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-20/bloodstream_amr.py @@ -0,0 +1,38 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("bloodstream_amr") + + # Read table from meadow dataset. + tb = ds_meadow.read("bloodstream_amr") + + # + # Process data. + # + tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + tb = tb.drop(columns=["age", "sex", "measure", "metric", "infectious_syndrome"]) + tb_amr = tb.drop(columns=["country", "pathogen_type"]).rename(columns={"pathogen": "country"}) + tb_amr = tb_amr.format(["country", "year", "counterfactual"], short_name="amr_entity") + tb = tb.format(["country", "year", "pathogen", "pathogen_type", "counterfactual"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb, tb_amr], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. 
+ ds_garden.save() diff --git a/etl/steps/data/garden/antibiotics/2024-11-20/microbe.countries.json b/etl/steps/data/garden/antibiotics/2024-11-20/microbe.countries.json new file mode 100644 index 00000000000..a7f8eced782 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-20/microbe.countries.json @@ -0,0 +1,3 @@ +{ + "Global": "World" +} \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-11-20/microbe.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-20/microbe.meta.yml new file mode 100644 index 00000000000..e20d3c35f3d --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-20/microbe.meta.yml @@ -0,0 +1,53 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Global Health + display: + numSignificantFigures: 3 + pathogen_type: + <%- if pathogen_type == "Fungi" -%> + fungus + <%- elif pathogen_type == "Viruses" -%> + virus + <%- else -%> + << pathogen_type.lower() >> + <%- endif -%> + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + microbe: + variables: + value: + title: Neonatal deaths from << pathogen >> infections + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. + upper: + title: Upper bound of neonatal deaths from << pathogen >> infections + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. 
+ lower: + title: Lower bound of neonatal deaths from << pathogen >> infections + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. + pathogen_entity: + variables: + value: + title: Global neonatal deaths from bloodstream infections, by pathogen + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from bloodstream infections. + upper: + title: Upper bound of global neonatal deaths from bloodstream infections, by pathogen + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from bloodstream infections. + lower: + title: Lower bound of global neonatal deaths from bloodstream infections, by pathogen + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from bloodstream infections. \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-11-20/microbe.py b/etl/steps/data/garden/antibiotics/2024-11-20/microbe.py new file mode 100644 index 00000000000..fd25d05a3ff --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-20/microbe.py @@ -0,0 +1,39 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("microbe") + + # Read table from meadow dataset. + tb = ds_meadow.read("microbe") + + # + # Process data. 
+ # + tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + + tb = tb.drop(columns=["age", "sex", "measure", "metric", "counterfactual", "infectious_syndrome"]) + # Create a table where the pathogen is the entity + tb_pathogen = tb.drop(columns=["country", "pathogen_type"]).rename(columns={"pathogen": "country"}) + tb = tb.format(["country", "year", "pathogen_type", "pathogen"]) + tb_pathogen = tb_pathogen.format(["country", "year"], short_name="pathogen_entity") + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb, tb_pathogen], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() diff --git a/etl/steps/data/garden/antibiotics/2024-11-20/pathogen_bloodstream.countries.json b/etl/steps/data/garden/antibiotics/2024-11-20/pathogen_bloodstream.countries.json new file mode 100644 index 00000000000..a7f8eced782 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-20/pathogen_bloodstream.countries.json @@ -0,0 +1,3 @@ +{ + "Global": "World" +} \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-11-20/pathogen_bloodstream.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-20/pathogen_bloodstream.meta.yml new file mode 100644 index 00000000000..fd08ad170a9 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-20/pathogen_bloodstream.meta.yml @@ -0,0 +1,40 @@ +# NOTE: To learn more about the fields, hover over their names. 
+definitions: + common: + presentation: + topic_tags: + - Antibiotics + display: + numSignificantFigures: 3 + name: << pathogen >> + + pathogen_type: + <% if pathogen_type == "Fungi" %> + fungus + <% elif pathogen_type == "Viruses" %> + virus + <% else %> + << pathogen_type.lower() >> + <% endif %> + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + pathogen_bloodstream: + variables: + value: + title: Neonatal deaths from << pathogen >> infections + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. + upper: + title: Upper bound of neonatal deaths from << pathogen >> infections + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. + lower: + title: Lower bound of neonatal deaths from << pathogen >> infections + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << pathogen >> infections. << pathogen >> is a {definitions.pathogen_type}. \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-11-20/pathogen_bloodstream.py b/etl/steps/data/garden/antibiotics/2024-11-20/pathogen_bloodstream.py new file mode 100644 index 00000000000..3ccb1ae40bf --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-20/pathogen_bloodstream.py @@ -0,0 +1,37 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. 
+paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("pathogen_bloodstream") + + # Read table from meadow dataset. + tb = ds_meadow.read("pathogen_bloodstream") + + # + # Process data. + # + tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + + tb = tb.drop(columns=["age", "sex", "measure", "metric", "counterfactual", "infectious_syndrome"]) + tb = tb.format(["country", "year", "pathogen_type", "pathogen"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() diff --git a/etl/steps/data/garden/antibiotics/2024-11-24/total_syndrome.countries.json b/etl/steps/data/garden/antibiotics/2024-11-24/total_syndrome.countries.json new file mode 100644 index 00000000000..a7f8eced782 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-24/total_syndrome.countries.json @@ -0,0 +1,3 @@ +{ + "Global": "World" +} \ No newline at end of file diff --git a/etl/steps/data/garden/antibiotics/2024-11-24/total_syndrome.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-24/total_syndrome.meta.yml new file mode 100644 index 00000000000..812eeaf01dc --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-24/total_syndrome.meta.yml @@ -0,0 +1,42 @@ +# NOTE: To learn more about the fields, hover over their names. 
+definitions: + common: + presentation: + topic_tags: + - Antibiotics + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + total_syndrome: + variables: + value: + title: Global neonatal deaths from << infectious_syndrome.lower() >> infections + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << infectious_syndrome.lower() >>. + display: + roundingMode: significantFigures + numSignificantFigures: 3 + name: << infectious_syndrome >> + upper: + title: Upper bound of global neonatal deaths from << infectious_syndrome.lower() >> infections + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << infectious_syndrome.lower() >>. + display: + roundingMode: significantFigures + numSignificantFigures: 3 + name: << infectious_syndrome >> + lower: + title: Lower bound of global neonatal deaths from << infectious_syndrome.lower() >> infections + unit: deaths + description_short: Estimated number of [neonates](#dod:neonatal) – newborns under 28 days of age –  who die each year from << infectious_syndrome.lower() >>. + display: + numSignificantFigures: 3 + roundingMode: significantFigures + name: << infectious_syndrome >> + diff --git a/etl/steps/data/garden/antibiotics/2024-11-24/total_syndrome.py b/etl/steps/data/garden/antibiotics/2024-11-24/total_syndrome.py new file mode 100644 index 00000000000..fb418bd8636 --- /dev/null +++ b/etl/steps/data/garden/antibiotics/2024-11-24/total_syndrome.py @@ -0,0 +1,35 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. 
+ # + # Load meadow dataset. + ds_meadow = paths.load_dataset("total_syndrome") + + # Read table from meadow dataset. + tb = ds_meadow.read("total_syndrome") + + # + # Process data. + # + tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + tb = tb.format(["country", "year", "infectious_syndrome"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() diff --git a/etl/steps/data/grapher/antibiotics/2024-11-20/bloodstream_amr.py b/etl/steps/data/grapher/antibiotics/2024-11-20/bloodstream_amr.py new file mode 100644 index 00000000000..74525ddf3d8 --- /dev/null +++ b/etl/steps/data/grapher/antibiotics/2024-11-20/bloodstream_amr.py @@ -0,0 +1,29 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("bloodstream_amr") + + # Read table from garden dataset. + tb = ds_garden.read("bloodstream_amr", reset_index=False) + tb_amr = ds_garden.read("amr_entity", reset_index=False) + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb, tb_amr], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. 
+ ds_grapher.save() diff --git a/etl/steps/data/grapher/antibiotics/2024-11-20/microbe.py b/etl/steps/data/grapher/antibiotics/2024-11-20/microbe.py new file mode 100644 index 00000000000..848a2788147 --- /dev/null +++ b/etl/steps/data/grapher/antibiotics/2024-11-20/microbe.py @@ -0,0 +1,28 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("microbe") + + # Read table from garden dataset. + tb = ds_garden.read("microbe", reset_index=False) + tb_pathogen = ds_garden.read("pathogen_entity", reset_index=False) + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb, tb_pathogen], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/grapher/antibiotics/2024-11-20/pathogen_bloodstream.py b/etl/steps/data/grapher/antibiotics/2024-11-20/pathogen_bloodstream.py new file mode 100644 index 00000000000..1b41f653a19 --- /dev/null +++ b/etl/steps/data/grapher/antibiotics/2024-11-20/pathogen_bloodstream.py @@ -0,0 +1,28 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("pathogen_bloodstream") + + # Read table from garden dataset. + tb = ds_garden.read("pathogen_bloodstream", reset_index=False) + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. 
+ ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/grapher/antibiotics/2024-11-24/total_syndrome.py b/etl/steps/data/grapher/antibiotics/2024-11-24/total_syndrome.py new file mode 100644 index 00000000000..375d4e8f358 --- /dev/null +++ b/etl/steps/data/grapher/antibiotics/2024-11-24/total_syndrome.py @@ -0,0 +1,28 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("total_syndrome") + + # Read table from garden dataset. + tb = ds_garden.read("total_syndrome", reset_index=False) + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/meadow/antibiotics/2024-11-20/bloodstream_amr.py b/etl/steps/data/meadow/antibiotics/2024-11-20/bloodstream_amr.py new file mode 100644 index 00000000000..c938b4b7837 --- /dev/null +++ b/etl/steps/data/meadow/antibiotics/2024-11-20/bloodstream_amr.py @@ -0,0 +1,32 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("bloodstream_amr.csv") + + # Load data from snapshot. + tb = snap.read() + + # + # Process data. 
+ tb = tb.rename(columns={"Location": "country", "Year": "year"}) + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year", "pathogen", "counterfactual"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() diff --git a/etl/steps/data/meadow/antibiotics/2024-11-20/microbe.py b/etl/steps/data/meadow/antibiotics/2024-11-20/microbe.py new file mode 100644 index 00000000000..406587008d8 --- /dev/null +++ b/etl/steps/data/meadow/antibiotics/2024-11-20/microbe.py @@ -0,0 +1,38 @@ +"""Load a snapshot and create a meadow dataset.""" + +from owid.catalog import processing as pr + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +YEARS = range(1990, 2022) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("microbe.zip") + tables = [] + for year in YEARS: + # Load data from snapshot. + tb = snap.read_in_archive(filename=f"neonatal/pathogen_{year}.csv") + tables.append(tb) + tb = pr.concat(tables) + # + # Process data. + tb = tb.rename(columns={"Location": "country", "Year": "year"}) + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year", "pathogen"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. 
+ ds_meadow.save() diff --git a/etl/steps/data/meadow/antibiotics/2024-11-20/pathogen_bloodstream.py b/etl/steps/data/meadow/antibiotics/2024-11-20/pathogen_bloodstream.py new file mode 100644 index 00000000000..8fcfab4386d --- /dev/null +++ b/etl/steps/data/meadow/antibiotics/2024-11-20/pathogen_bloodstream.py @@ -0,0 +1,32 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("pathogen_bloodstream.csv") + + # Load data from snapshot. + tb = snap.read() + + # + # Process data. + tb = tb.rename(columns={"Location": "country", "Year": "year"}) + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year", "pathogen"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() diff --git a/etl/steps/data/meadow/antibiotics/2024-11-24/total_syndrome.py b/etl/steps/data/meadow/antibiotics/2024-11-24/total_syndrome.py new file mode 100644 index 00000000000..9449bd70a0a --- /dev/null +++ b/etl/steps/data/meadow/antibiotics/2024-11-24/total_syndrome.py @@ -0,0 +1,31 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("total_syndrome.csv") + + # Load data from snapshot. 
+ tb = snap.read() + + tb = tb.drop(columns=["Age", "Sex", "Measure", "Metric", "Pathogen", "Counterfactual"]) + tb = tb.rename(columns={"Location": "country", "Year": "year"}) + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year", "infectious_syndrome"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() diff --git a/snapshots/antibiotics/2024-11-20/bloodstream_amr.csv.dvc b/snapshots/antibiotics/2024-11-20/bloodstream_amr.csv.dvc new file mode 100644 index 00000000000..d697903762e --- /dev/null +++ b/snapshots/antibiotics/2024-11-20/bloodstream_amr.csv.dvc @@ -0,0 +1,33 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Neonatal deaths from bloodstream infections by pathogen and resistance type + description: |- + The MICROBE (Measuring Infectious Causes and Resistance Outcomes for Burden Estimation) tool visualizes the fatal and nonfatal health outcomes of infections, pathogens, and antimicrobial resistance across different countries and regions. The tool shows a novel estimation method, [published in The Lancet](https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(24)01867-1/fulltext), including the burden of infections and their underlying pathogens, as well as, the burden of pathogens that are both susceptible and resistant to antibiotics. This tool is useful for understanding the burden of these outcomes, as well as illustrating how they nest together. The tabs explore different health outcomes by geography, age and sex. All tabs include a bar visualization for comparison, as well as a map view for a global perspective. 
+ + date_published: "2024-09-28" + + # Citation + producer: Institute for Health Metrics and Evaluation (IHME); University of Oxford + citation_full: |- + Institute for Health Metrics and Evaluation (IHME), University of Oxford. MICROBE. Seattle, WA: IHME, University of Washington, 2024. Available from [https://vizhub.healthdata.org/microbe](https://vizhub.healthdata.org/microbe) + + attribution_short: MICROBE + + # Files + url_main: https://vizhub.healthdata.org/microbe/ + date_accessed: 2024-11-20 + + # License + license: + name: IHME's Free-of-Charge Non-commercial User Agreement + url: https://www.healthdata.org/Data-tools-practices/data-practices/ihme-free-charge-non-commercial-user-agreement + + + is_public: false +outs: + - md5: 56bf846bbb0e76403635ba94b784eedb + size: 9491 + path: bloodstream_amr.csv diff --git a/snapshots/antibiotics/2024-11-20/bloodstream_amr.py b/snapshots/antibiotics/2024-11-20/bloodstream_amr.py new file mode 100644 index 00000000000..3785f615d9d --- /dev/null +++ b/snapshots/antibiotics/2024-11-20/bloodstream_amr.py @@ -0,0 +1,25 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +@click.option("--path-to-file", prompt=True, type=str, help="Path to local data file.") +def main(path_to_file: str, upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"antibiotics/{SNAPSHOT_VERSION}/bloodstream_amr.csv") + + # Copy local data file to snapshots data folder, add file to DVC and upload to S3. 
+ snap.create_snapshot(filename=path_to_file, upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/antibiotics/2024-11-20/microbe.py b/snapshots/antibiotics/2024-11-20/microbe.py new file mode 100644 index 00000000000..3332fd8a741 --- /dev/null +++ b/snapshots/antibiotics/2024-11-20/microbe.py @@ -0,0 +1,25 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +@click.option("--path-to-file", prompt=True, type=str, help="Path to local data file.") +def main(path_to_file: str, upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"antibiotics/{SNAPSHOT_VERSION}/microbe.zip") + + # Copy local data file to snapshots data folder, add file to DVC and upload to S3. + snap.create_snapshot(filename=path_to_file, upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/antibiotics/2024-11-20/microbe.zip.dvc b/snapshots/antibiotics/2024-11-20/microbe.zip.dvc new file mode 100644 index 00000000000..8593fe51b47 --- /dev/null +++ b/snapshots/antibiotics/2024-11-20/microbe.zip.dvc @@ -0,0 +1,31 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Neonatal deaths by pathogen + description: |- + The MICROBE (Measuring Infectious Causes and Resistance Outcomes for Burden Estimation) tool visualizes the fatal and nonfatal health outcomes of infections, pathogens, and antimicrobial resistance across different countries and regions. 
The tool shows a novel estimation method, [published in The Lancet](https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(24)01867-1/fulltext), including the burden of infections and their underlying pathogens, as well as, the burden of pathogens that are both susceptible and resistant to antibiotics. This tool is useful for understanding the burden of these outcomes, as well as illustrating how they nest together. The tabs explore different health outcomes by geography, age and sex. All tabs include a bar visualization for comparison, as well as a map view for a global perspective. + date_published: "2024-09-28" + + # Citation + producer: Institute for Health Metrics and Evaluation (IHME); University of Oxford + citation_full: |- + Institute for Health Metrics and Evaluation (IHME), University of Oxford. MICROBE. Seattle, WA: IHME, University of Washington, 2024. Available from [https://vizhub.healthdata.org/microbe](https://vizhub.healthdata.org/microbe) + attribution_short: MICROBE + + # Files + url_main: https://vizhub.healthdata.org/microbe/ + date_accessed: 2024-11-20 + + # License + license: + name: IHME's Free-of-Charge Non-commercial User Agreement + url: https://www.healthdata.org/Data-tools-practices/data-practices/ihme-free-charge-non-commercial-user-agreement + + + is_public: false +outs: + - md5: e8d3cdcd212a28c245ef9d9539fe8c8a + size: 118009 + path: microbe.zip diff --git a/snapshots/antibiotics/2024-11-20/pathogen_bloodstream.csv.dvc b/snapshots/antibiotics/2024-11-20/pathogen_bloodstream.csv.dvc new file mode 100644 index 00000000000..7850fa88488 --- /dev/null +++ b/snapshots/antibiotics/2024-11-20/pathogen_bloodstream.csv.dvc @@ -0,0 +1,31 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Neonatal deaths from bloodstream infections by pathogen + description: |- + The MICROBE (Measuring Infectious Causes and Resistance Outcomes for Burden 
Estimation) tool visualizes the fatal and nonfatal health outcomes of infections, pathogens, and antimicrobial resistance across different countries and regions. The tool shows a novel estimation method, [published in The Lancet](https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(24)01867-1/fulltext), including the burden of infections and their underlying pathogens, as well as, the burden of pathogens that are both susceptible and resistant to antibiotics. This tool is useful for understanding the burden of these outcomes, as well as illustrating how they nest together. The tabs explore different health outcomes by geography, age and sex. All tabs include a bar visualization for comparison, as well as a map view for a global perspective. + date_published: "2024-09-28" + + # Citation + producer: Institute for Health Metrics and Evaluation (IHME); University of Oxford + citation_full: |- + Institute for Health Metrics and Evaluation (IHME), University of Oxford. MICROBE. Seattle, WA: IHME, University of Washington, 2024. 
@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
@click.option("--path-to-file", prompt=True, type=str, help="Path to local data file.")
def main(path_to_file: str, upload: bool) -> None:
    # NOTE: intentionally no docstring here -- click would expose it as the
    # command's --help text, which would change the CLI output.

    # Snapshot URI: namespace / version (name of this script's folder) / file name.
    snapshot_uri = f"antibiotics/{SNAPSHOT_VERSION}/pathogen_bloodstream.csv"
    snapshot = Snapshot(snapshot_uri)

    # Hand the local file over to the snapshot: it is copied into the snapshots
    # data folder and added to DVC; `upload` is forwarded unchanged from the CLI flag.
    snapshot.create_snapshot(filename=path_to_file, upload=upload)
@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
@click.option("--path-to-file", prompt=True, type=str, help="Path to local data file.")
def main(path_to_file: str, upload: bool) -> None:
    # NOTE: intentionally no docstring here -- click would expose it as the
    # command's --help text, which would change the CLI output.

    # Snapshot URI: namespace / version (name of this script's folder) / file name.
    snapshot_uri = f"antibiotics/{SNAPSHOT_VERSION}/total_syndrome.csv"
    snapshot = Snapshot(snapshot_uri)

    # Hand the local file over to the snapshot: it is copied into the snapshots
    # data folder and added to DVC; `upload` is forwarded unchanged from the CLI flag.
    snapshot.create_snapshot(filename=path_to_file, upload=upload)