From 88d3c410c0d9ab1d84c148cc51d249d321cf6a4d Mon Sep 17 00:00:00 2001 From: spoonerf Date: Tue, 26 Nov 2024 15:59:40 +0000 Subject: [PATCH 01/30] adding an aggregate table --- .../2024-11-12/antimicrobial_usage.meta.yml | 32 +++++++++++++++---- .../2024-11-12/antimicrobial_usage.py | 16 ++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index 0a97108591e..66c919d7d81 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -4,8 +4,7 @@ definitions: presentation: topic_tags: - Global Health - aware_description: - <% if aware == "A" %> + aware_description: <% if aware == "A" %> Access antibiotics have activity against a wide range of common pathogens and show lower resistance potential than antibiotics in the other groups. <% elif aware == "W" %> Watch antibiotic have higher resistance potential and include most of the highest priority agents among the Critically Important Antimicrobials for Human Medicine and/or antibiotics that are at relatively high risk of bacterial resistance. @@ -14,8 +13,7 @@ definitions: <% elif aware == "O" %> The use of the Not classified/Not recommended antibiotics is not evidence-based, nor recommended in high-quality international guidelines. WHO does not recommend the use of these antibiotics in clinical practice. 
<% endif %> - routeofadministration: - <% if routeofadministration == "O" %> + routeofadministration: <% if routeofadministration == "O" %> orally administered <% elif routeofadministration == "P" %> parentearally administered @@ -24,14 +22,24 @@ definitions: <% elif routeofadministration == "I" %> inhaled <% endif %> - + antimicrobialclass: + <% if antimicrobialclass == "Antibacterials (ATC J01, A07AA, P01AB)" %> + antibiotics + <% elif antimicrobialclass == "Antimalarials (ATC P01B)" %> + antimalarials + <% elif antimicrobialclass == "Antimycotics and antifungals for systemic use (J02, D01B)" %> + antifungals + <% elif antimicrobialclass == "Antivirals for systemic use (ATC J05)" %> + antivirals + <% elif antimicrobialclass == "Drugs for the treatment of tuberculosis (ATC J04A)" %> + antituberculosis medicines + <% endif %> # Learn more about the available fields: # http://docs.owid.io/projects/etl/architecture/metadata/reference/ dataset: update_period_days: 308 - tables: class: variables: @@ -45,6 +53,18 @@ tables: description_short: Volume of antimicrobials used per 1000 inhabitants per day. #description_processing: <> unit: defined daily doses per 1000 inhabitants per day + class_aggregated: + variables: + ddd: + title: Defined daily doses of {defintions.antimicrobialclass} used + description_short: Volume of antimicrobials used in a given year. + #description_processing: <> + unit: defined daily doses + did: + title: Defined daily doses per 1000 inhabitants per day of {defintions.antimicrobialclass} used + description_short: Volume of antimicrobials used per 1000 inhabitants per day. 
+ #description_processing: <> + unit: defined daily doses per 1000 inhabitants per day aware: variables: ddd: diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index d167c0d1737..e24af2a6319 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -1,5 +1,7 @@ """Load a meadow dataset and create a garden dataset.""" +from owid.catalog import Table + from etl.data_helpers import geo from etl.helpers import PathFinder, create_dataset @@ -29,8 +31,12 @@ def run(dest_dir: str) -> None: ) tb_aware = tb_aware.drop(columns=["whoregioncode", "whoregionname", "incomeworldbankjune", "aware", "notes"]) + # Aggregate by antimicrobial class + tb_class_agg = aggregate_antimicrobial_classes(tb_class) + tb_class = tb_class.format(["country", "year", "antimicrobialclass", "atc4name", "routeofadministration"]) tb_aware = tb_aware.format(["country", "year", "awarelabel"]) + tb_class_agg = tb_class_agg.format(["country", "year", "antimicrobialclass"], short_name="class_aggregated") # # Save outputs. @@ -42,3 +48,13 @@ def run(dest_dir: str) -> None: # Save changes in the new garden dataset. 
ds_garden.save() + + +def aggregate_antimicrobial_classes(tb_class: Table) -> Table: + """ + Aggregating by antimicrobial class + """ + + tb_class = tb_class.groupby(["country", "year", "antimicrobialclass"])[["ddd", "did"]].sum().reset_index() + + return tb_class From ea752035f9d42e1b9233872ee71af839498eb2fe Mon Sep 17 00:00:00 2001 From: spoonerf Date: Tue, 26 Nov 2024 16:00:21 +0000 Subject: [PATCH 02/30] add grapher --- .../grapher/antibiotics/2024-11-12/antimicrobial_usage.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/etl/steps/data/grapher/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/grapher/antibiotics/2024-11-12/antimicrobial_usage.py index 1750c76a66b..9a24fbc4608 100644 --- a/etl/steps/data/grapher/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/grapher/antibiotics/2024-11-12/antimicrobial_usage.py @@ -16,6 +16,7 @@ def run(dest_dir: str) -> None: # Read table from garden dataset. tb_class = ds_garden["class"] tb_aware = ds_garden["aware"] + tb_class_agg = ds_garden["class_aggregated"] # # Process data. @@ -26,7 +27,10 @@ def run(dest_dir: str) -> None: # # Create a new grapher dataset with the same metadata as the garden dataset. ds_grapher = create_dataset( - dest_dir, tables=[tb_class, tb_aware], check_variables_metadata=True, default_metadata=ds_garden.metadata + dest_dir, + tables=[tb_class, tb_aware, tb_class_agg], + check_variables_metadata=True, + default_metadata=ds_garden.metadata, ) # Save changes in the new grapher dataset. 
From 103e9d3b486f2498bbdb7fa4ecb57c56bb030abd Mon Sep 17 00:00:00 2001 From: spoonerf Date: Tue, 26 Nov 2024 16:06:47 +0000 Subject: [PATCH 03/30] typ --- .../antibiotics/2024-11-12/antimicrobial_usage.meta.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index 66c919d7d81..e8eb934abe8 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -56,12 +56,12 @@ tables: class_aggregated: variables: ddd: - title: Defined daily doses of {defintions.antimicrobialclass} used + title: Defined daily doses of {definitions.antimicrobialclass} used description_short: Volume of antimicrobials used in a given year. #description_processing: <> unit: defined daily doses did: - title: Defined daily doses per 1000 inhabitants per day of {defintions.antimicrobialclass} used + title: Defined daily doses per 1000 inhabitants per day of {definitions.antimicrobialclass} used description_short: Volume of antimicrobials used per 1000 inhabitants per day. 
#description_processing: <> unit: defined daily doses per 1000 inhabitants per day From e5b3986ecdfa2dfef965637ff5a886a5f267dbde Mon Sep 17 00:00:00 2001 From: spoonerf Date: Tue, 26 Nov 2024 16:07:55 +0000 Subject: [PATCH 04/30] forgot to add table --- .../garden/antibiotics/2024-11-12/antimicrobial_usage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index e24af2a6319..fc2fa15038c 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -43,7 +43,10 @@ def run(dest_dir: str) -> None: # # Create a new garden dataset with the same metadata as the meadow dataset. ds_garden = create_dataset( - dest_dir, tables=[tb_class, tb_aware], check_variables_metadata=True, default_metadata=ds_meadow.metadata + dest_dir, + tables=[tb_class, tb_aware, tb_class_agg], + check_variables_metadata=True, + default_metadata=ds_meadow.metadata, ) # Save changes in the new garden dataset. 
From e1295c50c38a275521906ae7afe340754f0d8b57 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Tue, 26 Nov 2024 17:42:58 +0000 Subject: [PATCH 05/30] trying out description processing --- .../2024-11-12/antimicrobial_usage.meta.yml | 4 +-- .../2024-11-12/antimicrobial_usage.py | 35 ++++++++++++++++--- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index e8eb934abe8..710b7d03d07 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -58,12 +58,12 @@ tables: ddd: title: Defined daily doses of {definitions.antimicrobialclass} used description_short: Volume of antimicrobials used in a given year. - #description_processing: <> + description_processing: <> unit: defined daily doses did: title: Defined daily doses per 1000 inhabitants per day of {definitions.antimicrobialclass} used description_short: Volume of antimicrobials used per 1000 inhabitants per day. 
- #description_processing: <> + description_processing: <> unit: defined daily doses per 1000 inhabitants per day aware: variables: diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index fc2fa15038c..632d5485441 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -24,6 +24,8 @@ def run(dest_dir: str) -> None: # tb_class = geo.harmonize_countries(df=tb_class, countries_file=paths.country_mapping_path) tb_aware = geo.harmonize_countries(df=tb_aware, countries_file=paths.country_mapping_path) + # Aggregate by antimicrobial class + tb_class_agg = aggregate_antimicrobial_classes(tb_class) # Drop columns that are not needed in the garden dataset. tb_class = tb_class.drop( @@ -31,13 +33,10 @@ def run(dest_dir: str) -> None: ) tb_aware = tb_aware.drop(columns=["whoregioncode", "whoregionname", "incomeworldbankjune", "aware", "notes"]) - # Aggregate by antimicrobial class - tb_class_agg = aggregate_antimicrobial_classes(tb_class) - tb_class = tb_class.format(["country", "year", "antimicrobialclass", "atc4name", "routeofadministration"]) tb_aware = tb_aware.format(["country", "year", "awarelabel"]) tb_class_agg = tb_class_agg.format(["country", "year", "antimicrobialclass"], short_name="class_aggregated") - + tb_class = format_notes(tb_class_agg) # # Save outputs. 
# @@ -58,6 +57,32 @@ def aggregate_antimicrobial_classes(tb_class: Table) -> Table: Aggregating by antimicrobial class """ - tb_class = tb_class.groupby(["country", "year", "antimicrobialclass"])[["ddd", "did"]].sum().reset_index() + tb_class = tb_class.groupby(["country", "year", "antimicrobialclass", "notes"])[["ddd", "did"]].sum().reset_index() return tb_class + + +def format_notes(tb: Table) -> Table: + """ + Format notes column + """ + for note in tb["notes"].unique(): + msk = tb["notes"] == note + tb_note = tb[msk] + countries = tb_note["country"].unique() + countries_formatted = combine_countries(countries) + description_processing_string = f"In {countries_formatted}: {note}" + tb.loc[msk, "description_processing"] = description_processing_string + return tb + + +def combine_countries(countries): + # Combine countries into a string + if not countries: + return "" + elif len(countries) == 1: + return countries[0] + elif len(countries) == 2: + return " and ".join(countries) + else: + return ", ".join(countries[:-1]) + " and " + countries[-1] From 6d80645e412e6c9dd35ddb3d0ae67bd989815f10 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Tue, 26 Nov 2024 18:04:53 +0000 Subject: [PATCH 06/30] combine anti tb into antibacs --- .../antibiotics/2024-11-12/antimicrobial_usage.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 632d5485441..96448a8387f 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -35,8 +35,9 @@ def run(dest_dir: str) -> None: tb_class = tb_class.format(["country", "year", "antimicrobialclass", "atc4name", "routeofadministration"]) tb_aware = tb_aware.format(["country", "year", "awarelabel"]) + tb_class_agg = format_notes(tb_class_agg) tb_class_agg = 
tb_class_agg.format(["country", "year", "antimicrobialclass"], short_name="class_aggregated") - tb_class = format_notes(tb_class_agg) + # # Save outputs. # @@ -56,7 +57,15 @@ def aggregate_antimicrobial_classes(tb_class: Table) -> Table: """ Aggregating by antimicrobial class """ - + # Combine antitubercolosis into antibacterials + tb_class["antimicrobialclass"] = tb_class["antimicrobialclass"].astype(str) + tb_class["antimicrobialclass"] = tb_class["antimicrobialclass"].replace( + { + "Drugs for the treatment of tuberculosis (ATC J04A)": "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)", + "Antibacterials (ATC J01, A07AA, P01AB)": "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)", + }, + ) + assert len(tb_class["antimicrobialclass"].unique()) == 4 tb_class = tb_class.groupby(["country", "year", "antimicrobialclass", "notes"])[["ddd", "did"]].sum().reset_index() return tb_class From 3a31c50065a98af6be1437f8f2fafbe55c6df7c5 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 09:10:40 +0000 Subject: [PATCH 07/30] adding description processing to index --- .../garden/antibiotics/2024-11-12/antimicrobial_usage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 96448a8387f..0c85c497938 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -36,7 +36,10 @@ def run(dest_dir: str) -> None: tb_class = tb_class.format(["country", "year", "antimicrobialclass", "atc4name", "routeofadministration"]) tb_aware = tb_aware.format(["country", "year", "awarelabel"]) tb_class_agg = format_notes(tb_class_agg) - tb_class_agg = tb_class_agg.format(["country", "year", "antimicrobialclass"], short_name="class_aggregated") + tb_class_agg = tb_class_agg.drop(columns=["notes"]) + tb_class_agg = tb_class_agg.format( 
+ ["country", "year", "antimicrobialclass", "description_processing"], short_name="class_aggregated" + ) # # Save outputs. From 0fcd45144cf83af95dc8f00902a2b6def665b00f Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 09:55:49 +0000 Subject: [PATCH 08/30] one desc processing per index --- .../garden/antibiotics/2024-11-12/antimicrobial_usage.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 0c85c497938..cd4fa909fb4 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -36,7 +36,6 @@ def run(dest_dir: str) -> None: tb_class = tb_class.format(["country", "year", "antimicrobialclass", "atc4name", "routeofadministration"]) tb_aware = tb_aware.format(["country", "year", "awarelabel"]) tb_class_agg = format_notes(tb_class_agg) - tb_class_agg = tb_class_agg.drop(columns=["notes"]) tb_class_agg = tb_class_agg.format( ["country", "year", "antimicrobialclass", "description_processing"], short_name="class_aggregated" ) @@ -85,6 +84,14 @@ def format_notes(tb: Table) -> Table: countries_formatted = combine_countries(countries) description_processing_string = f"In {countries_formatted}: {note}" tb.loc[msk, "description_processing"] = description_processing_string + # Now combine them per each country, year and antimicrobial class + tb = tb.drop(columns=["notes"]) + tb = ( + tb.groupby(["country", "year", "antimicrobialclass", "did", "ddd"])["description_processing"] + .apply(lambda x: "; ".join(x)) + .reset_index() + ) + return tb From 534aea8b181d0abbbff770295d5f6b4696ff2801 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 09:59:06 +0000 Subject: [PATCH 09/30] sort out origins --- .../garden/antibiotics/2024-11-12/antimicrobial_usage.py | 6 +++++- 1 file changed, 5 insertions(+), 
1 deletion(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index cd4fa909fb4..8879392b7b1 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -26,7 +26,8 @@ def run(dest_dir: str) -> None: tb_aware = geo.harmonize_countries(df=tb_aware, countries_file=paths.country_mapping_path) # Aggregate by antimicrobial class tb_class_agg = aggregate_antimicrobial_classes(tb_class) - + # Save the origins of the aggregated table to insert back in later + origins = tb_class_agg["did"].metadata.origins # Drop columns that are not needed in the garden dataset. tb_class = tb_class.drop( columns=["whoregioncode", "whoregionname", "countryiso3", "incomeworldbankjune", "atc4", "notes"] @@ -36,6 +37,9 @@ def run(dest_dir: str) -> None: tb_class = tb_class.format(["country", "year", "antimicrobialclass", "atc4name", "routeofadministration"]) tb_aware = tb_aware.format(["country", "year", "awarelabel"]) tb_class_agg = format_notes(tb_class_agg) + # Insert back the origins + tb_class_agg["did"].metadata.origins = origins + tb_class_agg["ddd"].metadata.origins = origins tb_class_agg = tb_class_agg.format( ["country", "year", "antimicrobialclass", "description_processing"], short_name="class_aggregated" ) From d492082c1946d90d0f70e1370329e04692692b89 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 10:36:07 +0000 Subject: [PATCH 10/30] sorting out description processing --- .../antibiotics/2024-11-12/antimicrobial_usage.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 8879392b7b1..29ae5342785 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ 
b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -1,6 +1,7 @@ """Load a meadow dataset and create a garden dataset.""" from owid.catalog import Table +from owid.catalog import processing as pr from etl.data_helpers import geo from etl.helpers import PathFinder, create_dataset @@ -90,11 +91,14 @@ def format_notes(tb: Table) -> Table: tb.loc[msk, "description_processing"] = description_processing_string # Now combine them per each country, year and antimicrobial class tb = tb.drop(columns=["notes"]) - tb = ( - tb.groupby(["country", "year", "antimicrobialclass", "did", "ddd"])["description_processing"] - .apply(lambda x: "; ".join(x)) + # Creating onedescription processing for each antimicrobial class, the variable unit + tb_desc = ( + tb.groupby(["antimicrobialclass"])["description_processing"] + .apply(lambda x: "; ".join(set(x))) # Using set to remove duplicates .reset_index() ) + tb = tb.drop(columns=["description_processing"]) + tb = pr.merge(tb, tb_desc, on=["antimicrobialclass"]) return tb From f72744c4878d94edcd3e7106820b789422726136 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 10:44:27 +0000 Subject: [PATCH 11/30] sorting out description processing --- .../antibiotics/2024-11-12/antimicrobial_usage.meta.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index 710b7d03d07..c27573e514e 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -56,12 +56,12 @@ tables: class_aggregated: variables: ddd: - title: Defined daily doses of {definitions.antimicrobialclass} used + title: Defined daily doses of <> used description_short: Volume of antimicrobials used in a given year. 
description_processing: <> unit: defined daily doses did: - title: Defined daily doses per 1000 inhabitants per day of {definitions.antimicrobialclass} used + title: Defined daily doses per 1000 inhabitants per day of <> used description_short: Volume of antimicrobials used per 1000 inhabitants per day. description_processing: <> unit: defined daily doses per 1000 inhabitants per day From 7ac048f8fd23a3d79efb994adc446fffb987ef1a Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 10:46:35 +0000 Subject: [PATCH 12/30] removing antitb drugs --- .../antibiotics/2024-11-12/antimicrobial_usage.meta.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index c27573e514e..b1c27b1c15e 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -23,7 +23,7 @@ definitions: inhaled <% endif %> antimicrobialclass: - <% if antimicrobialclass == "Antibacterials (ATC J01, A07AA, P01AB)" %> + <% if antimicrobialclass == "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)" %> antibiotics <% elif antimicrobialclass == "Antimalarials (ATC P01B)" %> antimalarials @@ -31,8 +31,6 @@ definitions: antifungals <% elif antimicrobialclass == "Antivirals for systemic use (ATC J05)" %> antivirals - <% elif antimicrobialclass == "Drugs for the treatment of tuberculosis (ATC J04A)" %> - antituberculosis medicines <% endif %> # Learn more about the available fields: @@ -56,12 +54,12 @@ tables: class_aggregated: variables: ddd: - title: Defined daily doses of <> used + title: Defined daily doses of {definitions.antimicrobialclass} used description_short: Volume of antimicrobials used in a given year. 
description_processing: <> unit: defined daily doses did: - title: Defined daily doses per 1000 inhabitants per day of <> used + title: Defined daily doses per 1000 inhabitants per day of {definitions.antimicrobialclass} used description_short: Volume of antimicrobials used per 1000 inhabitants per day. description_processing: <> unit: defined daily doses per 1000 inhabitants per day From 3813f8d72a9743e39f1ef087c55230e1dccfde51 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 10:58:23 +0000 Subject: [PATCH 13/30] deep copy of tb_class to prevent change of antimicrobial class --- .../2024-11-12/antimicrobial_usage.meta.yml | 2 +- .../antibiotics/2024-11-12/antimicrobial_usage.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index b1c27b1c15e..f189c5e3ab6 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -42,7 +42,7 @@ tables: class: variables: ddd: - title: Defined daily doses of {definitions.routeofadministration} << antimicrobialclass>> - << atc4name.lower() >> used + title: Defined daily doses of {definitions.routeofadministration} << antimicrobialclass.lower()>> - << atc4name.lower() >> used description_short: Volume of antimicrobials used in a given year. 
#description_processing: <> unit: defined daily doses diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 29ae5342785..c093d1418c3 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -60,22 +60,23 @@ def run(dest_dir: str) -> None: ds_garden.save() -def aggregate_antimicrobial_classes(tb_class: Table) -> Table: +def aggregate_antimicrobial_classes(tb: Table) -> Table: """ Aggregating by antimicrobial class """ + tb = tb.copy(deep=True) # Combine antitubercolosis into antibacterials - tb_class["antimicrobialclass"] = tb_class["antimicrobialclass"].astype(str) - tb_class["antimicrobialclass"] = tb_class["antimicrobialclass"].replace( + tb["antimicrobialclass"] = tb["antimicrobialclass"].astype(str) + tb["antimicrobialclass"] = tb["antimicrobialclass"].replace( { "Drugs for the treatment of tuberculosis (ATC J04A)": "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)", "Antibacterials (ATC J01, A07AA, P01AB)": "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)", }, ) - assert len(tb_class["antimicrobialclass"].unique()) == 4 - tb_class = tb_class.groupby(["country", "year", "antimicrobialclass", "notes"])[["ddd", "did"]].sum().reset_index() + assert len(tb["antimicrobialclass"].unique()) == 4 + tb = tb.groupby(["country", "year", "antimicrobialclass", "notes"])[["ddd", "did"]].sum().reset_index() - return tb_class + return tb def format_notes(tb: Table) -> Table: From 5799850c3d64b60ffa202eb6dec3692f2231e906 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 11:04:27 +0000 Subject: [PATCH 14/30] add back in tb, separately --- .../antibiotics/2024-11-12/antimicrobial_usage.meta.yml | 8 +++----- .../garden/antibiotics/2024-11-12/antimicrobial_usage.py | 6 +++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git 
a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index f189c5e3ab6..693fb119ae3 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -24,13 +24,15 @@ definitions: <% endif %> antimicrobialclass: <% if antimicrobialclass == "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)" %> - antibiotics + antibiotics including antituberculosis drugs <% elif antimicrobialclass == "Antimalarials (ATC P01B)" %> antimalarials <% elif antimicrobialclass == "Antimycotics and antifungals for systemic use (J02, D01B)" %> antifungals <% elif antimicrobialclass == "Antivirals for systemic use (ATC J05)" %> antivirals + <% elif antimicrobialclass == "Drugs for the treatment of tuberculosis (ATC J04A)" %> + antituberculosis drugs <% endif %> # Learn more about the available fields: @@ -44,12 +46,10 @@ tables: ddd: title: Defined daily doses of {definitions.routeofadministration} << antimicrobialclass.lower()>> - << atc4name.lower() >> used description_short: Volume of antimicrobials used in a given year. - #description_processing: <> unit: defined daily doses did: title: Defined daily doses per 1000 inhabitants per day of {definitions.routeofadministration} << antimicrobialclass>> - << atc4name.lower() >> used description_short: Volume of antimicrobials used per 1000 inhabitants per day. - #description_processing: <> unit: defined daily doses per 1000 inhabitants per day class_aggregated: variables: @@ -68,10 +68,8 @@ tables: ddd: title: Defined daily doses of << awarelabel >> antibiotics used description_short: "Volume of AWaRe category: << awarelabel >> antibiotics used in a given year. 
{definitions.aware_description}" - #description_processing: <> unit: defined daily doses did: title: Defined daily doses per 1000 inhabitants per day of << awarelabel>> antibiotics used description_short: "Volume of AWaRe category: <> used per 1000 inhabitants per day. {definitions.aware_description}" - #description_processing: <> unit: defined daily doses per 1000 inhabitants per day diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index c093d1418c3..963d1b1e8f0 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -62,11 +62,13 @@ def run(dest_dir: str) -> None: def aggregate_antimicrobial_classes(tb: Table) -> Table: """ - Aggregating by antimicrobial class + Aggregating by antimicrobial class, we want to combine antibacterials and antituberculosis, but also keep antituberculosis separately """ tb = tb.copy(deep=True) # Combine antitubercolosis into antibacterials tb["antimicrobialclass"] = tb["antimicrobialclass"].astype(str) + tb_anti_tb = tb[tb["antimicrobialclass"].str.contains("Drugs for the treatment of tuberculosis (ATC J04A)")] + # Combine tb with antibacterials, but also have tb separately tb["antimicrobialclass"] = tb["antimicrobialclass"].replace( { "Drugs for the treatment of tuberculosis (ATC J04A)": "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)", @@ -74,6 +76,8 @@ def aggregate_antimicrobial_classes(tb: Table) -> Table: }, ) assert len(tb["antimicrobialclass"].unique()) == 4 + # Adding antituberculosis back in + tb = pr.concat([tb, tb_anti_tb]) tb = tb.groupby(["country", "year", "antimicrobialclass", "notes"])[["ddd", "did"]].sum().reset_index() return tb From b1222420c150fed07da38ebebe85ccec43c79b08 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 12:22:36 +0000 Subject: [PATCH 15/30] add back missing countries --- 
.../garden/antibiotics/2024-11-12/antimicrobial_usage.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 963d1b1e8f0..d47572e5e45 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -67,7 +67,8 @@ def aggregate_antimicrobial_classes(tb: Table) -> Table: tb = tb.copy(deep=True) # Combine antitubercolosis into antibacterials tb["antimicrobialclass"] = tb["antimicrobialclass"].astype(str) - tb_anti_tb = tb[tb["antimicrobialclass"].str.contains("Drugs for the treatment of tuberculosis (ATC J04A)")] + tb_anti_tb = tb[tb["antimicrobialclass"] == "Drugs for the treatment of tuberculosis (ATC J04A)"] + assert len(tb_anti_tb) > 0 # Combine tb with antibacterials, but also have tb separately tb["antimicrobialclass"] = tb["antimicrobialclass"].replace( { @@ -78,7 +79,9 @@ def aggregate_antimicrobial_classes(tb: Table) -> Table: assert len(tb["antimicrobialclass"].unique()) == 4 # Adding antituberculosis back in tb = pr.concat([tb, tb_anti_tb]) - tb = tb.groupby(["country", "year", "antimicrobialclass", "notes"])[["ddd", "did"]].sum().reset_index() + tb = ( + tb.groupby(["country", "year", "antimicrobialclass", "notes"], dropna=False)[["ddd", "did"]].sum().reset_index() + ) return tb From a223bfa58367bfbf97a79d3c8ecb2cdc87dfcab7 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 12:56:51 +0000 Subject: [PATCH 16/30] sorting out the description processing --- .../2024-11-12/antimicrobial_usage.py | 57 +++++++++++-------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index d47572e5e45..ec64c1afbf1 100644 --- 
a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -1,5 +1,6 @@ """Load a meadow dataset and create a garden dataset.""" +import pandas as pd from owid.catalog import Table from owid.catalog import processing as pr @@ -25,6 +26,9 @@ def run(dest_dir: str) -> None: # tb_class = geo.harmonize_countries(df=tb_class, countries_file=paths.country_mapping_path) tb_aware = geo.harmonize_countries(df=tb_aware, countries_file=paths.country_mapping_path) + # Create tb_notes for use in metadata + tb_notes = tb_class[["country", "year", "antimicrobialclass", "notes"]].dropna(subset=["notes"]) + # Aggregate by antimicrobial class tb_class_agg = aggregate_antimicrobial_classes(tb_class) # Save the origins of the aggregated table to insert back in later @@ -37,7 +41,7 @@ def run(dest_dir: str) -> None: tb_class = tb_class.format(["country", "year", "antimicrobialclass", "atc4name", "routeofadministration"]) tb_aware = tb_aware.format(["country", "year", "awarelabel"]) - tb_class_agg = format_notes(tb_class_agg) + tb_class_agg = format_notes(tb_class_agg, tb_notes) # Insert back the origins tb_class_agg["did"].metadata.origins = origins tb_class_agg["ddd"].metadata.origins = origins @@ -65,11 +69,15 @@ def aggregate_antimicrobial_classes(tb: Table) -> Table: Aggregating by antimicrobial class, we want to combine antibacterials and antituberculosis, but also keep antituberculosis separately """ tb = tb.copy(deep=True) - # Combine antitubercolosis into antibacterials + # Convert the column to strings (if not already done) tb["antimicrobialclass"] = tb["antimicrobialclass"].astype(str) - tb_anti_tb = tb[tb["antimicrobialclass"] == "Drugs for the treatment of tuberculosis (ATC J04A)"] - assert len(tb_anti_tb) > 0 - # Combine tb with antibacterials, but also have tb separately + + # Create a completely independent copy of antituberculosis rows and reset its index + msk = 
tb["antimicrobialclass"] == "Drugs for the treatment of tuberculosis (ATC J04A)" + tb_anti_tb = tb[msk].reset_index(drop=True) + assert len(tb_anti_tb["antimicrobialclass"].unique()) == 1 + + # Modify antimicrobialclass in tb tb["antimicrobialclass"] = tb["antimicrobialclass"].replace( { "Drugs for the treatment of tuberculosis (ATC J04A)": "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)", @@ -77,35 +85,38 @@ def aggregate_antimicrobial_classes(tb: Table) -> Table: }, ) assert len(tb["antimicrobialclass"].unique()) == 4 - # Adding antituberculosis back in - tb = pr.concat([tb, tb_anti_tb]) - tb = ( - tb.groupby(["country", "year", "antimicrobialclass", "notes"], dropna=False)[["ddd", "did"]].sum().reset_index() + tb = tb.groupby(["country", "year", "antimicrobialclass"], dropna=False)[["ddd", "did"]].sum().reset_index() + assert len(tb["antimicrobialclass"].unique()) == 4 + # Add the antituberculosis data back to tb + tb_anti_tb = ( + tb_anti_tb.groupby(["country", "year", "antimicrobialclass"], dropna=False)[["ddd", "did"]].sum().reset_index() ) + tb_combined = pr.concat([tb, tb_anti_tb]) - return tb + tb_combined.set_index(["country", "year", "antimicrobialclass"], verify_integrity=True) + + return tb_combined -def format_notes(tb: Table) -> Table: +def format_notes(tb: Table, tb_notes: Table) -> Table: """ Format notes column """ - for note in tb["notes"].unique(): - msk = tb["notes"] == note - tb_note = tb[msk] - countries = tb_note["country"].unique() - countries_formatted = combine_countries(countries) - description_processing_string = f"In {countries_formatted}: {note}" - tb.loc[msk, "description_processing"] = description_processing_string - # Now combine them per each country, year and antimicrobial class - tb = tb.drop(columns=["notes"]) + for note in tb_notes["notes"].unique(): + if pd.notna(note): + msk = tb_notes["notes"] == note + tb_note = tb_notes[msk] + countries = tb_note["country"].unique() + countries_formatted = combine_countries(countries) + 
description_processing_string = f"In {countries_formatted}: {note}" + tb_notes.loc[msk, "description_processing"] = description_processing_string # Creating onedescription processing for each antimicrobial class, the variable unit tb_desc = ( - tb.groupby(["antimicrobialclass"])["description_processing"] - .apply(lambda x: "; ".join(set(x))) # Using set to remove duplicates + tb_notes.dropna(subset=["description_processing"]) # Remove NaNs + .groupby(["antimicrobialclass"])["description_processing"] + .apply(lambda x: "; ".join(set(x))) # Combine unique values .reset_index() ) - tb = tb.drop(columns=["description_processing"]) tb = pr.merge(tb, tb_desc, on=["antimicrobialclass"]) return tb From 6d5f724739ec760f89008deaea4a35dab2a66bf2 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 14:26:52 +0000 Subject: [PATCH 17/30] pivoting agg table --- .../2024-11-12/antimicrobial_usage.meta.yml | 54 ++++++++++++---- .../2024-11-12/antimicrobial_usage.py | 63 ++++++++++++++----- 2 files changed, 91 insertions(+), 26 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index 693fb119ae3..d60be150409 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -38,31 +38,61 @@ definitions: # Learn more about the available fields: # http://docs.owid.io/projects/etl/architecture/metadata/reference/ dataset: - update_period_days: 308 + update_period_days: 365 tables: class: variables: ddd: title: Defined daily doses of {definitions.routeofadministration} << antimicrobialclass.lower()>> - << atc4name.lower() >> used - description_short: Volume of antimicrobials used in a given year. + description_short: Total defined daily doses of antimicrobials used in a given year. 
unit: defined daily doses did: title: Defined daily doses per 1000 inhabitants per day of {definitions.routeofadministration} << antimicrobialclass>> - << atc4name.lower() >> used - description_short: Volume of antimicrobials used per 1000 inhabitants per day. + description_short: Total defined daily doses of antimicrobials used per 1000 inhabitants per day. unit: defined daily doses per 1000 inhabitants per day class_aggregated: variables: - ddd: - title: Defined daily doses of {definitions.antimicrobialclass} used - description_short: Volume of antimicrobials used in a given year. - description_processing: <> + ddd_anti_malarials: + title: Defined daily doses of antimalarials used + description_short: Total defined daily doses of antimalarials used in a given year. unit: defined daily doses - did: - title: Defined daily doses per 1000 inhabitants per day of {definitions.antimicrobialclass} used - description_short: Volume of antimicrobials used per 1000 inhabitants per day. - description_processing: <> - unit: defined daily doses per 1000 inhabitants per day + ddd_antibacterials_and_antituberculosis: + title: Defined daily doses of antibiotics and antituberculosis drugs used + description_short: Total defined daily doses of antibiotics and antituberculosis drugs used in a given year. + unit: defined daily doses + ddd_antifungals: + title: Defined daily doses of antifungals used + description_short: Total defined daily doses of antifungals used in a given year. + unit: defined daily doses + ddd_antituberculosis: + title: Defined daily doses of antituberculosis drugs used + description_short: Total defined daily doses of antituberculosis drugs used in a given year. + unit: defined daily doses + ddd_antivirals: + title: Defined daily doses of antivirals used + description_short: Total defined daily doses of antivirals used in a given year. 
+ unit: defined daily doses + did_anti_malarials: + title: Defined daily doses of antimalarials used per 1,000 inhabitants per day + description_short: Total defined daily doses of antimalarials used in a given year per 1,000 inhabitants per day. + unit: defined daily doses per 1,000 inhabitants per day + did_antibacterials_and_antituberculosis: + title: Defined daily doses of antibiotics and antituberculosis drugs used per 1,000 inhabitants per day + description_short: Total defined daily doses of antibiotics and antituberculosis drugs used in a given year per 1,000 inhabitants per day. + unit: defined daily doses per 1,000 inhabitants per day + did_antifungals: + title: Defined daily doses of antifungals used per 1,000 inhabitants per day + description_short: Total defined daily doses of antifungals used in a given year per 1,000 inhabitants per day. + unit: defined daily doses per 1,000 inhabitants per day + did_antituberculosis: + title: Defined daily doses of antituberculosis drugs used per 1,000 inhabitants per day + description_short: Total defined daily doses of antituberculosis drugs used in a given year per 1,000 inhabitants per day. + unit: defined daily doses per 1,000 inhabitants per day + did_antivirals: + title: Defined daily doses of antivirals used per 1,000 inhabitants per day + description_short: Total defined daily doses of antivirals used in a given year per 1,000 inhabitants per day. 
+ unit: defined daily doses per 1,000 inhabitants per day aware: variables: ddd: diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index ec64c1afbf1..4e962868728 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -26,13 +26,11 @@ def run(dest_dir: str) -> None: # tb_class = geo.harmonize_countries(df=tb_class, countries_file=paths.country_mapping_path) tb_aware = geo.harmonize_countries(df=tb_aware, countries_file=paths.country_mapping_path) - # Create tb_notes for use in metadata - tb_notes = tb_class[["country", "year", "antimicrobialclass", "notes"]].dropna(subset=["notes"]) # Aggregate by antimicrobial class - tb_class_agg = aggregate_antimicrobial_classes(tb_class) + tb_class_agg, tb_notes = aggregate_antimicrobial_classes(tb_class) # Save the origins of the aggregated table to insert back in later - origins = tb_class_agg["did"].metadata.origins + # origins = tb_class_agg["did"].metadata.origins # Drop columns that are not needed in the garden dataset. 
tb_class = tb_class.drop( columns=["whoregioncode", "whoregionname", "countryiso3", "incomeworldbankjune", "atc4", "notes"] @@ -41,13 +39,11 @@ def run(dest_dir: str) -> None: tb_class = tb_class.format(["country", "year", "antimicrobialclass", "atc4name", "routeofadministration"]) tb_aware = tb_aware.format(["country", "year", "awarelabel"]) - tb_class_agg = format_notes(tb_class_agg, tb_notes) # Insert back the origins - tb_class_agg["did"].metadata.origins = origins - tb_class_agg["ddd"].metadata.origins = origins - tb_class_agg = tb_class_agg.format( - ["country", "year", "antimicrobialclass", "description_processing"], short_name="class_aggregated" - ) + # tb_class_agg["did"].metadata.origins = origins + # tb_class_agg["ddd"].metadata.origins = origins + tb_class_agg = pivot_aggregated_table(tb_class_agg, tb_notes) + tb_class_agg = tb_class_agg.format(["country", "year"], short_name="class_aggregated") # # Save outputs. @@ -64,6 +60,40 @@ def run(dest_dir: str) -> None: ds_garden.save() +def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: + """ + Pivot the aggregated table to have a column for each antimicrobial class, then add the description_processing metadata + """ + + tb_notes_dict = { + "Antibacterials (ATC J01, A07AA, P01AB)": "antibacterials", + "Antimalarials (ATC P01B)": "anti_malarials", + "Antimycotics and antifungals for systemic use (J02, D01B)": "antifungals", + "Antivirals for systemic use (ATC J05)": "antivirals", + "Drugs for the treatment of tuberculosis (ATC J04A)": "antituberculosis", + "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)": "antibacterials_and_antituberculosis", + } + tb_notes["category"] = tb_notes["antimicrobialclass"].map(tb_notes_dict) + tb_class_agg = tb_class_agg.copy(deep=True) + tb_class_agg["antimicrobialclass"] = tb_class_agg["antimicrobialclass"].replace(tb_notes_dict) + tb_class_agg = tb_class_agg.pivot(index=["country", "year"], columns="antimicrobialclass", values=["ddd", "did"]) + 
tb_class_agg.columns = tb_class_agg.columns.to_flat_index() + tb_class_agg.columns = [f"{col[0]}_{col[1]}" for col in tb_class_agg.columns] + tb_class_agg = tb_class_agg.reset_index() + + for key in tb_notes_dict.values(): + if f"ddd_{key}" in tb_class_agg.columns: + tb_class_agg[f"ddd_{key}"].metadata.description_processing = tb_notes["description_processing"][ + tb_notes["category"] == key + ] + if f"did_{key}" in tb_class_agg.columns: + tb_class_agg[f"did_{key}"].metadata.description_processing = tb_notes["description_processing"][ + tb_notes["category"] == key + ] + + return tb_class_agg + + def aggregate_antimicrobial_classes(tb: Table) -> Table: """ Aggregating by antimicrobial class, we want to combine antibacterials and antituberculosis, but also keep antituberculosis separately @@ -85,6 +115,11 @@ def aggregate_antimicrobial_classes(tb: Table) -> Table: }, ) assert len(tb["antimicrobialclass"].unique()) == 4 + # Format the notes tables before it's removed + tb_notes = tb[["country", "year", "antimicrobialclass", "notes"]].dropna(subset=["notes"]) + tb_notes = format_notes(tb_notes) + + # Aggregate the data tb = tb.groupby(["country", "year", "antimicrobialclass"], dropna=False)[["ddd", "did"]].sum().reset_index() assert len(tb["antimicrobialclass"].unique()) == 4 # Add the antituberculosis data back to tb @@ -95,10 +130,10 @@ def aggregate_antimicrobial_classes(tb: Table) -> Table: tb_combined.set_index(["country", "year", "antimicrobialclass"], verify_integrity=True) - return tb_combined + return tb_combined, tb_notes -def format_notes(tb: Table, tb_notes: Table) -> Table: +def format_notes(tb_notes: Table) -> Table: """ Format notes column """ @@ -117,9 +152,9 @@ def format_notes(tb: Table, tb_notes: Table) -> Table: .apply(lambda x: "; ".join(set(x))) # Combine unique values .reset_index() ) - tb = pr.merge(tb, tb_desc, on=["antimicrobialclass"]) + # tb = pr.merge(tb, tb_desc, on=["antimicrobialclass"]) - return tb + return tb_desc def 
combine_countries(countries): From b4daf5546df7d59eceef76d7f6c4220aaf59c28d Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 14:38:55 +0000 Subject: [PATCH 18/30] fixing metadata --- .../2024-11-12/antimicrobial_usage.meta.yml | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index d60be150409..40b3d77309d 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -45,61 +45,61 @@ tables: variables: ddd: title: Defined daily doses of {definitions.routeofadministration} << antimicrobialclass.lower()>> - << atc4name.lower() >> used - description_short: Total defined daily doses of antimicrobials used in a given year. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antimicrobials used in a given year. unit: defined daily doses did: title: Defined daily doses per 1000 inhabitants per day of {definitions.routeofadministration} << antimicrobialclass>> - << atc4name.lower() >> used - description_short: Total defined daily doses of antimicrobials used per 1000 inhabitants per day. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antimicrobials used per 1000 inhabitants per day. unit: defined daily doses per 1000 inhabitants per day class_aggregated: variables: ddd_anti_malarials: title: Defined daily doses of antimalarials used - description_short: Total defined daily doses of antimalarials used in a given year. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antimalarials used in a given year. 
unit: defined daily doses ddd_antibacterials_and_antituberculosis: title: Defined daily doses of antibiotics and antituberculosis drugs used - description_short: Total defined daily doses of antibiotics and antituberculosis drugs used in a given year. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antibiotics and antituberculosis drugs used in a given year. unit: defined daily doses ddd_antifungals: title: Defined daily doses of antifungals used - description_short: Total defined daily doses of antifungals used in a given year. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antifungals used in a given year. unit: defined daily doses ddd_antituberculosis: title: Defined daily doses of antituberculosis drugs used - description_short: Total defined daily doses of antituberculosis drugs used in a given year. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antituberculosis drugs used in a given year. unit: defined daily doses ddd_antivirals: title: Defined daily doses of antivirals used - description_short: Total defined daily doses of antivirals used in a given year. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antivirals used in a given year. unit: defined daily doses did_anti_malarials: title: Defined daily doses of antimalarials used per 1,000 inhabitants per day - description_short: Total defined daily doses of antimalarials used in a given year per 1,000 inhabitants per day. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antimalarials used in a given year per 1,000 inhabitants per day. 
unit: defined daily doses per 1,000 inhabitants per day did_antibacterials_and_antituberculosis: title: Defined daily doses of antibiotics and antituberculosis drugs used per 1,000 inhabitants per day - description_short: Total defined daily doses of antibiotics and antituberculosis drugs used in a given year per 1,000 inhabitants per day. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antibiotics and antituberculosis drugs used in a given year per 1,000 inhabitants per day. unit: defined daily doses per 1,000 inhabitants per day did_antifungals: title: Defined daily doses of antifungals used per 1,000 inhabitants per day - description_short: Total defined daily doses of antifungals used in a given year per 1,000 inhabitants per day. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antifungals used in a given year per 1,000 inhabitants per day. unit: defined daily doses per 1,000 inhabitants per day did_antituberculosis: title: Defined daily doses of antituberculosis drugs used per 1,000 inhabitants per day - description_short: Total defined daily doses of antituberculosis drugs used in a given year per 1,000 inhabitants per day. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antituberculosis drugs used in a given year per 1,000 inhabitants per day. unit: defined daily doses per 1,000 inhabitants per day did_antivirals: title: Defined daily doses of antivirals used per 1,000 inhabitants per day - description_short: Total defined daily doses of antivirals used in a given year per 1,000 inhabitants per day. + description_short: Total [defined daily doses](#dod:defined-daily-doses) of antivirals used in a given year per 1,000 inhabitants per day. 
unit: defined daily doses per 1,000 inhabitants per day aware: variables: ddd: title: Defined daily doses of << awarelabel >> antibiotics used - description_short: "Volume of AWaRe category: << awarelabel >> antibiotics used in a given year. {definitions.aware_description}" + description_short: "Total [defined daily doses](#dod:defined-daily-doses) of AWaRe category: << awarelabel >> antibiotics used in a given year. {definitions.aware_description}" unit: defined daily doses did: title: Defined daily doses per 1000 inhabitants per day of << awarelabel>> antibiotics used - description_short: "Volume of AWaRe category: <> used per 1000 inhabitants per day. {definitions.aware_description}" + description_short: "Total [defined daily doses](#dod:defined-daily-doses) of AWaRe category: <> used per 1000 inhabitants per day. {definitions.aware_description}" unit: defined daily doses per 1000 inhabitants per day From e873676d43161b5e0ae29b1f634048a53a70deae Mon Sep 17 00:00:00 2001 From: spoonerf Date: Wed, 27 Nov 2024 14:42:16 +0000 Subject: [PATCH 19/30] try and improve the formatting of description processing --- .../garden/antibiotics/2024-11-12/antimicrobial_usage.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 4e962868728..ab2be526704 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -30,7 +30,6 @@ def run(dest_dir: str) -> None: # Aggregate by antimicrobial class tb_class_agg, tb_notes = aggregate_antimicrobial_classes(tb_class) # Save the origins of the aggregated table to insert back in later - # origins = tb_class_agg["did"].metadata.origins # Drop columns that are not needed in the garden dataset. 
tb_class = tb_class.drop( columns=["whoregioncode", "whoregionname", "countryiso3", "incomeworldbankjune", "atc4", "notes"] @@ -39,9 +38,6 @@ def run(dest_dir: str) -> None: tb_class = tb_class.format(["country", "year", "antimicrobialclass", "atc4name", "routeofadministration"]) tb_aware = tb_aware.format(["country", "year", "awarelabel"]) - # Insert back the origins - # tb_class_agg["did"].metadata.origins = origins - # tb_class_agg["ddd"].metadata.origins = origins tb_class_agg = pivot_aggregated_table(tb_class_agg, tb_notes) tb_class_agg = tb_class_agg.format(["country", "year"], short_name="class_aggregated") @@ -89,7 +85,7 @@ def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: if f"did_{key}" in tb_class_agg.columns: tb_class_agg[f"did_{key}"].metadata.description_processing = tb_notes["description_processing"][ tb_notes["category"] == key - ] + ].astype(str) return tb_class_agg @@ -143,7 +139,7 @@ def format_notes(tb_notes: Table) -> Table: tb_note = tb_notes[msk] countries = tb_note["country"].unique() countries_formatted = combine_countries(countries) - description_processing_string = f"In {countries_formatted}: {note}" + description_processing_string = f"- In {countries_formatted}: {note}\n" tb_notes.loc[msk, "description_processing"] = description_processing_string # Creating onedescription processing for each antimicrobial class, the variable unit tb_desc = ( From ab8a6e39b48eb449497ddb9fb793d8ee74b8d84b Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 11:59:20 +0000 Subject: [PATCH 20/30] pablo's suggestions --- .../2024-11-12/antimicrobial_usage.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index ab2be526704..f4eb74f05cd 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ 
b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -85,7 +85,7 @@ def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: if f"did_{key}" in tb_class_agg.columns: tb_class_agg[f"did_{key}"].metadata.description_processing = tb_notes["description_processing"][ tb_notes["category"] == key - ].astype(str) + ].astype("string") return tb_class_agg @@ -96,7 +96,7 @@ def aggregate_antimicrobial_classes(tb: Table) -> Table: """ tb = tb.copy(deep=True) # Convert the column to strings (if not already done) - tb["antimicrobialclass"] = tb["antimicrobialclass"].astype(str) + tb["antimicrobialclass"] = tb["antimicrobialclass"].astype("string") # Create a completely independent copy of antituberculosis rows and reset its index msk = tb["antimicrobialclass"] == "Drugs for the treatment of tuberculosis (ATC J04A)" @@ -110,7 +110,14 @@ def aggregate_antimicrobial_classes(tb: Table) -> Table: "Antibacterials (ATC J01, A07AA, P01AB)": "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)", }, ) - assert len(tb["antimicrobialclass"].unique()) == 4 + expected_class_values = { + "Antibacterials (ATC J01, A07AA, P01AB, ATC J04A)", + "Antimalarials (ATC P01B)", + "Antimycotics and antifungals for systemic use (J02, D01B)", + "Antivirals for systemic use (ATC J05)", + } + actual_values = set(tb["antimicrobialclass"].unique()) + assert actual_values == expected_class_values # Format the notes tables before it's removed tb_notes = tb[["country", "year", "antimicrobialclass", "notes"]].dropna(subset=["notes"]) tb_notes = format_notes(tb_notes) @@ -134,13 +141,12 @@ def format_notes(tb_notes: Table) -> Table: Format notes column """ for note in tb_notes["notes"].unique(): - if pd.notna(note): - msk = tb_notes["notes"] == note - tb_note = tb_notes[msk] - countries = tb_note["country"].unique() - countries_formatted = combine_countries(countries) - description_processing_string = f"- In {countries_formatted}: {note}\n" - tb_notes.loc[msk, 
"description_processing"] = description_processing_string + msk = tb_notes["notes"] == note + tb_note = tb_notes[msk] + countries = tb_note["country"].unique() + countries_formatted = combine_countries(countries) + description_processing_string = f"- In {countries_formatted}: {note}\n" + tb_notes.loc[msk, "description_processing"] = description_processing_string # Creating onedescription processing for each antimicrobial class, the variable unit tb_desc = ( tb_notes.dropna(subset=["description_processing"]) # Remove NaNs From 09fa0fa6c334a0ec33563019daab220e29b1055f Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 15:23:05 +0000 Subject: [PATCH 21/30] description changes --- .../2024-11-12/antimicrobial_usage.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index f4eb74f05cd..e51c0c73a54 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -72,20 +72,20 @@ def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: tb_notes["category"] = tb_notes["antimicrobialclass"].map(tb_notes_dict) tb_class_agg = tb_class_agg.copy(deep=True) tb_class_agg["antimicrobialclass"] = tb_class_agg["antimicrobialclass"].replace(tb_notes_dict) - tb_class_agg = tb_class_agg.pivot(index=["country", "year"], columns="antimicrobialclass", values=["ddd", "did"]) - tb_class_agg.columns = tb_class_agg.columns.to_flat_index() - tb_class_agg.columns = [f"{col[0]}_{col[1]}" for col in tb_class_agg.columns] - tb_class_agg = tb_class_agg.reset_index() + tb_class_agg = tb_class_agg.pivot( + index=["country", "year"], columns="antimicrobialclass", values=["ddd", "did"], join_column_levels_with="_" + ) + tb_class_agg = tb_class_agg.reset_index(drop=True) for key in tb_notes_dict.values(): if 
f"ddd_{key}" in tb_class_agg.columns: - tb_class_agg[f"ddd_{key}"].metadata.description_processing = tb_notes["description_processing"][ - tb_notes["category"] == key - ] + tb_class_agg[f"ddd_{key}"].metadata.description_processing = "\n".join( + tb_notes["description_processing"][tb_notes["category"] == key].tolist() + ) if f"did_{key}" in tb_class_agg.columns: - tb_class_agg[f"did_{key}"].metadata.description_processing = tb_notes["description_processing"][ - tb_notes["category"] == key - ].astype("string") + tb_class_agg[f"did_{key}"].metadata.description_processing = "\n".join( + tb_notes["description_processing"][tb_notes["category"] == key].tolist() + ) return tb_class_agg From 8d519f69649cba3c74b4d04fdfabf42063e1533a Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 15:25:03 +0000 Subject: [PATCH 22/30] remove unused pandas --- .../data/garden/antibiotics/2024-11-12/antimicrobial_usage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index e51c0c73a54..de09787d357 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -1,6 +1,5 @@ """Load a meadow dataset and create a garden dataset.""" -import pandas as pd from owid.catalog import Table from owid.catalog import processing as pr From a5d8eb5c252b3e0cc2adc91371888f549732b354 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 15:34:12 +0000 Subject: [PATCH 23/30] change to description_key --- .../2024-11-12/antimicrobial_usage.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index de09787d357..302566db572 100644 --- 
a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -57,7 +57,7 @@ def run(dest_dir: str) -> None: def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: """ - Pivot the aggregated table to have a column for each antimicrobial class, then add the description_processing metadata + Pivot the aggregated table to have a column for each antimicrobial class, then add the description_key metadata """ tb_notes_dict = { @@ -78,12 +78,12 @@ def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: for key in tb_notes_dict.values(): if f"ddd_{key}" in tb_class_agg.columns: - tb_class_agg[f"ddd_{key}"].metadata.description_processing = "\n".join( - tb_notes["description_processing"][tb_notes["category"] == key].tolist() + tb_class_agg[f"ddd_{key}"].metadata.description_key = "\n".join( + tb_notes["description_key"][tb_notes["category"] == key].tolist() ) if f"did_{key}" in tb_class_agg.columns: - tb_class_agg[f"did_{key}"].metadata.description_processing = "\n".join( - tb_notes["description_processing"][tb_notes["category"] == key].tolist() + tb_class_agg[f"did_{key}"].metadata.description_key = "\n".join( + tb_notes["description_key"][tb_notes["category"] == key].tolist() ) return tb_class_agg @@ -144,16 +144,15 @@ def format_notes(tb_notes: Table) -> Table: tb_note = tb_notes[msk] countries = tb_note["country"].unique() countries_formatted = combine_countries(countries) - description_processing_string = f"- In {countries_formatted}: {note}\n" - tb_notes.loc[msk, "description_processing"] = description_processing_string + description_key_string = f"- In {countries_formatted}: {note}\n" + tb_notes.loc[msk, "description_key"] = description_key_string # Creating onedescription processing for each antimicrobial class, the variable unit tb_desc = ( - tb_notes.dropna(subset=["description_processing"]) # Remove NaNs - 
.groupby(["antimicrobialclass"])["description_processing"] - .apply(lambda x: "; ".join(set(x))) # Combine unique values + tb_notes.dropna(subset=["description_key"]) # Remove NaNs + .groupby(["antimicrobialclass"])["description_key"] + .apply(lambda x: "".join(set(x))) # Combine unique values .reset_index() ) - # tb = pr.merge(tb, tb_desc, on=["antimicrobialclass"]) return tb_desc From 3fea12d72b9c66a0f1e4576a5dad50c2e4dc82fa Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 15:42:12 +0000 Subject: [PATCH 24/30] back to dp --- .../2024-11-12/antimicrobial_usage.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 302566db572..8cc91bc38a5 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -57,7 +57,7 @@ def run(dest_dir: str) -> None: def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: """ - Pivot the aggregated table to have a column for each antimicrobial class, then add the description_key metadata + Pivot the aggregated table to have a column for each antimicrobial class, then add the description_processing metadata """ tb_notes_dict = { @@ -78,12 +78,12 @@ def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: for key in tb_notes_dict.values(): if f"ddd_{key}" in tb_class_agg.columns: - tb_class_agg[f"ddd_{key}"].metadata.description_key = "\n".join( - tb_notes["description_key"][tb_notes["category"] == key].tolist() + tb_class_agg[f"ddd_{key}"].metadata.description_processing = "\n".join( + tb_notes["description_processing"][tb_notes["category"] == key].tolist() ) if f"did_{key}" in tb_class_agg.columns: - tb_class_agg[f"did_{key}"].metadata.description_key = "\n".join( - tb_notes["description_key"][tb_notes["category"] == 
key].tolist() + tb_class_agg[f"did_{key}"].metadata.description_processing = "\n".join( + tb_notes["description_processing"][tb_notes["category"] == key].tolist() ) return tb_class_agg @@ -144,12 +144,12 @@ def format_notes(tb_notes: Table) -> Table: tb_note = tb_notes[msk] countries = tb_note["country"].unique() countries_formatted = combine_countries(countries) - description_key_string = f"- In {countries_formatted}: {note}\n" - tb_notes.loc[msk, "description_key"] = description_key_string + description_processing_string = f"- In {countries_formatted}: {note}\n" + tb_notes.loc[msk, "description_processing"] = description_processing_string # Creating onedescription processing for each antimicrobial class, the variable unit tb_desc = ( - tb_notes.dropna(subset=["description_key"]) # Remove NaNs - .groupby(["antimicrobialclass"])["description_key"] + tb_notes.dropna(subset=["description_processing"]) # Remove NaNs + .groupby(["antimicrobialclass"])["description_processing"] .apply(lambda x: "".join(set(x))) # Combine unique values .reset_index() ) From e1f14495a668718efb61e1bd56d63e9973e22e12 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 15:55:48 +0000 Subject: [PATCH 25/30] try out desc key --- .../garden/antibiotics/2024-11-12/antimicrobial_usage.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 8cc91bc38a5..c177c575a1c 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -81,10 +81,16 @@ def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: tb_class_agg[f"ddd_{key}"].metadata.description_processing = "\n".join( tb_notes["description_processing"][tb_notes["category"] == key].tolist() ) + tb_class_agg[f"ddd_{key}"].metadata.description_key = 
tb_notes["description_processing"][ + tb_notes["category"] == key + ].tolist() if f"did_{key}" in tb_class_agg.columns: tb_class_agg[f"did_{key}"].metadata.description_processing = "\n".join( tb_notes["description_processing"][tb_notes["category"] == key].tolist() ) + tb_class_agg[f"did_{key}"].metadata.description_key = tb_notes["description_processing"][ + tb_notes["category"] == key + ].tolist() return tb_class_agg From 34d5252a85dbbda9eee9b27cc107cd25ff84bb27 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 15:58:21 +0000 Subject: [PATCH 26/30] try out desc key --- .../antibiotics/2024-11-12/antimicrobial_usage.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index c177c575a1c..d0f4710e5c9 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -78,16 +78,16 @@ def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: for key in tb_notes_dict.values(): if f"ddd_{key}" in tb_class_agg.columns: - tb_class_agg[f"ddd_{key}"].metadata.description_processing = "\n".join( - tb_notes["description_processing"][tb_notes["category"] == key].tolist() - ) + # tb_class_agg[f"ddd_{key}"].metadata.description_processing = "\n".join( + # tb_notes["description_processing"][tb_notes["category"] == key].tolist() + # ) tb_class_agg[f"ddd_{key}"].metadata.description_key = tb_notes["description_processing"][ tb_notes["category"] == key ].tolist() if f"did_{key}" in tb_class_agg.columns: - tb_class_agg[f"did_{key}"].metadata.description_processing = "\n".join( - tb_notes["description_processing"][tb_notes["category"] == key].tolist() - ) + # tb_class_agg[f"did_{key}"].metadata.description_processing = "\n".join( + # tb_notes["description_processing"][tb_notes["category"] == 
key].tolist() + # ) tb_class_agg[f"did_{key}"].metadata.description_key = tb_notes["description_processing"][ tb_notes["category"] == key ].tolist() @@ -150,7 +150,7 @@ def format_notes(tb_notes: Table) -> Table: tb_note = tb_notes[msk] countries = tb_note["country"].unique() countries_formatted = combine_countries(countries) - description_processing_string = f"- In {countries_formatted}: {note}\n" + description_processing_string = f"In {countries_formatted}: {note}\n" tb_notes.loc[msk, "description_processing"] = description_processing_string # Creating onedescription processing for each antimicrobial class, the variable unit tb_desc = ( From a45a2f0f5ee66ec81d446764a21d56688c551424 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 16:00:44 +0000 Subject: [PATCH 27/30] try out desc key --- .../data/garden/antibiotics/2024-11-12/antimicrobial_usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index d0f4710e5c9..808bd6cb4c3 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -150,7 +150,7 @@ def format_notes(tb_notes: Table) -> Table: tb_note = tb_notes[msk] countries = tb_note["country"].unique() countries_formatted = combine_countries(countries) - description_processing_string = f"In {countries_formatted}: {note}\n" + description_processing_string = f"In {countries_formatted}: {note}" tb_notes.loc[msk, "description_processing"] = description_processing_string # Creating onedescription processing for each antimicrobial class, the variable unit tb_desc = ( From 731ba81fd0d633586560587fb71330f2f9b38136 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 16:08:54 +0000 Subject: [PATCH 28/30] try out desc key --- .../garden/antibiotics/2024-11-12/antimicrobial_usage.py | 7 +++---- 1 
file changed, 3 insertions(+), 4 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index 808bd6cb4c3..f3a58a2d357 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -83,15 +83,14 @@ def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: # ) tb_class_agg[f"ddd_{key}"].metadata.description_key = tb_notes["description_processing"][ tb_notes["category"] == key - ].tolist() + ] if f"did_{key}" in tb_class_agg.columns: # tb_class_agg[f"did_{key}"].metadata.description_processing = "\n".join( # tb_notes["description_processing"][tb_notes["category"] == key].tolist() # ) tb_class_agg[f"did_{key}"].metadata.description_key = tb_notes["description_processing"][ tb_notes["category"] == key - ].tolist() - + ] return tb_class_agg @@ -156,7 +155,7 @@ def format_notes(tb_notes: Table) -> Table: tb_desc = ( tb_notes.dropna(subset=["description_processing"]) # Remove NaNs .groupby(["antimicrobialclass"])["description_processing"] - .apply(lambda x: "".join(set(x))) # Combine unique values + .apply(lambda x: list(set(x))) # Combine unique values into a list .reset_index() ) From d05d93c1c74c0171c6fc5ddf9608e8a8b8bf378d Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 16:34:23 +0000 Subject: [PATCH 29/30] tidy up the notes column --- .../2024-11-12/antimicrobial_usage.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py index f3a58a2d357..2191393054b 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py +++ b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.py @@ -26,6 +26,8 @@ def run(dest_dir: str) -> None: tb_class = 
geo.harmonize_countries(df=tb_class, countries_file=paths.country_mapping_path) tb_aware = geo.harmonize_countries(df=tb_aware, countries_file=paths.country_mapping_path) + # Tidy notes column + tb_class = tidy_notes(tb_class) # Aggregate by antimicrobial class tb_class_agg, tb_notes = aggregate_antimicrobial_classes(tb_class) # Save the origins of the aggregated table to insert back in later @@ -78,16 +80,10 @@ def pivot_aggregated_table(tb_class_agg: Table, tb_notes: Table) -> Table: for key in tb_notes_dict.values(): if f"ddd_{key}" in tb_class_agg.columns: - # tb_class_agg[f"ddd_{key}"].metadata.description_processing = "\n".join( - # tb_notes["description_processing"][tb_notes["category"] == key].tolist() - # ) tb_class_agg[f"ddd_{key}"].metadata.description_key = tb_notes["description_processing"][ tb_notes["category"] == key ] if f"did_{key}" in tb_class_agg.columns: - # tb_class_agg[f"did_{key}"].metadata.description_processing = "\n".join( - # tb_notes["description_processing"][tb_notes["category"] == key].tolist() - # ) tb_class_agg[f"did_{key}"].metadata.description_key = tb_notes["description_processing"][ tb_notes["category"] == key ] @@ -149,7 +145,7 @@ def format_notes(tb_notes: Table) -> Table: tb_note = tb_notes[msk] countries = tb_note["country"].unique() countries_formatted = combine_countries(countries) - description_processing_string = f"In {countries_formatted}: {note}" + description_processing_string = f"For {countries_formatted}: {note}" tb_notes.loc[msk, "description_processing"] = description_processing_string # Creating onedescription processing for each antimicrobial class, the variable unit tb_desc = ( @@ -162,6 +158,25 @@ def format_notes(tb_notes: Table) -> Table: return tb_desc +def tidy_notes(tb_class: Table) -> Table: + """ + Tidy notes column - improve the syntax and fix spelling errors + """ + notes_dict = { + "Only consumption in the community reported": "only antimicrobial consumption in the community is reported.", + "For 
antimycotics and antifungals: only J02 reported": "for antimycotics and antifungals, only antimycotics for systemic use (ATC code J02) are reported.", + "For antibiotics: only J01 and P01AB reported": "for antibiotics, only antibiotics for systemic use (ATC code J01) and nitroimidazole derivatives (ATC code P01AB) are reported.", + "For antibiotics: only J01 reported": "for antibiotics, only antibiotics for systemic use (ATC code J01) are reported", + "For antifungals: only use in the hospital reported": "for antifungals, only those used in hospitals are reported.", + "Data incomplete since not collected from all sources of data": "data is incomplete since it's not collected from all sources.", + "Only consumption in the public sector reported and this is estimated to reppresent less than 90% of the antimicrobial used in the country ": "only consumption in the public sector reported and this is estimated to represent less than 90% of total antimicrobial usage.", + "Data incomplete: not all antibiotics reported systematically": "data is incomplete, not all antibiotics reported systematically.", + "For antituberculosis medicines: data are incomplete": "data are incomplete for antituberculosis medicines.", + } + tb_class["notes"] = tb_class["notes"].replace(notes_dict) + return tb_class + + def combine_countries(countries): # Combine countries into a string if not countries: From 1716e6f2035f3d99f4492d3d85938b5437617398 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Fri, 29 Nov 2024 16:39:31 +0000 Subject: [PATCH 30/30] don't forget decimal places --- .../2024-11-12/antimicrobial_usage.meta.yml | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml index 40b3d77309d..d21d02b8b1f 100644 --- a/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml +++ 
b/etl/steps/data/garden/antibiotics/2024-11-12/antimicrobial_usage.meta.yml @@ -47,59 +47,87 @@ tables: title: Defined daily doses of {definitions.routeofadministration} << antimicrobialclass.lower()>> - << atc4name.lower() >> used description_short: Total [defined daily doses](#dod:defined-daily-doses) of antimicrobials used in a given year. unit: defined daily doses + display: + numDecimalPlaces: 0 did: title: Defined daily doses per 1000 inhabitants per day of {definitions.routeofadministration} << antimicrobialclass>> - << atc4name.lower() >> used description_short: Total [defined daily doses](#dod:defined-daily-doses) of antimicrobials used per 1000 inhabitants per day. unit: defined daily doses per 1000 inhabitants per day + display: + numDecimalPlaces: 1 class_aggregated: variables: ddd_anti_malarials: title: Defined daily doses of antimalarials used description_short: Total [defined daily doses](#dod:defined-daily-doses) of antimalarials used in a given year. unit: defined daily doses + display: + numDecimalPlaces: 0 ddd_antibacterials_and_antituberculosis: title: Defined daily doses of antibiotics and antituberculosis drugs used description_short: Total [defined daily doses](#dod:defined-daily-doses) of antibiotics and antituberculosis drugs used in a given year. unit: defined daily doses + display: + numDecimalPlaces: 0 ddd_antifungals: title: Defined daily doses of antifungals used description_short: Total [defined daily doses](#dod:defined-daily-doses) of antifungals used in a given year. unit: defined daily doses + display: + numDecimalPlaces: 0 ddd_antituberculosis: title: Defined daily doses of antituberculosis drugs used description_short: Total [defined daily doses](#dod:defined-daily-doses) of antituberculosis drugs used in a given year. 
unit: defined daily doses + display: + numDecimalPlaces: 0 ddd_antivirals: title: Defined daily doses of antivirals used description_short: Total [defined daily doses](#dod:defined-daily-doses) of antivirals used in a given year. unit: defined daily doses + display: + numDecimalPlaces: 0 did_anti_malarials: title: Defined daily doses of antimalarials used per 1,000 inhabitants per day description_short: Total [defined daily doses](#dod:defined-daily-doses) of antimalarials used in a given year per 1,000 inhabitants per day. unit: defined daily doses per 1,000 inhabitants per day + display: + numDecimalPlaces: 1 did_antibacterials_and_antituberculosis: title: Defined daily doses of antibiotics and antituberculosis drugs used per 1,000 inhabitants per day description_short: Total [defined daily doses](#dod:defined-daily-doses) of antibiotics and antituberculosis drugs used in a given year per 1,000 inhabitants per day. unit: defined daily doses per 1,000 inhabitants per day + display: + numDecimalPlaces: 1 did_antifungals: title: Defined daily doses of antifungals used per 1,000 inhabitants per day description_short: Total [defined daily doses](#dod:defined-daily-doses) of antifungals used in a given year per 1,000 inhabitants per day. unit: defined daily doses per 1,000 inhabitants per day + display: + numDecimalPlaces: 1 did_antituberculosis: title: Defined daily doses of antituberculosis drugs used per 1,000 inhabitants per day description_short: Total [defined daily doses](#dod:defined-daily-doses) of antituberculosis drugs used in a given year per 1,000 inhabitants per day. unit: defined daily doses per 1,000 inhabitants per day + display: + numDecimalPlaces: 1 did_antivirals: title: Defined daily doses of antivirals used per 1,000 inhabitants per day description_short: Total [defined daily doses](#dod:defined-daily-doses) of antivirals used in a given year per 1,000 inhabitants per day. 
unit: defined daily doses per 1,000 inhabitants per day + display: + numDecimalPlaces: 1 aware: variables: ddd: title: Defined daily doses of << awarelabel >> antibiotics used description_short: "Total [defined daily doses](#dod:defined-daily-doses) of AWaRe category: << awarelabel >> antibiotics used in a given year. {definitions.aware_description}" unit: defined daily doses + display: + numDecimalPlaces: 0 did: title: Defined daily doses per 1000 inhabitants per day of << awarelabel>> antibiotics used description_short: "Total [defined daily doses](#dod:defined-daily-doses) of AWaRe category: << awarelabel >> used per 1000 inhabitants per day. {definitions.aware_description}" unit: defined daily doses per 1000 inhabitants per day + display: + numDecimalPlaces: 1