From 9f2beb0d79443a58023573d2fe6d5b567097f486 Mon Sep 17 00:00:00 2001
From: spoonerf
Date: Wed, 22 May 2024 16:04:17 +0100
Subject: [PATCH] almost there

---
Review amendments relative to 9f2beb0 (the index hashes below still describe the original commit):
* garden run(): fix the misplaced parenthesis in `len(tb["sex"].unique() == 1)`, which took the
  length of a boolean array and was therefore truthy for any non-empty column.
* other_vision_loss_minus_trachoma(): drop the `*_trachoma` merge helper columns before
  concatenating, so they are not pulled into the index by `tb.format(cols)`.

 .../impairments.excluded_countries.json       |  3 +-
 .../garden/ihme_gbd/2024-05-20/impairments.py | 44 +++++++++++++++++--
 .../meadow/ihme_gbd/2024-05-20/impairments.py |  6 ++--
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/etl/steps/data/garden/ihme_gbd/2024-05-20/impairments.excluded_countries.json b/etl/steps/data/garden/ihme_gbd/2024-05-20/impairments.excluded_countries.json
index 5445a6b6c61..10e334bed43 100644
--- a/etl/steps/data/garden/ihme_gbd/2024-05-20/impairments.excluded_countries.json
+++ b/etl/steps/data/garden/ihme_gbd/2024-05-20/impairments.excluded_countries.json
@@ -3,6 +3,5 @@
     "Middle East & North Africa - WB",
     "Region of the Americas",
     "South Asia - WB",
-    "South-East Asia Region",
-    "Sub-Saharan Africa - WB"
+    "South-East Asia Region"
 ]
\ No newline at end of file
diff --git a/etl/steps/data/garden/ihme_gbd/2024-05-20/impairments.py b/etl/steps/data/garden/ihme_gbd/2024-05-20/impairments.py
index e7c1c2dab8c..109fe80eed6 100644
--- a/etl/steps/data/garden/ihme_gbd/2024-05-20/impairments.py
+++ b/etl/steps/data/garden/ihme_gbd/2024-05-20/impairments.py
@@ -1,5 +1,8 @@
 """Load a meadow dataset and create a garden dataset."""
 
+from owid.catalog import Table
+from owid.catalog import processing as pr
+
 from etl.data_helpers import geo
 from etl.helpers import PathFinder, create_dataset
 
@@ -20,11 +23,17 @@ def run(dest_dir: str) -> None:
     #
     # Process data.
     #
-    tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path)
+    tb = geo.harmonize_countries(
+        df=tb, countries_file=paths.country_mapping_path, excluded_countries_file=paths.excluded_countries_path
+    )
     # Dropping sex column as we only have values for both sexes
-    if len(tb["sex"].unique() == 1):
+    if len(tb["sex"].unique()) == 1:
         tb = tb.drop(columns="sex")
-    tb = tb.format(["country", "year", "metric", "neglected_tropical_disease", "impairment", "age"])
+    # Split up the causes of blindness
+    tb = other_vision_loss_minus_trachoma(tb)
+
+    cols = tb.columns.drop(["value"]).to_list()
+    tb = tb.format(cols)
 
     #
     # Save outputs.
@@ -36,3 +45,32 @@ def run(dest_dir: str) -> None:
 
     # Save changes in the new garden dataset.
     ds_garden.save()
+
+
+def other_vision_loss_minus_trachoma(tb: Table) -> Table:
+    """Add an "Other vision loss minus trachoma" cause to the table.
+
+    To split up the causes of blindness we need to subtract trachoma from other vision loss.
+    Rows are matched on country, year, metric, impairment and age, so (assuming the merge's
+    default inner join) only combinations with both causes present yield a derived row.
+
+    The original rows are kept; the derived rows are appended to them.
+    """
+
+    tb_other_vision_loss = tb[tb["cause"] == "Other vision loss"].copy()
+    tb_trachoma = tb[tb["cause"] == "Trachoma"].copy()
+
+    tb_combine = tb_other_vision_loss.merge(
+        tb_trachoma, on=["country", "year", "metric", "impairment", "age"], suffixes=("", "_trachoma")
+    )
+    # Can I subtract rates if they have the same denominator? I think so
+    tb_combine["value"] = tb_combine["value"] - tb_combine["value_trachoma"]
+    tb_combine["cause"] = "Other vision loss minus trachoma"
+    # Drop the "_trachoma" helper columns created by the merge. Otherwise they survive the concat
+    # below and end up in the index that run() builds from every non-"value" column.
+    helper_cols = [col for col in tb_combine.columns if col.endswith("_trachoma")]
+    tb_combine = tb_combine.drop(columns=helper_cols)
+
+    tb = pr.concat([tb, tb_combine], ignore_index=True)
+
+    return tb
diff --git a/etl/steps/data/meadow/ihme_gbd/2024-05-20/impairments.py b/etl/steps/data/meadow/ihme_gbd/2024-05-20/impairments.py
index adad92484f9..67164c54ea1 100644
--- a/etl/steps/data/meadow/ihme_gbd/2024-05-20/impairments.py
+++ b/etl/steps/data/meadow/ihme_gbd/2024-05-20/impairments.py
@@ -23,7 +23,7 @@ def run(dest_dir: str) -> None:
     #
     tb = clean_data(tb)
     # Ensure all columns are snake-case, set an appropriate index, and sort conveniently.
-    tb = tb.format(["country", "year", "metric", "neglected_tropical_disease", "impairment", "age", "sex"])
+    tb = tb.format(["country", "year", "metric", "cause", "impairment", "age", "sex"])
 
     #
     # Save outputs.
@@ -44,7 +44,7 @@ def clean_data(tb: Table) -> Table:
             "measure_name": "measure",
             "sex_name": "sex",
             "age_name": "age",
-            "cause_name": "neglected_tropical_disease",
+            "cause_name": "cause",
             "metric_name": "metric",
         },
         errors="ignore",
@@ -71,7 +71,7 @@ def clean_data(tb: Table) -> Table:
             "impairment": "category",
             "sex": "category",
             "age": "category",
-            "neglected_tropical_disease": "category",
+            "cause": "category",
             "metric": "category",
             "year": "int",
         }