Skip to content

Commit

Permalink
almost there
Browse files Browse the repository at this point in the history
  • Loading branch information
spoonerf committed May 22, 2024
1 parent c8186d5 commit 9f2beb0
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@
"Middle East & North Africa - WB",
"Region of the Americas",
"South Asia - WB",
"South-East Asia Region",
"Sub-Saharan Africa - WB"
"South-East Asia Region"
]
33 changes: 31 additions & 2 deletions etl/steps/data/garden/ihme_gbd/2024-05-20/impairments.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Load a meadow dataset and create a garden dataset."""

from owid.catalog import Table
from owid.catalog import processing as pr

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

Expand All @@ -20,11 +23,17 @@ def run(dest_dir: str) -> None:
#
# Process data.
#
tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path)
tb = geo.harmonize_countries(
df=tb, countries_file=paths.country_mapping_path, excluded_countries_file=paths.excluded_countries_path
)
# Dropping sex column as we only have values for both sexes
if len(tb["sex"].unique() == 1):
tb = tb.drop(columns="sex")
tb = tb.format(["country", "year", "metric", "neglected_tropical_disease", "impairment", "age"])
# Split up the causes of blindness
tb = other_vision_loss_minus_trachoma(tb)

cols = tb.columns.drop(["value"]).to_list()
tb = tb.format(cols)

#
# Save outputs.
Expand All @@ -36,3 +45,23 @@ def run(dest_dir: str) -> None:

# Save changes in the new garden dataset.
ds_garden.save()


def other_vision_loss_minus_trachoma(tb: Table) -> Table:
    """Append rows for the cause "Other vision loss minus trachoma".

    To split up the causes of blindness, the "Trachoma" value is subtracted
    from the "Other vision loss" value for every matching combination of
    country, year, metric, impairment and age. The resulting rows are labelled
    with the cause "Other vision loss minus trachoma" and appended to the
    input table; all original rows are kept.

    Parameters
    ----------
    tb : Table
        Table with at least the columns "country", "year", "metric",
        "impairment", "age", "cause" and "value".

    Returns
    -------
    Table
        The input rows followed by the newly derived rows, with exactly the
        same columns as the input table.
    """
    merge_keys = ["country", "year", "metric", "impairment", "age"]

    tb_other_vision_loss = tb[tb["cause"] == "Other vision loss"].copy()
    tb_trachoma = tb[tb["cause"] == "Trachoma"].copy()

    # Default merge: only key combinations present for BOTH causes yield a row.
    tb_combine = tb_other_vision_loss.merge(tb_trachoma, on=merge_keys, suffixes=("", "_trachoma"))

    # NOTE(review): subtracting is fine for counts and for rates that share the
    # same denominator; confirm it is also meaningful for any other metric.
    tb_combine["value"] = tb_combine["value"] - tb_combine["value_trachoma"]
    tb_combine["cause"] = "Other vision loss minus trachoma"

    # Drop the helper columns created by the merge (e.g. "value_trachoma",
    # "cause_trachoma"). Otherwise they would leak into the returned table,
    # and into any index the caller builds from its columns.
    helper_columns = [column for column in tb_combine.columns if column not in tb.columns]
    tb_combine = tb_combine.drop(columns=helper_columns)

    tb = pr.concat([tb, tb_combine], ignore_index=True)

    return tb
6 changes: 3 additions & 3 deletions etl/steps/data/meadow/ihme_gbd/2024-05-20/impairments.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def run(dest_dir: str) -> None:
#
tb = clean_data(tb)
# Ensure all columns are snake-case, set an appropriate index, and sort conveniently.
tb = tb.format(["country", "year", "metric", "neglected_tropical_disease", "impairment", "age", "sex"])
tb = tb.format(["country", "year", "metric", "cause", "impairment", "age", "sex"])

#
# Save outputs.
Expand All @@ -44,7 +44,7 @@ def clean_data(tb: Table) -> Table:
"measure_name": "measure",
"sex_name": "sex",
"age_name": "age",
"cause_name": "neglected_tropical_disease",
"cause_name": "cause",
"metric_name": "metric",
},
errors="ignore",
Expand All @@ -71,7 +71,7 @@ def clean_data(tb: Table) -> Table:
"impairment": "category",
"sex": "category",
"age": "category",
"neglected_tropical_disease": "category",
"cause": "category",
"metric": "category",
"year": "int",
}
Expand Down

0 comments on commit 9f2beb0

Please sign in to comment.