Skip to content

Commit

Permalink
working on shared
Browse files Browse the repository at this point in the history
  • Loading branch information
spoonerf committed May 24, 2024
1 parent b698b68 commit cd27482
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 10 deletions.
3 changes: 3 additions & 0 deletions dag/health.yml
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,7 @@ steps:
- snapshot-private://ihme_gbd/2024-05-20/gbd_cause.csv
data-private://garden/ihme_gbd/2024-05-20/gbd_cause:
- data-private://meadow/ihme_gbd/2024-05-20/gbd_cause
- data://garden/regions/2023-01-01/regions
data-private://grapher/ihme_gbd/2024-05-20/gbd_cause:
- data-private://garden/ihme_gbd/2024-05-20/gbd_cause

Expand All @@ -623,6 +624,7 @@ steps:
- snapshot-private://ihme_gbd/2024-05-20/gbd_prevalence.csv
data-private://garden/ihme_gbd/2024-05-20/gbd_prevalence:
- data-private://meadow/ihme_gbd/2024-05-20/gbd_prevalence
- data://garden/regions/2023-01-01/regions
data-private://grapher/ihme_gbd/2024-05-20/gbd_prevalence:
- data-private://garden/ihme_gbd/2024-05-20/gbd_prevalence

Expand All @@ -631,5 +633,6 @@ steps:
- snapshot-private://ihme_gbd/2024-05-20/gbd_mental_health.csv
data-private://garden/ihme_gbd/2024-05-20/gbd_mental_health:
- data-private://meadow/ihme_gbd/2024-05-20/gbd_mental_health
- data://garden/regions/2023-01-01/regions
data-private://grapher/ihme_gbd/2024-05-20/gbd_mental_health:
- data-private://garden/ihme_gbd/2024-05-20/gbd_mental_health
22 changes: 21 additions & 1 deletion etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_cause.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
"""Load a meadow dataset and create a garden dataset."""

from shared import add_regional_aggregates

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)
# Region names handed to add_regional_aggregates as its `regions` argument.
REGIONS = ["North America", "South America", "Europe", "Africa", "Asia", "Oceania"]
# Age-group label -> two-element list, handed to add_regional_aggregates as `age_group_mapping`.
# NOTE(review): the values look like inclusive [min_age, max_age] bounds, with None marking an
# open-ended bound — confirm against the population helper used in shared.py.
AGE_GROUPS_RANGES = {
"All ages": [0, None],
"<5 years": [0, 4],
"5-14 years": [5, 14],
"15-49 years": [15, 49],
"50-69 years": [50, 69],
"70+ years": [70, None],
}


def run(dest_dir: str) -> None:
Expand All @@ -16,11 +27,20 @@ def run(dest_dir: str) -> None:

# Read table from meadow dataset.
tb = ds_meadow["gbd_cause"].reset_index()

ds_regions = paths.load_dataset("regions")
#
# Process data.
#
tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path)
# Add regional aggregates
tb = add_regional_aggregates(
tb=tb,
ds_regions=ds_regions,
index_cols=["country", "year", "metric", "measure", "cause", "age"],
regions=REGIONS,
age_group_mapping=AGE_GROUPS_RANGES,
)

# Split into two tables: one for deaths, one for DALYs
tb_deaths = tb[tb["measure"] == "Deaths"].copy()
tb_dalys = tb[tb["measure"] == "DALYs (Disability-Adjusted Life Years)"].copy()
Expand Down
25 changes: 16 additions & 9 deletions etl/steps/data/garden/ihme_gbd/2024-05-20/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,31 @@ def add_regional_aggregates(
"""
Adding the regional aggregated data for the OWID continent regions
"""
# Add population data
tb = add_population(
df=tb, country_col="country", year_col="year", age_col="age", age_group_mapping=age_group_mapping
tb_number = tb[tb["metric"].isin(["Number", "Percent"])].copy()
tb_number = add_population(
df=tb_number, country_col="country", year_col="year", age_col="age", age_group_mapping=age_group_mapping
)
tb_number = tb[tb["metric"] == "Number"].copy()
tb_rate = tb[tb["metric"] == "Rate"].copy()
# Add region aggregates.
tb_number = geo.add_regions_to_table(
tb_number,
tb_percent = tb[tb["metric"] == "Percent"].copy()
tb_number_percent = pr.concat([tb_number, tb_percent], ignore_index=True)
# Add region aggregates - for Number
tb_number_percent = geo.add_regions_to_table(
tb_number_percent,
index_columns=index_cols,
regions=regions,
ds_regions=ds_regions,
min_num_values_per_year=1,
)
# Add region aggregates - for Rate - only need population here?

tb_rate_regions = tb_number[tb_number["country"].isin(regions)].copy()
tb_rate_regions["value"] = tb_number["value"] / tb_number["population"] * 100_000
tb_rate_regions["metric"] = "Rate"
tb_rate = pr.concat([tb_rate, tb_rate_regions], ignore_index=True)
tb_rate = tb_rate.drop(columns="population")

tb_out = pr.concat([tb_number, tb_rate, tb_rate_regions], ignore_index=True)
tb_out = tb_out.drop(columns=["population"])
tb_out = pr.concat(
[tb_number_percent, tb_rate],
ignore_index=True,
)
return tb_out

0 comments on commit cd27482

Please sign in to comment.