Skip to content

Commit

Permalink
adding age-groups to shared
Browse files Browse the repository at this point in the history
  • Loading branch information
spoonerf committed May 28, 2024
1 parent e50da5d commit cb70490
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 21 deletions.
34 changes: 33 additions & 1 deletion etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_mental_health.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,32 @@
"""Load a meadow dataset and create a garden dataset."""

from shared import add_regional_aggregates, add_share_population

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)
# Region names handed to add_regional_aggregates() as `regions` in run().
REGIONS = ["North America", "South America", "Europe", "Africa", "Asia", "Oceania"]
# Maps each age-group label used in the data to a [lower, upper] pair of ages.
# Handed to add_regional_aggregates() as `age_group_mapping` in run().
# NOTE(review): a None upper bound presumably marks an open-ended group ("All ages", "70+ years");
# confirm against the population helper that consumes this mapping.
# Some groups overlap on purpose (e.g. "15-49 years" spans "15-19 years" through "45-49 years",
# and "50-69 years" spans "50-54 years" through "65-69 years").
AGE_GROUPS_RANGES = {
"All ages": [0, None],
"<5 years": [0, 4],
"5-14 years": [5, 14],
"15-19 years": [15, 19],
"15-49 years": [15, 49],
"20-24 years": [20, 24],
"25-29 years": [25, 29],
"30-34 years": [30, 34],
"35-39 years": [35, 39],
"40-44 years": [40, 44],
"45-49 years": [45, 49],
"50-54 years": [50, 54],
"50-69 years": [50, 69],
"55-59 years": [55, 59],
"60-64 years": [60, 64],
"65-69 years": [65, 69],
"70+ years": [70, None],
}


def run(dest_dir: str) -> None:
Expand All @@ -13,10 +35,20 @@ def run(dest_dir: str) -> None:
#
# Load meadow dataset.
ds_meadow = paths.load_dataset("gbd_mental_health")

# Load regions dataset.
ds_regions = paths.load_dataset("regions")
# Read table from meadow dataset.
tb = ds_meadow["gbd_mental_health"].reset_index()

tb = add_regional_aggregates(
tb,
ds_regions,
index_cols=["country", "year", "metric", "cause", "age", "sex"],
regions=REGIONS,
age_group_mapping=AGE_GROUPS_RANGES,
)
# Add a share of the population column
tb = add_share_population(tb)
#
# Process data.
#
Expand Down
17 changes: 1 addition & 16 deletions etl/steps/data/garden/ihme_gbd/2024-05-20/gbd_prevalence.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""Load a meadow dataset and create a garden dataset."""

from owid.catalog import Table
from owid.catalog import processing as pr
from shared import add_regional_aggregates
from shared import add_regional_aggregates, add_share_population

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset
Expand Down Expand Up @@ -72,16 +70,3 @@ def run(dest_dir: str) -> None:

# Save changes in the new garden dataset.
ds_garden.save()


def add_share_population(tb: Table) -> Table:
    """
    Append 'Share' rows to the table, derived from its 'Rate' rows.

    Every row whose 'metric' equals "Rate" (cases per 100,000 people) is copied, relabelled
    as "Share" and has its 'value' divided by 1000, giving the equivalent per 100 people.
    The input rows are kept and the new rows are concatenated after them with a fresh index.
    """
    rate_mask = tb["metric"] == "Rate"
    share_rows = tb[rate_mask].copy()
    share_rows["metric"] = "Share"
    # per 100,000 -> per 100
    share_rows["value"] = share_rows["value"] / 1000

    return pr.concat([tb, share_rows], ignore_index=True)
35 changes: 31 additions & 4 deletions etl/steps/data/garden/ihme_gbd/2024-05-20/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,24 @@ def add_regional_aggregates(
tb_number = tb[tb["metric"].isin(["Number", "Percent"])].copy()
tb_rate = tb[tb["metric"] == "Rate"].copy()
tb_percent = tb[tb["metric"] == "Percent"].copy()
# Add population data
tb_number = add_population(
df=tb_number, country_col="country", year_col="year", age_col="age", age_group_mapping=age_group_mapping
)
# Add population data - some datasets will have data disaggregated by sex
if "sex" in tb.columns:
tb_number = add_population(
df=tb_number,
country_col="country",
year_col="year",
age_col="age",
age_group_mapping=age_group_mapping,
sex_col="sex",
sex_group_all="Both",
sex_group_female="Female",
sex_group_male="Male",
)
else:
tb_number = add_population(
df=tb_number, country_col="country", year_col="year", age_col="age", age_group_mapping=age_group_mapping
)
assert tb_number["value"].notna().all(), "Values are missing in the Number table, check configuration"
# Combine Number and Percent tables
tb_number_percent = pr.concat([tb_number, tb_percent], ignore_index=True)
# Add region aggregates - for Number and Percent (if present)
Expand All @@ -51,3 +65,16 @@ def add_regional_aggregates(
)
tb_out = tb_out.drop(columns="population")
return tb_out


def add_share_population(tb: Table) -> Table:
    """
    Extend the table with a 'Share' metric computed from the 'Rate' metric.

    Rows with metric "Rate" hold cases per 100,000 people. They are duplicated with the
    metric renamed to "Share" and the value scaled down by 1000, i.e. cases per 100 people.
    Returns the original rows followed by the new 'Share' rows, re-indexed from zero.
    """
    # Work on a copy so the caller's 'Rate' rows are left untouched.
    rate_rows = tb[tb["metric"] == "Rate"].copy()
    rate_rows["metric"] = "Share"
    rate_rows["value"] = rate_rows["value"] / 1000

    combined = pr.concat([tb, rate_rows], ignore_index=True)
    return combined

0 comments on commit cb70490

Please sign in to comment.