Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📊 demography: unique age-group variable names #3706

Merged
merged 3 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions etl/steps/data/garden/un/2024-03-14/un_wpp_most.meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ definitions:
tables:
population_5_year_age_groups:
variables:
age_group:
age_group_five:
title: Five year age-group with the highest population
unit: ""
description_short: |-
Expand All @@ -36,7 +36,7 @@ tables:
title_public: Five year age-group with the highest population
population_10_year_age_groups:
variables:
age_group:
age_group_ten:
title: Ten year age-group with the highest population
unit: ""
description_short: |-
Expand Down
18 changes: 10 additions & 8 deletions etl/steps/data/garden/un/2024-03-14/un_wpp_most.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any

from owid.catalog import Table
from owid.catalog import processing as pr
from structlog import get_logger
Expand All @@ -22,9 +24,9 @@ def run(dest_dir: str) -> None:
log.info(f"Creating population table for {age_group} year age groups")
# filter data for just sex = all, metrics = population, variant = estimates
if age_group == 5:
tb_pop_filter = create_five_year_age_groups(tb_pop)
tb_pop_filter = create_five_year_age_groups(tb_pop, origins)
if age_group == 10:
tb_pop_filter = create_ten_year_age_groups(tb_pop)
tb_pop_filter = create_ten_year_age_groups(tb_pop, origins)
# Group by country and year, and apply the custom function
tb_pop_filter = (
tb_pop_filter.groupby(["country", "year"], group_keys=False)
Expand All @@ -33,8 +35,6 @@ def run(dest_dir: str) -> None:
)
# The function above creates NAs for some countries that don't appear to be in the table (e.g. Vatican, Melanesia), so dropping here

# tb_pop_filter = tb_pop_filter.copy_metadata(tb_pop)
tb_pop_filter["age_group"].metadata.origins = [origins]
tb_pop_filter = tb_pop_filter.drop(columns=["population"])
tb_pop_filter = tb_pop_filter.set_index(["country", "year"], verify_integrity=True)
tb_pop_filter.metadata.short_name = f"population_{age_group}_year_age_groups"
Expand All @@ -48,7 +48,7 @@ def run(dest_dir: str) -> None:
ds_garden.save()


def create_ten_year_age_groups(tb: Table) -> Table:
def create_ten_year_age_groups(tb: Table, origins: Any) -> Table:
# Initialize an empty list to hold the age bands
age_bands = []
# Loop through a range with a step of 5, stopping before 100
Expand All @@ -73,12 +73,13 @@ def create_ten_year_age_groups(tb: Table) -> Table:
tb = tb[(tb.age != "0-4") & (tb.age != "5-9") & (tb.age != "10-14") & (tb.age != "15-19")]
# Concatenate the 0-9 and 10-19 age groups with the original table
tb = pr.concat([tb, tb_0_9, tb_10_19])
tb = tb.rename(columns={"age": "age_group"})
tb = tb.rename(columns={"age": "age_group_ten"})
tb["age_group_ten"].metadata.origins = [origins]
tb = tb.reset_index(drop=True)
return tb


def create_five_year_age_groups(tb: Table) -> Table:
def create_five_year_age_groups(tb: Table, origins: Any) -> Table:
# Initialize an empty list to hold the age bands
age_bands = []
# Loop through a range with a step of 5, stopping before 100
Expand All @@ -90,7 +91,8 @@ def create_five_year_age_groups(tb: Table) -> Table:
tb = tb[(tb.sex == "all") & (tb.variant == "estimates") & (tb.age.isin(age_bands))]
assert tb["age"].nunique() == len(age_bands), "Age groups are not as expected"
tb = tb.drop(columns=["sex", "variant", "population_change", "population_density"])
tb = tb.rename(columns={"age": "age_group"})
tb = tb.rename(columns={"age": "age_group_five"})
tb["age_group_five"].metadata.origins = [origins]
tb = tb.reset_index(drop=True)
return tb

Expand Down
Loading