From 97948bbd23f7905e88367b2a99b8f70a08a8f6b1 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Sun, 8 Dec 2024 10:55:59 +0000 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=93=8A=20age-group=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From 552ccbbc2c19768597870d66631fbadc83377175 Mon Sep 17 00:00:00 2001 From: spoonerf Date: Sun, 8 Dec 2024 10:56:32 +0000 Subject: [PATCH 2/3] fix variable names uniqueness --- etl/steps/data/garden/un/2024-03-14/un_wpp_most.meta.yml | 4 ++-- etl/steps/data/garden/un/2024-03-14/un_wpp_most.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/etl/steps/data/garden/un/2024-03-14/un_wpp_most.meta.yml b/etl/steps/data/garden/un/2024-03-14/un_wpp_most.meta.yml index 69812ea6a9f..c7793d9cbb2 100644 --- a/etl/steps/data/garden/un/2024-03-14/un_wpp_most.meta.yml +++ b/etl/steps/data/garden/un/2024-03-14/un_wpp_most.meta.yml @@ -9,7 +9,7 @@ definitions: tables: population_5_year_age_groups: variables: - age_group: + age_group_five: title: Five year age-group with the highest population unit: "" description_short: |- @@ -36,7 +36,7 @@ tables: title_public: Five year age-group with the highest population population_10_year_age_groups: variables: - age_group: + age_group_ten: title: Ten year age-group with the highest population unit: "" description_short: |- diff --git a/etl/steps/data/garden/un/2024-03-14/un_wpp_most.py b/etl/steps/data/garden/un/2024-03-14/un_wpp_most.py index 6a1ddddce4b..c7b5e721d92 100644 --- a/etl/steps/data/garden/un/2024-03-14/un_wpp_most.py +++ b/etl/steps/data/garden/un/2024-03-14/un_wpp_most.py @@ -73,7 +73,7 @@ def create_ten_year_age_groups(tb: Table) -> Table: tb = tb[(tb.age != "0-4") & (tb.age != "5-9") & (tb.age != "10-14") & (tb.age != "15-19")] # Concatenate the 0-9 and 10-19 age groups with the original table tb = pr.concat([tb, tb_0_9, tb_10_19]) - tb = tb.rename(columns={"age": "age_group"}) + tb = tb.rename(columns={"age": "age_group_ten"}) tb = tb.reset_index(drop=True) return tb @@ -90,7 +90,7 @@ def create_five_year_age_groups(tb: Table) -> Table: tb = tb[(tb.sex == "all") & (tb.variant == "estimates") & (tb.age.isin(age_bands))] assert tb["age"].nunique() == len(age_bands), "Age groups are not as expected" tb = tb.drop(columns=["sex", "variant", "population_change", "population_density"]) - tb = tb.rename(columns={"age": "age_group"}) + tb = tb.rename(columns={"age": "age_group_five"}) tb = tb.reset_index(drop=True) return tb From 9c4949f7055dc443efe5d1b3d47c87dc1268530f Mon Sep 17 00:00:00 2001 From: spoonerf Date: Sun, 8 Dec 2024 11:03:03 +0000 Subject: [PATCH 3/3] add origins --- etl/steps/data/garden/un/2024-03-14/un_wpp_most.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/etl/steps/data/garden/un/2024-03-14/un_wpp_most.py b/etl/steps/data/garden/un/2024-03-14/un_wpp_most.py index c7b5e721d92..0b0220c589f 100644 --- a/etl/steps/data/garden/un/2024-03-14/un_wpp_most.py +++ b/etl/steps/data/garden/un/2024-03-14/un_wpp_most.py @@ -1,3 +1,5 @@ +from typing import Any + from owid.catalog import Table from owid.catalog import processing as pr from structlog import get_logger @@ -22,9 +24,9 @@ def run(dest_dir: str) -> None: log.info(f"Creating population table for {age_group} year age groups") # filter data for just sex = all, metrics = population, variant = estimates if age_group == 5: - tb_pop_filter = create_five_year_age_groups(tb_pop) + tb_pop_filter = create_five_year_age_groups(tb_pop, origins) if age_group == 10: - tb_pop_filter = create_ten_year_age_groups(tb_pop) + tb_pop_filter = create_ten_year_age_groups(tb_pop, origins) # Group by country and year, and apply the custom function tb_pop_filter = ( tb_pop_filter.groupby(["country", "year"], group_keys=False) @@ -33,8 +35,6 @@ def run(dest_dir: str) -> None: ) # The function above creates NAs for some countrys that don't appear to be in the table e.g. Vatican, Melanesia, so dropping here - # tb_pop_filter = tb_pop_filter.copy_metadata(tb_pop) - tb_pop_filter["age_group"].metadata.origins = [origins] tb_pop_filter = tb_pop_filter.drop(columns=["population"]) tb_pop_filter = tb_pop_filter.set_index(["country", "year"], verify_integrity=True) tb_pop_filter.metadata.short_name = f"population_{age_group}_year_age_groups" @@ -48,7 +48,7 @@ def run(dest_dir: str) -> None: ds_garden.save() -def create_ten_year_age_groups(tb: Table) -> Table: +def create_ten_year_age_groups(tb: Table, origins: Any) -> Table: # Initialize an empty list to hold the age bands age_bands = [] # Loop through a range with a step of 5, stopping before 100 @@ -74,11 +74,12 @@ def create_ten_year_age_groups(tb: Table) -> Table: # Concatenate the 0-9 and 10-19 age groups with the original table tb = pr.concat([tb, tb_0_9, tb_10_19]) tb = tb.rename(columns={"age": "age_group_ten"}) + tb["age_group_ten"].metadata.origins = [origins] tb = tb.reset_index(drop=True) return tb -def create_five_year_age_groups(tb: Table) -> Table: +def create_five_year_age_groups(tb: Table, origins: Any) -> Table: # Initialize an empty list to hold the age bands age_bands = [] # Loop through a range with a step of 5, stopping before 100 @@ -91,6 +92,7 @@ def create_five_year_age_groups(tb: Table) -> Table: assert tb["age"].nunique() == len(age_bands), "Age groups are not as expected" tb = tb.drop(columns=["sex", "variant", "population_change", "population_density"]) tb = tb.rename(columns={"age": "age_group_five"}) + tb["age_group_five"].metadata.origins = [origins] tb = tb.reset_index(drop=True) return tb