From a8df1224997a8cc508c9123e181389f865709a48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Rod=C3=A9s-Guirao?= Date: Tue, 3 Dec 2024 19:48:23 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=8A=20add=20demography=20long=20run=20?= =?UTF-8?q?indicators=20(#3684)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📊 add demography long run indicators * birth rate long run indicator * fix metadata * wip --- dag/demography.yml | 16 ++++- .../demography/2024-12-03/birth_rate.meta.yml | 45 ++++++++++++++ .../demography/2024-12-03/birth_rate.py | 62 +++++++++++++++++++ .../2024-12-03/fertility_rate.meta.yml | 4 +- .../demography/2024-12-03/birth_rate.py | 28 +++++++++ 5 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 etl/steps/data/garden/demography/2024-12-03/birth_rate.meta.yml create mode 100644 etl/steps/data/garden/demography/2024-12-03/birth_rate.py create mode 100644 etl/steps/data/grapher/demography/2024-12-03/birth_rate.py diff --git a/dag/demography.yml b/dag/demography.yml index fb55001e45e..453a694c419 100644 --- a/dag/demography.yml +++ b/dag/demography.yml @@ -245,9 +245,23 @@ steps: data://grapher/demography/2024-11-26/multiple_births: - data://garden/demography/2024-11-26/multiple_births - # OMM: HFD + UN WPP + # OMM: Fertility Rate -- HFD + UN WPP data://garden/demography/2024-12-03/fertility_rate: - data://garden/hmd/2024-11-19/hfd - data://garden/un/2024-07-12/un_wpp data://grapher/demography/2024-12-03/fertility_rate: - data://garden/demography/2024-12-03/fertility_rate + + # OMM: Mean Age at Birth -- HFD + UN WPP + # data://garden/demography/2024-12-03/mean_age_birth: + # - data://garden/hmd/2024-11-19/hfd + # - data://garden/un/2024-07-12/un_wpp + # data://grapher/demography/2024-12-03/mean_age_birth: + # - data://garden/demography/2024-12-03/mean_age_birth + + # OMM: Birth rate -- HMD + UN WPP + data://garden/demography/2024-12-03/birth_rate: + - data://garden/hmd/2024-12-01/hmd + -
data://garden/un/2024-07-12/un_wpp + data://grapher/demography/2024-12-03/birth_rate: + - data://garden/demography/2024-12-03/birth_rate diff --git a/etl/steps/data/garden/demography/2024-12-03/birth_rate.meta.yml b/etl/steps/data/garden/demography/2024-12-03/birth_rate.meta.yml new file mode 100644 index 00000000000..0ee61f6edb7 --- /dev/null +++ b/etl/steps/data/garden/demography/2024-12-03/birth_rate.meta.yml @@ -0,0 +1,45 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + title_public: Birth rate + topic_tags: + - Fertility Rate + display: + name: |- + Birth rate + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + title: Birth Rate (HMD; UN WPP) + update_period_days: 365 + +tables: + birth_rate: + variables: + birth_rate: + title: Birth rate + unit: births per 1,000 people + description_short: |- + The total number of births per 1,000 people in a given year. + description_processing: |- + The birth data is constructed by combining data from multiple sources: + + - Before 1950: Historical estimates by Human Mortality Database (2024). + + - 1950-2023: Population records by the UN World Population Prospects (2024 revision). + + - 2024-2100: Projections based on Medium variant by the UN World Population Prospects (2024 revision). + + birth_rate_hist: + title: Birth rate (historical) + unit: births per 1,000 people + description_short: |- + The total number of births per 1,000 people in a given year. + description_processing: |- + The birth data is constructed by combining data from multiple sources: + + - Before 1950: Historical estimates by Human Mortality Database (2024). + + - 1950-2023: Population records by the UN World Population Prospects (2024 revision).
diff --git a/etl/steps/data/garden/demography/2024-12-03/birth_rate.py b/etl/steps/data/garden/demography/2024-12-03/birth_rate.py new file mode 100644 index 00000000000..02508c2497b --- /dev/null +++ b/etl/steps/data/garden/demography/2024-12-03/birth_rate.py @@ -0,0 +1,62 @@ +"""Combine the HMD and UN WPP garden datasets into a long-run birth rate garden dataset.""" + +import owid.catalog.processing as pr +import pandas as pd + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +YEAR_WPP_PROJ_START = 2024 +YEAR_WPP_START = 1950 + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden datasets (HMD and UN WPP). + ds_hmd = paths.load_dataset("hmd") + ds_un = paths.load_dataset("un_wpp") + + # Read the births table from each garden dataset. + tb_hmd = ds_hmd.read("births") + tb_un = ds_un.read("births") + + # + # Process data. + # + # UN: keep age == "all" and the "estimates" + "medium" variants only + tb_un = tb_un.loc[ + (tb_un["age"] == "all") & (tb_un["variant"].isin(["medium", "estimates"])), + ["country", "year", "birth_rate"], + ] + # HMD: keep sex == "total" and only years before the UN series starts (YEAR_WPP_START) + tb_hmd = tb_hmd.loc[ + (tb_hmd["year"] < YEAR_WPP_START) & (tb_hmd["sex"] == "total"), ["country", "year", "birth_rate"] + ] + + # Combine + tb = pr.concat([tb_hmd, tb_un], ignore_index=True, short_name="birth_rate") + tb = tb.dropna(subset=["birth_rate"]) + + # Add historical variant: same series, blanked from the first projection year (inclusive) onwards + tb["birth_rate_hist"] = tb["birth_rate"].copy() + tb.loc[tb["year"] >= YEAR_WPP_PROJ_START, "birth_rate_hist"] = pd.NA + + # Format + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new garden dataset from the combined table. + ds_garden = create_dataset( + dest_dir, + tables=[tb], + check_variables_metadata=True, + ) + + # Save changes in the new garden dataset.
+ ds_garden.save() diff --git a/etl/steps/data/garden/demography/2024-12-03/fertility_rate.meta.yml b/etl/steps/data/garden/demography/2024-12-03/fertility_rate.meta.yml index 01bb4e5accb..3390773b651 100644 --- a/etl/steps/data/garden/demography/2024-12-03/fertility_rate.meta.yml +++ b/etl/steps/data/garden/demography/2024-12-03/fertility_rate.meta.yml @@ -27,7 +27,7 @@ tables: description_processing: |- The fertility data is constructed by combining data from multiple sources: - - 1800 - 1949: Historical estimates by Human Fertility Database (2024). + - Before 1950: Historical estimates by Human Fertility Database (2024). - 1950-2023: Population records by the UN World Population Prospects (2024 revision). @@ -47,7 +47,7 @@ tables: description_processing: |- The fertility data is constructed by combining data from multiple sources: - - 1800 - 1949: Historical estimates by Human Fertility Database (2024). + - Before 1950: Historical estimates by Human Fertility Database (2024). - 1950-2023: Population records by the UN World Population Prospects (2024 revision). presentation: diff --git a/etl/steps/data/grapher/demography/2024-12-03/birth_rate.py b/etl/steps/data/grapher/demography/2024-12-03/birth_rate.py new file mode 100644 index 00000000000..dc16db838f9 --- /dev/null +++ b/etl/steps/data/grapher/demography/2024-12-03/birth_rate.py @@ -0,0 +1,28 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("birth_rate") + + # Read table from garden dataset. + tb = ds_garden.read("birth_rate", reset_index=False) + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset.
+ ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save()