📊 add demography long run indicators (#3684)

* 📊 add demography long run indicators * birth rate long run indicator * fix metadata * wip
owid · Dec 3, 2024 · a8df122 · a8df122
1 parent 5d0b081
commit a8df122
Show file tree

Hide file tree

Showing 5 changed files with 152 additions and 3 deletions.
diff --git a/dag/demography.yml b/dag/demography.yml
@@ -245,9 +245,23 @@ steps:
   data://grapher/demography/2024-11-26/multiple_births:
     - data://garden/demography/2024-11-26/multiple_births
 
-  # OMM: HFD + UN WPP
+  # OMM: Fertility Rate -- HFD + UN WPP
   data://garden/demography/2024-12-03/fertility_rate:
     - data://garden/hmd/2024-11-19/hfd
     - data://garden/un/2024-07-12/un_wpp
   data://grapher/demography/2024-12-03/fertility_rate:
     - data://garden/demography/2024-12-03/fertility_rate
+
+  # OMM: Mean Age at Birth -- HFD + UN WPP
+  # data://garden/demography/2024-12-03/mean_age_birth:
+  #   - data://garden/hmd/2024-11-19/hfd
+  #   - data://garden/un/2024-07-12/un_wpp
+  # data://grapher/demography/2024-12-03/mean_age_birth:
+  #   - data://garden/demography/2024-12-03/mean_age_birth
+
+  # OMM: Birth rate -- HMD + UN WPP
+  data://garden/demography/2024-12-03/birth_rate:
+    - data://garden/hmd/2024-12-01/hmd
+    - data://garden/un/2024-07-12/un_wpp
+  data://grapher/demography/2024-12-03/birth_rate:
+    - data://garden/demography/2024-12-03/birth_rate
diff --git a/etl/steps/data/garden/demography/2024-12-03/birth_rate.meta.yml b/etl/steps/data/garden/demography/2024-12-03/birth_rate.meta.yml
@@ -0,0 +1,45 @@
+# NOTE: To learn more about the fields, hover over their names.
+definitions:
+  common:
+    presentation:
+      title_public: Birth rate
+      topic_tags:
+        - Fertility Rate
+    display:
+      name: |-
+        Birth rate
+
+# Learn more about the available fields:
+# http://docs.owid.io/projects/etl/architecture/metadata/reference/
+dataset:
+  title: Birth Rate (HMD; UN WPP)
+  update_period_days: 365
+
+tables:
+  birth_rate:
+    variables:
+      birth_rate:
+        title: Birth rate
+        unit: births per 1,000 people
+        description_short: |-
+          The total number of births per 1,000 people in a given year.
+        description_processing: |-
+          The birth data is constructed by combining data from multiple sources:
+
+          - Before 1950: Historical estimates by Human Mortality Database (2024).
+
+          - 1950-2023: Population records by the UN World Population Prospects (2024 revision).
+
+          - 2024-2100: Projections based on Medium variant by the UN World Population Prospects (2024 revision).
+
+      birth_rate_hist:
+        title: Birth rate (historical)
+        unit: births per 1,000 people
+        description_short: |-
+          The total number of births per 1,000 people in a given year.
+        description_processing: |-
+          The birth data is constructed by combining data from multiple sources:
+
+          - Before 1950: Historical estimates by Human Mortality Database (2024).
+
+          - 1950-2023: Population records by the UN World Population Prospects (2024 revision).
diff --git a/etl/steps/data/garden/demography/2024-12-03/birth_rate.py b/etl/steps/data/garden/demography/2024-12-03/birth_rate.py
@@ -0,0 +1,62 @@
+"""Load the HMD and UN WPP garden datasets and create the birth rate garden dataset."""
+
+import owid.catalog.processing as pr
+import pandas as pd
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+YEAR_WPP_PROJ_START = 2024  # First UN WPP projection year (earlier UN years are estimates).
+YEAR_WPP_START = 1950  # First year taken from UN WPP; HMD supplies the years before it.
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load meadow dataset.
+    ds_hmd = paths.load_dataset("hmd")
+    ds_un = paths.load_dataset("un_wpp")
+
+    # Read table from meadow dataset.
+    tb_hmd = ds_hmd.read("births")
+    tb_un = ds_un.read("births")
+
+    #
+    # Process data.
+    #
+    # UN
+    tb_un = tb_un.loc[
+        (tb_un["age"] == "all") & (tb_un["variant"].isin(["medium", "estimates"])),
+        ["country", "year", "birth_rate"],
+    ]
+    # HMD
+    tb_hmd = tb_hmd.loc[
+        (tb_hmd["year"] < YEAR_WPP_START) & (tb_hmd["sex"] == "total"), ["country", "year", "birth_rate"]
+    ]
+
+    # Combine
+    tb = pr.concat([tb_hmd, tb_un], ignore_index=True, short_name="birth_rate")
+    tb = tb.dropna(subset=["birth_rate"])
+
+    # Add historical variant
+    tb["birth_rate_hist"] = tb["birth_rate"].copy()
+    tb.loc[tb["year"] > YEAR_WPP_PROJ_START, "birth_rate_hist"] = pd.NA
+
+    # Format
+    tb = tb.format(["country", "year"])
+
+    #
+    # Save outputs.
+    #
+    # Create a new garden dataset with the same metadata as the meadow dataset.
+    ds_garden = create_dataset(
+        dest_dir,
+        tables=[tb],
+        check_variables_metadata=True,
+    )
+
+    # Save changes in the new garden dataset.
+    ds_garden.save()
diff --git a/etl/steps/data/garden/demography/2024-12-03/fertility_rate.meta.yml b/etl/steps/data/garden/demography/2024-12-03/fertility_rate.meta.yml
@@ -27,7 +27,7 @@ tables:
         description_processing: |-
           The fertility data is constructed by combining data from multiple sources:
 
-          - 1800 - 1949: Historical estimates by Human Fertility Database (2024).
+          - Before 1950: Historical estimates by Human Fertility Database (2024).
 
           - 1950-2023: Population records by the UN World Population Prospects (2024 revision).
 
@@ -47,7 +47,7 @@ tables:
         description_processing: |-
           The fertility data is constructed by combining data from multiple sources:
 
-          - 1800 - 1949: Historical estimates by Human Fertility Database (2024).
+          - Before 1950: Historical estimates by Human Fertility Database (2024).
 
           - 1950-2023: Population records by the UN World Population Prospects (2024 revision).
         presentation:

diff --git a/etl/steps/data/grapher/demography/2024-12-03/birth_rate.py b/etl/steps/data/grapher/demography/2024-12-03/birth_rate.py
@@ -0,0 +1,28 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load garden dataset.
+    ds_garden = paths.load_dataset("birth_rate")
+
+    # Read table from garden dataset.
+    tb = ds_garden.read("birth_rate", reset_index=False)
+
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset with the same metadata as the garden dataset.
+    ds_grapher = create_dataset(
+        dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata
+    )
+
+    # Save changes in the new grapher dataset.
+    ds_grapher.save()