owid · antea04 · Nov 28, 2024 · Nov 20, 2024 · Nov 21, 2024 · Nov 28, 2024
diff --git a/dag/demography.yml b/dag/demography.yml
@@ -231,6 +231,10 @@ steps:
   data://grapher/un/2024-07-16/migrant_stock:
     - data://garden/un/2024-07-16/migrant_stock
 
+  # Migration UN DESA (flows)
+  data://grapher/migration/2024-11-20/migrant_stock_flows:
+    - data://garden/un/2024-07-16/migrant_stock
+
   # Internal displacement monitoring centre
   data://meadow/idmc/2024-08-02/internal_displacement:
     - snapshot://idmc/2024-08-02/internal_displacement.xlsx
@@ -269,3 +273,5 @@ steps:
     - data://garden/un/2024-07-12/un_wpp
   data://grapher/un/2024-10-01/births_by_age:
     - data://garden/un/2024-10-01/births_by_age
+
+
diff --git a/etl/steps/data/grapher/migration/2024-11-20/migrant_stock_flows.meta.yml b/etl/steps/data/grapher/migration/2024-11-20/migrant_stock_flows.meta.yml
@@ -0,0 +1,16 @@
+# NOTE: To learn more about the fields, hover over their names.
+definitions:
+  common:
+    presentation:
+      topic_tags:
+        - Migration
+
+
+# Learn more about the available fields:
+# http://docs.owid.io/projects/etl/architecture/metadata/reference/
+dataset:
+  update_period_days: 365
+  title: International Migrant Stock (Origin and Destination)
+
+
+
diff --git a/etl/steps/data/grapher/migration/2024-11-20/migrant_stock_flows.py b/etl/steps/data/grapher/migration/2024-11-20/migrant_stock_flows.py
@@ -0,0 +1,96 @@
+"""Load a garden dataset and create a grapher dataset.
+This grapher step has two purposes:
+1. Format the data in a way that is compatible with the grapher database (split into two tables and index on country and year).
+2. Add metadata programmatically to the data."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    """Build the grapher dataset of migrant stock by origin and destination.
+
+    Reads the `migrant_stock_dest_origin` table from the garden `migrant_stock`
+    dataset, pivots it into two wide tables (one column per destination country
+    and one column per origin country), sets the metadata of every column and
+    saves both tables as a new dataset.
+
+    Args:
+        dest_dir: Destination passed on to `create_dataset`.
+    """
+    #
+    # Load inputs.
+    #
+    # Load garden dataset.
+    ds_garden = paths.load_dataset("migrant_stock")
+
+    # Read table from garden dataset.
+    tb = ds_garden.read("migrant_stock_dest_origin")
+
+    # grapher cannot handle this many indicators, so we remove male and female columns
+    tb = tb.drop(columns=["migrants_female", "migrants_male"])
+
+    # Rows are (origin, year); one column per destination country.
+    tb_dest_cols = tb.pivot(
+        index=["country_origin", "year"],
+        columns="country_destination",
+        values=["migrants_all_sexes"],
+    )
+    # Flatten the (value name, country) column tuples into single strings.
+    tb_dest_cols.columns = [col[0] + "_to_" + col[1] for col in tb_dest_cols.columns]
+
+    # Rows are (destination, year); one column per origin country.
+    tb_origin_cols = tb.pivot(
+        index=["country_destination", "year"],
+        columns="country_origin",
+        values=["migrants_all_sexes"],
+    )
+    tb_origin_cols.columns = [col[0] + "_from_" + col[1] for col in tb_origin_cols.columns]
+
+    # Add metadata. The country name is recovered from the flattened column name.
+    stock_note = "The numbers describe cumulative migrant stock, not migrants who moved in this year."
+
+    for col in tb_dest_cols.columns:
+        dest = col.split("migrants_all_sexes_to_", 1)[1]
+        meta = tb_dest_cols[col].metadata
+        meta.unit = "people"
+        meta.short_unit = ""
+        meta.title = f"Number of immigrants who moved to {dest}"
+        meta.description_short = f"Number of migrants who have moved to {dest}. {stock_note}"
+
+    for col in tb_origin_cols.columns:
+        origin = col.split("migrants_all_sexes_from_", 1)[1]
+        meta = tb_origin_cols[col].metadata
+        meta.unit = "people"
+        meta.short_unit = ""
+        meta.title = f"Number of emigrants who moved from {origin}"
+        # Wording fix: this text previously read "moved to away from".
+        meta.description_short = f"Number of migrants who have moved away from {origin}. {stock_note}"
+
+    # Index on country and year; in this table "country" holds the origin.
+    tb_dest_cols = tb_dest_cols.reset_index()
+    tb_dest_cols = tb_dest_cols.rename(columns={"country_origin": "country"})
+    tb_dest_cols.metadata.short_name = "migrant_stock_origin"
+    tb_dest_cols = tb_dest_cols.format(["country", "year"])
+
+    # In this table the "country" column holds the destination.
+    tb_origin_cols = tb_origin_cols.reset_index()
+    tb_origin_cols = tb_origin_cols.rename(columns={"country_destination": "country"})
+    tb_origin_cols.metadata.short_name = "migrant_stock_destination"
+    tb_origin_cols = tb_origin_cols.format(["country", "year"])
+
+    #
+    # Save outputs.
+    #
+    # Create a new grapher dataset with the same metadata as the garden dataset
+    ds_grapher = create_dataset(
+        dest_dir,
+        tables=[tb_origin_cols, tb_dest_cols],
+        check_variables_metadata=True,
+        default_metadata=ds_garden.metadata,
+    )
+
+    # Save changes in the new grapher dataset.
+    ds_grapher.save()