Skip to content

Commit

Permalink
📊 include un desa migrant stock flows in grapher (#3581)
Browse files Browse the repository at this point in the history
* 📊 include un desa migrant stock flows in grapher

* 🐛 rename columns to avoid duplicates

* ✨ PR comments

---------

Co-authored-by: Tuna Acisu <[email protected]>
  • Loading branch information
antea04 and Tuna Acisu authored Nov 28, 2024
1 parent 6532def commit 8de72c2
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 0 deletions.
4 changes: 4 additions & 0 deletions dag/demography.yml
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,10 @@ steps:
data://grapher/un/2024-07-16/migrant_stock:
- data://garden/un/2024-07-16/migrant_stock

# Migration UN DESA (flows)
data://grapher/migration/2024-11-20/migrant_stock_flows:
- data://garden/un/2024-07-16/migrant_stock

# Internal displacement monitoring centre
data://meadow/idmc/2024-08-02/internal_displacement:
- snapshot://idmc/2024-08-02/internal_displacement.xlsx
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
presentation:
topic_tags:
- Migration


# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365
title: International Migrant Stock (Origin and Destination)



82 changes: 82 additions & 0 deletions etl/steps/data/grapher/migration/2024-11-20/migrant_stock_flows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""Load a garden dataset and create a grapher dataset.
This grapher step has two purposes:
1. Format the data in a way that is compatible with the grapher database (split into two tables and index on country and year).
2. Add metadata programmatically to the data."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
#
# Load inputs.
#
# Load garden dataset.
ds_garden = paths.load_dataset("migrant_stock")

# Read table from garden dataset.
tb = ds_garden.read("migrant_stock_dest_origin")

tb = tb.drop(columns=["migrants_female", "migrants_male"])

tb_dest_cols = tb.pivot(
index=["country_origin", "year"],
columns="country_destination",
values=["migrants_all_sexes"],
)

tb_dest_cols.columns = [col[0] + "_to_" + col[1] for col in tb_dest_cols.columns]

tb_origin_cols = tb.pivot(
index=["country_destination", "year"],
columns="country_origin",
values=["migrants_all_sexes"],
)

tb_origin_cols.columns = [col[0] + "_from_" + col[1] for col in tb_origin_cols.columns]

# add metadata:

for col in tb_dest_cols.columns:
dest = col.split("migrants_all_sexes_to_")[1]
tb_dest_cols[col].metadata.unit = "people"
tb_dest_cols[col].metadata.short_unit = ""
tb_dest_cols[col].metadata.title = f"Number of immigrants who moved to {dest}"
tb_dest_cols[
col
].metadata.description_short = f"Number of migrants who have moved to {dest}. The numbers describe cumulative migrant stock, not migrants who moved in this year."

for col in tb_origin_cols.columns:
origin = col.split("migrants_all_sexes_from_")[1]

tb_origin_cols[col].metadata.unit = "people"
tb_origin_cols[col].metadata.short_unit = ""
tb_origin_cols[col].metadata.title = f"Number of emigrants who moved from {origin}"
tb_origin_cols[
col
].metadata.description_short = f"Number of migrants who have moved to away from {origin}. The numbers describe cumulative migrant stock, not migrants who moved in this year."

tb_dest_cols = tb_dest_cols.reset_index()
tb_dest_cols = tb_dest_cols.rename(columns={"country_origin": "country"})
tb_dest_cols.metadata.short_name = "migrant_stock_origin"
tb_dest_cols = tb_dest_cols.format(["country", "year"])

tb_origin_cols = tb_origin_cols.reset_index()
tb_origin_cols = tb_origin_cols.rename(columns={"country_destination": "country"})
tb_origin_cols.metadata.short_name = "migrant_stock_destination"
tb_origin_cols = tb_origin_cols.format(["country", "year"])

# Save outputs
#
# Create a new grapher dataset with the same metadata as the garden dataset
ds_grapher = create_dataset(
dest_dir,
tables=[tb_origin_cols, tb_dest_cols],
check_variables_metadata=True,
default_metadata=ds_garden.metadata,
)

# Save changes in the new grapher dataset.
ds_grapher.save()

0 comments on commit 8de72c2

Please sign in to comment.