diff --git a/dag/archive/poverty_inequality.yml b/dag/archive/poverty_inequality.yml index e2229fd573f..8ca39d6197c 100644 --- a/dag/archive/poverty_inequality.yml +++ b/dag/archive/poverty_inequality.yml @@ -43,3 +43,12 @@ steps: - data://meadow/ophi/2023-07-05/multidimensional_poverty_index data://grapher/ophi/2023-07-05/multidimensional_poverty_index: - data://garden/ophi/2023-07-05/multidimensional_poverty_index + + # Poverty projections from the World Bank + data://meadow/wb/2024-06-26/poverty_projections: + - snapshot://wb/2024-06-26/poverty_projections_number_global.csv + - snapshot://wb/2024-06-26/poverty_projections_share_regions.csv + data://garden/wb/2024-06-26/poverty_projections: + - data://meadow/wb/2024-06-26/poverty_projections + data://grapher/wb/2024-06-26/poverty_projections: + - data://garden/wb/2024-06-26/poverty_projections diff --git a/dag/poverty_inequality.yml b/dag/poverty_inequality.yml index 0e8c94291c3..25a1c80a547 100644 --- a/dag/poverty_inequality.yml +++ b/dag/poverty_inequality.yml @@ -114,15 +114,6 @@ steps: data://grapher/oecd/2024-04-30/affordable_housing_database: - data://garden/oecd/2024-04-30/affordable_housing_database - # Poverty projections from the World Bank - data://meadow/wb/2024-06-26/poverty_projections: - - snapshot://wb/2024-06-26/poverty_projections_number_global.csv - - snapshot://wb/2024-06-26/poverty_projections_share_regions.csv - data://garden/wb/2024-06-26/poverty_projections: - - data://meadow/wb/2024-06-26/poverty_projections - data://grapher/wb/2024-06-26/poverty_projections: - - data://garden/wb/2024-06-26/poverty_projections - # Institute of Global Homelessness - Better Data Project data://meadow/igh/2024-07-05/better_data_homelessness: - snapshot://igh/2024-07-05/better_data_homelessness.xlsx @@ -130,3 +121,11 @@ steps: - data://meadow/igh/2024-07-05/better_data_homelessness data://grapher/igh/2024-07-05/better_data_homelessness: - data://garden/igh/2024-07-05/better_data_homelessness + + # Poverty 
projections from the Poverty, Prosperity and Planet Report 2024 + data://meadow/wb/2024-12-03/poverty_projections: + - snapshot://wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip + data://garden/wb/2024-12-03/poverty_projections: + - data://meadow/wb/2024-12-03/poverty_projections + data://grapher/wb/2024-12-03/poverty_projections: + - data://garden/wb/2024-12-03/poverty_projections diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.countries.json b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.countries.json new file mode 100644 index 00000000000..7ca68192813 --- /dev/null +++ b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.countries.json @@ -0,0 +1,10 @@ +{ + "EAP": "East Asia and Pacific (PIP)", + "ECA": "Europe and Central Asia (PIP)", + "LAC": "Latin America and the Caribbean (PIP)", + "MNA": "Middle East and North Africa (PIP)", + "OHI": "Other high income countries (PIP)", + "SAS": "South Asia (PIP)", + "SSA": "Sub-Saharan Africa (PIP)", + "World": "World" +} \ No newline at end of file diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml new file mode 100644 index 00000000000..563186b77a5 --- /dev/null +++ b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml @@ -0,0 +1,106 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + processing_level: minor + display: &common-display + tolerance: 0 + entityAnnotationsMap: |- + Other high income countries (PIP): e.g. US, Western Europe, Australia, Japan, South Korea and Saudi Arabia + presentation: + topic_tags: + - Poverty + + description_key_povertyline: |- + <% if povertyline == "2.15" %> + Extreme poverty here is defined as living below the International Poverty Line of $2.15 per day. 
+ <% elif povertyline == "3.65" %> + A poverty line of $3.65 a day represents definitions of national poverty lines in lower-middle-income countries. + <% elif povertyline == "6.85" %> + A poverty line of $6.85 a day represents definitions of national poverty lines in upper-middle-income countries. + <%- endif -%> + + description_key_ppp: |- + The data is measured in international-$ at 2017 prices – this adjusts for inflation and for differences in the cost of living between countries. + + description_key_income_consumption: |- + Depending on the country and year, the data relates to income measured after taxes and benefits, or to consumption, per capita. "Per capita" means that the income of each household is attributed equally to each member of the household (including children). + + description_key_nonmarket_income: |- + Non-market sources of income, including food grown by subsistence farmers for their own consumption, are taken into account. + + description_key_scenarios: |- + <% if scenario == "Historical" %> + Estimates are based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts. For more details about the methodology, please refer to the [World Bank PIP documentation](https://datanalytics.worldbank.org/PIP-Methodology/lineupestimates.html#nowcasts). + <% elif scenario == "Current forecast + historical growth" %> + This data is a projection of the estimates based on GDP growth projections from the World Bank's Global Economic Prospects and the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019. + <% elif scenario == "2% growth" %> + This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while keeping income inequality constant. 
+ <% elif scenario == "2% growth + Gini reduction 1%" %> + This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while reducing income inequality by 1% of the Gini coefficient per year. + <% elif scenario == "2% growth + Gini reduction 2%" %> + This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while reducing income inequality by 2% of the Gini coefficient per year. + <% elif scenario == "4% growth" %> + This data is a projection of the estimates based on a scenario of 4% average GDP per capita growth, while keeping income inequality constant. + <% elif scenario == "6% growth" %> + This data is a projection of the estimates based on a scenario of 6% average GDP per capita growth, while keeping income inequality constant. + <% elif scenario == "8% growth" %> + This data is a projection of the estimates based on a scenario of 8% average GDP per capita growth, while keeping income inequality constant. 
+ <%- endif -%> + + isprojection_by_scenario: |- + <% if scenario == "Historical" %> + false + <% else %> + true + <%- endif -%> + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + title: Poverty projections by the World Bank + update_period_days: 681 + + +tables: + poverty_projections: + variables: + fgt0: + title: $<<povertyline>> a day - Share of population in poverty (<<scenario>>) + unit: "%" + short_unit: "%" + description_short: "Percentage of population living in households with an income or consumption per person below $<<povertyline>> a day" + description_key: + - "{definitions.description_key_povertyline}" + - "{definitions.description_key_ppp}" + - "{definitions.description_key_income_consumption}" + - "{definitions.description_key_nonmarket_income}" + - "{definitions.description_key_scenarios}" + presentation: + title_public: Share of population living in poverty + title_variant: $<<povertyline>> a day, <<scenario>> + display: + name: Share of population living below $<<povertyline>> a day (<<scenario>>) + numDecimalPlaces: 1 + isProjection: {definitions.isprojection_by_scenario} + <<: *common-display + + poorpop: + title: $<<povertyline>> a day - Number of people in poverty (<<scenario>>) + unit: "people" + short_unit: "" + description_short: "Number of people living in households with an income or consumption per person below $<<povertyline>> a day" + description_key: + - "{definitions.description_key_povertyline}" + - "{definitions.description_key_ppp}" + - "{definitions.description_key_income_consumption}" + - "{definitions.description_key_nonmarket_income}" + - "{definitions.description_key_scenarios}" + presentation: + title_public: Number of people living in poverty + title_variant: $<<povertyline>> a day, <<scenario>> + display: + name: Number of people living below $<<povertyline>> a day (<<scenario>>) + numDecimalPlaces: 0 + isProjection: {definitions.isprojection_by_scenario} + <<: *common-display \ No newline at end of file diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py 
b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py new file mode 100644 index 00000000000..3b64c7d2cbc --- /dev/null +++ b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py @@ -0,0 +1,125 @@ +"""Load a meadow dataset and create a garden dataset.""" + +import owid.catalog.processing as pr +from owid.catalog import Table +from owid.datautils.dataframes import map_series + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# Define latest year without projections +LATEST_YEAR_WITHOUT_PROJECTIONS = 2024 + +# Define tables to be loaded. I am not processing country, because they were created for the aggregations and not to highlight them. +TABLES = ["region", "global"] + +# Define scenarios and new names +SCENARIOS = { + "historical": "Historical", + "current_forecast": "Current forecast + historical growth", + "2pct": "2% growth", + "2pct_gini1": "2% growth + Gini reduction 1%", + "2pct_gini2": "2% growth + Gini reduction 2%", + "4pct": "4% growth", + "6pct": "6% growth", + "8pct": "8% growth", +} + +# Define index columns +INDEX_COLUMNS = ["country", "year", "povertyline", "scenario"] + +# Define indicator columns +INDICATOR_COLUMNS = ["fgt0", "poorpop"] + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("poverty_projections") + + # Read table from meadow dataset. + # Define empty table list to store tables. + tables = [] + for table in TABLES: + tb = ds_meadow.read(table) + + # Append table to list. + tables.append(tb) + + # + # Process data. 
+ # + # Concatenate tables + tb = pr.concat(tables, ignore_index=True) + + # Multiply poorpop by 1_000_000 + tb["poorpop"] = tb["poorpop"] * 1_000_000 + + tb = geo.harmonize_countries( + df=tb, + countries_file=paths.country_mapping_path, + ) + + tb = connect_estimates_with_projections(tb) + + # Rename scenario column + tb["scenario"] = map_series( + series=tb["scenario"], + mapping=SCENARIOS, + ) + + # Recover origins + tb["scenario"] = tb["scenario"].copy_metadata(tb["country"]) + + tb = tb.format(INDEX_COLUMNS, short_name="poverty_projections") + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() + + +def connect_estimates_with_projections(tb: Table) -> Table: + """ + Connects estimates with projections for visualizations in Grapher. + This is repeating the latest estimate in the historical scenario in the rest of the scenarios. 
+ """ + + tb = tb.copy() + + # Make table wider, by using scenario as columns + tb = tb.pivot(index=["country", "year", "povertyline"], columns="scenario", values=INDICATOR_COLUMNS) + + # For year LATEST_YEAR_WITHOUT_PROJECTIONS, fill the rest of the columns with the same value + for indicator in INDICATOR_COLUMNS: + for scenario in SCENARIOS.keys(): + if scenario != "historical": + tb.loc[ + tb.index.get_level_values("year") == LATEST_YEAR_WITHOUT_PROJECTIONS, (indicator, scenario) + ] = tb.loc[ + tb.index.get_level_values("year") == LATEST_YEAR_WITHOUT_PROJECTIONS, (indicator, scenario) + ].combine_first( + tb.loc[ + tb.index.get_level_values("year") == LATEST_YEAR_WITHOUT_PROJECTIONS, (indicator, "historical") + ] + ) + + # Make table long again, by creating a scenario column + tb = tb.stack(level="scenario", future_stack=True).reset_index() + + # Recover origins + for indicator in INDICATOR_COLUMNS: + tb[indicator] = tb[indicator].copy_metadata(tb["country"]) + + return tb diff --git a/etl/steps/data/grapher/wb/2024-12-03/poverty_projections.py b/etl/steps/data/grapher/wb/2024-12-03/poverty_projections.py new file mode 100644 index 00000000000..08e7178862b --- /dev/null +++ b/etl/steps/data/grapher/wb/2024-12-03/poverty_projections.py @@ -0,0 +1,28 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("poverty_projections") + + # Read table from garden dataset. + tb = ds_garden.read("poverty_projections", reset_index=False) + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. 
+ ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/meadow/wb/2024-12-03/poverty_projections.py b/etl/steps/data/meadow/wb/2024-12-03/poverty_projections.py new file mode 100644 index 00000000000..5855e159ba8 --- /dev/null +++ b/etl/steps/data/meadow/wb/2024-12-03/poverty_projections.py @@ -0,0 +1,60 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# Define files directory +FILES_DIRECTORY = "FR_WLD_2024_198/Reproducibility package/Chapter 1/1-data/raw/forecasts" + +# Define index columns +INDEX_COLUMNS = ["country", "year", "povertyline", "scenario"] + +# Define table parameters +TABLE_PARAMETERS = { + "country": {"file": "FGTcountry_1990_2050_3pr24.dta"}, + "region": {"file": "FGTregion_1990_2050_3pr24.dta"}, + "global": {"file": "FGTglobal_1990_2050_3pr24.dta"}, +} + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("reproducibility_package_poverty_prosperity_planet.zip") + + # Define empty list to store tables. + tables = [] + for table, table_config in TABLE_PARAMETERS.items(): + # Load data from snapshot. + tb = snap.read_in_archive(f"{FILES_DIRECTORY}/{table_config['file']}") + + # + # Process data. + # + # Rename and add columns + if table == "region": + tb = tb.rename(columns={"region_pip": "country"}) + elif table == "global": + tb["country"] = "World" + + # Remove duplicates in the data + tb = tb.drop_duplicates(subset=INDEX_COLUMNS) + + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(keys=INDEX_COLUMNS, short_name=table) + + # Append table to list. + tables.append(tb) + + # + # Save outputs. 
+ # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=tables, check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() diff --git a/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.py b/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.py new file mode 100644 index 00000000000..0fa8bed05b3 --- /dev/null +++ b/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.py @@ -0,0 +1,24 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"wb/{SNAPSHOT_VERSION}/reproducibility_package_poverty_prosperity_planet.zip") + + # Download data from source, add file to DVC and upload to S3. + snap.create_snapshot(upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip.dvc b/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip.dvc new file mode 100644 index 00000000000..2e560863971 --- /dev/null +++ b/snapshots/wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip.dvc @@ -0,0 +1,29 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Reproducibility package for Poverty, Prosperity and Planet Report 2024 + description: |- + The World Bank has set a clear mission: ending extreme poverty and boosting shared prosperity on a livable planet. 
This new edition of the biennial series, previously titled Poverty and Shared Prosperity, assesses the three components of the mission and emphasizes that reducing poverty and increasing shared prosperity must be achieved without high costs to the environment. The current polycrisis—where the multiple crises of slow economic growth, increased fragility, climate risks, and heightened uncertainty have come together at the same time—makes national development strategies and international cooperation difficult. This overview summarizes the progress toward achieving these goals, outlines promising pathways to speed up the progress on multiple fronts, and proposes priorities tailored to countries at various levels of poverty, income, and environmental vulnerability. Offering the first post-COVID-19 (Coronavirus) pandemic assessment of global progress on this interlinked agenda, the report finds that global poverty reduction has resumed but at a pace slower than before the COVID-19 crisis. It also provides evidence that the number of countries with high levels of income inequality has declined considerably during the past two decades, but the pace of improvements in shared prosperity has slowed and that inequality remains high in Latin America and the Caribbean and in Sub-Saharan Africa. The report also finds evidence of countries’ increasing ability to manage natural hazards where there has been progress in poverty reduction and shared prosperity; but in the poorest settings, the report finds that climate risks are significantly higher. + date_published: "2024-09-26" + + # Citation + producer: Lakner et al. + citation_full: |- + Lakner, C., Genoni, M. E., Stemmler, H., Yonzan, N., & Tetteh Baah, S. K. (2024). Reproducibility package for Poverty, Prosperity and Planet Report 2024. World Bank. 
https://doi.org/10.60572/KGE4-CX54 + + # Files + url_main: https://reproducibility.worldbank.org/index.php/catalog/189/ + url_download: https://reproducibility.worldbank.org/index.php/catalog/189/download/552/FR_WLD_2024_198.zip + date_accessed: 2024-12-03 + + # License + license: + name: Modified BSD3 + url: https://reproducibility.worldbank.org/index.php/catalog/189/#project_desc_container1674844764972 + +outs: + - md5: 3a942e2ccc863f67d4879250d7468e57 + size: 91787338 + path: reproducibility_package_poverty_prosperity_planet.zip