📊 wb: modify poverty projections metadata (#3712)

* 📊 wb: modify poverty projections metadata * ✨ changes in metadata * empty * 🐛 Exclude chart diffs with indistinguishable configs --------- Co-authored-by: Marigold <[email protected]>
owid · Dec 10, 2024 · 5322743 · 5322743
1 parent 82241e4
commit 5322743
Show file tree

Hide file tree

Showing 3 changed files with 35 additions and 19 deletions.
diff --git a/apps/wizard/app_pages/chart_diff/chart_diff.py b/apps/wizard/app_pages/chart_diff/chart_diff.py
@@ -1,5 +1,6 @@
 import datetime as dt
 import difflib
+import json
 import pprint
 from typing import Any, Dict, List, Optional
 
@@ -653,6 +654,7 @@ def _modified_chart_configs_on_staging(
     select
         c.id as chartId,
         MD5(cc.full) as chartChecksum,
+        cc.full as chartConfig,
         c.lastEditedByUserId as chartLastEditedByUserId,
         c.publishedByUserId as chartPublishedByUserId,
         c.lastEditedAt as chartLastEditedAt
@@ -699,6 +701,20 @@ def _modified_chart_configs_on_staging(
     diff = source_df.copy()
     diff["configEdited"] = source_df["chartChecksum"] != target_df["chartChecksum"]
 
+    # Go through edited configs and do a more detailed comparison
+    ix = diff["configEdited"] & target_df["chartChecksum"].notnull()
+    equal_configs = []
+    for chart_id, row in diff.loc[ix].iterrows():
+        source_config = json.loads(row["chartConfig"])
+        target_config = json.loads(target_df.loc[chart_id, "chartConfig"])
+
+        # Compare configs
+        if configs_are_equal(source_config, target_config):
+            equal_configs.append(chart_id)
+
+    # Exclude configs that have different chartChecksum, but are actually the same (e.g. have just different version)
+    diff = diff[~diff.index.isin(equal_configs)]
+
     # Add flag 'edited in staging'
     diff["chartEditedInStaging"] = True
 

diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.meta.yml
@@ -29,28 +29,28 @@ definitions:
     Non-market sources of income, including food grown by subsistence farmers for their own consumption, are taken into account.
 
   description_key_scenarios: |-
-    <% if scenario == "Historical" %>
+    <% if scenario == "Historical estimates" %>
     Estimates are based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts. For more details about the methodology, please refer to the [World Bank PIP documentation](https://datanalytics.worldbank.org/PIP-Methodology/lineupestimates.html#nowcasts).
-    <% elif scenario == "Current forecast + historical growth" %>
+    <% elif scenario == "Current forecast + historical growth projections" %>
     This data is a projection of the estimates based on GDP growth projections from the World Bank's Global Economic Prospects and the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019.
-    <% elif scenario == "Historical + current forecast + historical growth" %>
+    <% elif scenario == "Historical estimates + current forecast + historical growth projections" %>
     This data combines data based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts, with projections based on GDP growth projections from the World Bank's Global Economic Prospects and the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019.
-    <% elif scenario == "2% growth" %>
+    <% elif scenario == "2% growth projections" %>
     This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while keeping income inequality constant.
-    <% elif scenario == "2% growth + Gini reduction 1%" %>
+    <% elif scenario == "2% growth + Gini reduction 1% projections" %>
     This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while reducing income inequality by 1% of the Gini coefficient per year.
-    <% elif scenario == "2% growth + Gini reduction 2%" %>
+    <% elif scenario == "2% growth + Gini reduction 2% projections" %>
     This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while reducing income inequality by 2% of the Gini coefficient per year.
-    <% elif scenario == "4% growth" %>
+    <% elif scenario == "4% growth projections" %>
     This data is a projection of the estimates based on a scenario of 4% average GDP per capita growth, while keeping income inequality constant.
-    <% elif scenario == "6% growth" %>
+    <% elif scenario == "6% growth projections" %>
     This data is a projection of the estimates based on a scenario of 6% average GDP per capita growth, while keeping income inequality constant.
-    <% elif scenario == "8% growth" %>
+    <% elif scenario == "8% growth projections" %>
     This data is a projection of the estimates based on a scenario of 8% average GDP per capita growth, while keeping income inequality constant.
     <%- endif -%>
 
   isprojection_by_scenario: |-
-    <% if scenario == "Historical" or scenario == "Historical + current forecast + historical growth" %>
+    <% if scenario == "Historical estimates" or scenario == "Historical estimates + current forecast + historical growth projections" %>
     false
     <% else %>
     true

diff --git a/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py b/etl/steps/data/garden/wb/2024-12-03/poverty_projections.py
@@ -18,14 +18,14 @@
 
 # Define scenarios and new names
 SCENARIOS = {
-    "historical": "Historical",
-    "current_forecast": "Current forecast + historical growth",
-    "2pct": "2% growth",
-    "2pct_gini1": "2% growth + Gini reduction 1%",
-    "2pct_gini2": "2% growth + Gini reduction 2%",
-    "4pct": "4% growth",
-    "6pct": "6% growth",
-    "8pct": "8% growth",
+    "historical": "Historical estimates",
+    "current_forecast": "Current forecast + historical growth projections",
+    "2pct": "2% growth projections",
+    "2pct_gini1": "2% growth + Gini reduction 1% projections",
+    "2pct_gini2": "2% growth + Gini reduction 2% projections",
+    "4pct": "4% growth projections",
+    "6pct": "6% growth projections",
+    "8pct": "8% growth projections",
 }
 
 # Define index columns
@@ -124,7 +124,7 @@ def connect_estimates_with_projections(tb: Table) -> Table:
     tb_connected = pr.concat([tb_historical, tb_current_forecast], ignore_index=True)
 
     # Rename scenario column to "Historical estimates + projections"
-    tb_connected["scenario"] = "Historical + current forecast + historical growth"
+    tb_connected["scenario"] = "Historical estimates + projections"
 
     # Keep only the columns in INDEX_COLUMNS and INDICATOR_COLUMNS
     tb_connected = tb_connected[INDEX_COLUMNS + INDICATOR_COLUMNS]