owid · pabloarosado · Apr 19, 2024 · Apr 11, 2024 · Apr 11, 2024 · Apr 11, 2024
diff --git a/apps/wizard/pages/dashboard.py b/apps/wizard/pages/dashboard.py
@@ -65,6 +65,10 @@
     "snapshot/wb/income_groups.xlsx",
     # World Bank country shapes.
     "snapshot/countries/world_bank.zip",
+    # World Bank WDI.
+    "snapshot/worldbank_wdi/wdi.zip",
+    "meadow/worldbank_wdi/wdi",
+    "garden/worldbank_wdi/wdi",
     # Other steps we don't want to update (because the underlying data does not get updated).
     # TODO: We need a better way to achieve this, for example adding update_period_days to all steps and snapshots.
     #  A simpler alternative would be to move these steps to a separate file in a meaningful place.

diff --git a/dag/archive/main.yml b/dag/archive/main.yml
@@ -116,6 +116,28 @@ steps:
   data://grapher/un/2022-11-29/undp_hdr:
     - data://garden/un/2022-11-29/undp_hdr
 
+  #
+  # EM-DAT Natural disasters (2023).
+  #
+  data://meadow/emdat/2023-09-20/natural_disasters:
+    - snapshot://emdat/2023-09-20/natural_disasters.xlsx
+  # The following dataset has a table for yearly data and another for decadal data.
+  data://garden/emdat/2023-09-20/natural_disasters:
+    - data://meadow/emdat/2023-09-20/natural_disasters
+    - data://garden/demography/2023-03-31/population
+    - data://garden/wb/2023-04-30/income_groups
+    - data://garden/regions/2023-01-01/regions
+    - data://garden/worldbank_wdi/2023-05-29/wdi
+  # The following dataset has all (yearly and decadal) variables together.
+  data://grapher/emdat/2023-09-20/natural_disasters:
+    - data://garden/emdat/2023-09-20/natural_disasters
+  # The following dataset has only global data, and entity corresponds to the type of disaster.
+  data://grapher/emdat/2023-09-20/natural_disasters_global_by_type:
+    - data://garden/emdat/2023-09-20/natural_disasters
+  # Natural disasters explorer.
+  data://explorers/emdat/2023-09-20/natural_disasters:
+    - data://garden/emdat/2023-09-20/natural_disasters
+
 # Include all active steps plus all archive steps.
 include:
   - dag/main.yml

diff --git a/dag/main.yml b/dag/main.yml
@@ -177,28 +177,6 @@ steps:
   data://grapher/un/2024-04-09/undp_hdr:
     - data://garden/un/2024-04-09/undp_hdr
 
-  #
-  # EM-DAT Natural disasters (2023).
-  #
-  data://meadow/emdat/2023-09-20/natural_disasters:
-    - snapshot://emdat/2023-09-20/natural_disasters.xlsx
-  # The following dataset has a table for yearly data and another for decadal data.
-  data://garden/emdat/2023-09-20/natural_disasters:
-    - data://meadow/emdat/2023-09-20/natural_disasters
-    - data://garden/demography/2023-03-31/population
-    - data://garden/wb/2023-04-30/income_groups
-    - data://garden/regions/2023-01-01/regions
-    - data://garden/worldbank_wdi/2023-05-29/wdi
-  # The following dataset has all (yearly and decadal) variables together.
-  data://grapher/emdat/2023-09-20/natural_disasters:
-    - data://garden/emdat/2023-09-20/natural_disasters
-  # The following dataset has only global data, and entity corresponds to the type of disaster.
-  data://grapher/emdat/2023-09-20/natural_disasters_global_by_type:
-    - data://garden/emdat/2023-09-20/natural_disasters
-  # Natural disasters explorer.
-  data://explorers/emdat/2023-09-20/natural_disasters:
-    - data://garden/emdat/2023-09-20/natural_disasters
-
   # Country profiles - overview
   data://garden/country_profile/2022/overview:
     - backport://backport/owid/latest/dataset_5599_ihme__global_burden_of_disease__deaths_and_dalys__institute_for_health_metrics_and_evaluation__2022_04
@@ -720,6 +698,37 @@ steps:
   data://grapher/wb/2024-03-11/income_groups:
     - data://garden/wb/2024-03-11/income_groups
 
+  #
+  # EM-DAT Natural disasters.
+  #
+  data://meadow/emdat/2024-04-11/natural_disasters:
+    - snapshot://emdat/2024-04-11/natural_disasters.xlsx
+
+  # The following dataset has a table for yearly data and another for decadal data.
+  data://garden/emdat/2024-04-11/natural_disasters:
+    - data://meadow/emdat/2024-04-11/natural_disasters
+    - data://garden/demography/2023-03-31/population
+    - data://garden/wb/2024-03-11/income_groups
+    - data://garden/regions/2023-01-01/regions
+    - data://garden/worldbank_wdi/2023-05-29/wdi
+
+  # The following dataset has all (yearly and decadal) variables together.
+  data://grapher/emdat/2024-04-11/natural_disasters:
+    - data://garden/emdat/2024-04-11/natural_disasters
+
+  # The following dataset has only global data, and entity corresponds to the type of disaster.
+  data://grapher/emdat/2024-04-11/natural_disasters_global_by_type:
+    - data://garden/emdat/2024-04-11/natural_disasters
+
+  # Natural disasters explorer.
+  data://explorers/emdat/latest/natural_disasters:
+    - data://garden/emdat/2024-04-11/natural_disasters
+
+  ######################################################################################################################
+  # Older versions that should be archived once they are not used by any other steps.
+
+  ######################################################################################################################
+
 include:
   - dag/open_numbers.yml
   - dag/faostat.yml

diff --git a/etl/steps/data/explorers/emdat/latest/natural_disasters.py b/etl/steps/data/explorers/emdat/latest/natural_disasters.py
@@ -0,0 +1,115 @@
+"""Natural disasters explorer data step.
+
+Loads the latest EM-DAT natural_disasters data from garden and stores a table (as a csv file) for yearly data, and
+another for decadal data.
+
+NOTES:
+* Some of the columns in the output files are not used by the explorer (but they appear in the "Sort by" dropdown menu);
+  consider removing them. For now, we'll ensure all of the old columns are present, to avoid any possible issues.
+* Most charts in the explorer are generated from the data in the files, but 3 of them are directly linked to grapher
+  charts, namely:
+  "All disasters (by type) - Deaths - Decadal average - false"
+  "All disasters (by type) - Deaths - Decadal average - true"
+  "All disasters (by type) - Economic damages (% GDP) - Decadal average - false"
+  At some point it would be good to let the explorer take all the data from files.
+
+"""
+
+from owid.catalog import Table
+
+from etl.helpers import PathFinder, create_dataset
+
+paths = PathFinder(__file__)
+
+# Mapping from disaster type names in the garden dataset to the names used in the old explorer files.
+DISASTER_TYPE_RENAMING = {
+    "all_disasters": "all_disasters",
+    "drought": "drought",
+    "earthquake": "earthquake",
+    "extreme_temperature": "temperature",
+    "flood": "flood",
+    "fog": "fog",
+    "glacial_lake_outburst": "glacial_lake",
+    "landslide": "landslide",
+    "dry_mass_movement": "mass_movement",
+    "extreme_weather": "storm",
+    "volcanic_activity": "volcanic",
+    "wildfire": "wildfire",
+}
+
+
+def create_wide_tables(table: Table) -> Table:
+    """Convert the input table from long to wide format, and rename columns to match the old names in the files
+    used by the explorer.
+    """
+    # Move index levels to columns, and rename disaster types to match those in the old explorer files.
+    table = table.reset_index()
+    table["type"] = table.astype({"type": str})["type"].replace(DISASTER_TYPE_RENAMING)
+
+    # Create wide table: one row per country-year; column levels are joined with "_".
+    table_wide = table.pivot(index=["country", "year"], columns="type", join_column_levels_with="_")
+
+    # Rename columns (by substring replacement) to match the old names in the explorer files.
+    table_wide = table_wide.rename(
+        columns={
+            column: column.replace("per_100k_people", "rate_per_100k")
+            .replace("total_dead", "deaths")
+            .replace("total_damages_per_gdp", "total_damages_pct_gdp")
+            for column in table_wide.columns
+        },
+        errors="raise",
+    )
+
+    # Remove unnecessary columns (selected by prefix, plus a few specific columns).
+    table_wide = table_wide[
+        [
+            column
+            for column in table_wide.columns
+            if not column.startswith(
+                ("gdp_", "n_events_", "population_", "insured_damages_per_gdp", "reconstruction_costs_per_gdp_")
+            )
+            if column
+            not in [
+                "affected_rate_per_100k_glacial_lake",
+                "homeless_rate_per_100k_glacial_lake",
+                "total_damages_pct_gdp_fog",
+            ]
+        ]
+    ]
+
+    # Set an appropriate index and sort conveniently.
+    table_wide = table_wide.format()
+
+    return table_wide
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load the natural disasters dataset from garden.
+    ds_garden = paths.load_dataset("natural_disasters")
+
+    # Read tables with yearly and decadal data.
+    tb_yearly = ds_garden["natural_disasters_yearly"]
+    tb_decadal = ds_garden["natural_disasters_decadal"]
+
+    #
+    # Process data.
+    #
+    # Create wide tables adapted to the old format in explorers.
+    tb_yearly_wide = create_wide_tables(table=tb_yearly)
+    tb_decadal_wide = create_wide_tables(table=tb_decadal)
+
+    #
+    # Save outputs.
+    #
+    # Initialize a new explorers dataset (saved as csv files), reusing the garden dataset metadata.
+    ds_grapher = create_dataset(
+        dest_dir,
+        tables=[tb_yearly_wide, tb_decadal_wide],
+        default_metadata=ds_garden.metadata,
+        check_variables_metadata=True,
+        formats=["csv"],
+    )
+    ds_grapher.save()