Adapt code to remove warnings after pandas update
pabloarosado committed Apr 16, 2024
1 parent c9e6867 commit de74844
Showing 4 changed files with 36 additions and 7 deletions.
16 changes: 13 additions & 3 deletions etl/steps/data/explorers/emdat/latest/natural_disasters.py
@@ -44,7 +44,7 @@ def create_wide_tables(table: Table) -> Table:
     """
     # Adapt disaster type names to match those in the old explorer files.
     table = table.reset_index()
-    table["type"] = table["type"].replace(DISASTER_TYPE_RENAMING)
+    table["type"] = table.astype({"type": str})["type"].replace(DISASTER_TYPE_RENAMING)
 
     # Create wide table.
     table_wide = table.pivot(index=["country", "year"], columns="type", join_column_levels_with="_")
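
Context for the line changed above (not part of the diff): pandas 2.2 deprecated several behaviors of .replace, and on a column stored as a categorical it emits a FutureWarning. Casting the column to str before replacing sidesteps the warning. A minimal sketch, assuming pandas >= 2.2 and made-up disaster values:

    import pandas as pd

    df = pd.DataFrame({"type": pd.Categorical(["flood", "drought"])})

    # Emits a FutureWarning on pandas 2.2 (replace with CategoricalDtype is deprecated).
    df["type"].replace({"flood": "Flood"})

    # No warning: cast the categorical away first, as the commit does.
    df.astype({"type": str})["type"].replace({"flood": "Flood"})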
@@ -56,7 +56,8 @@ def create_wide_tables(table: Table) -> Table:
             .replace("total_dead", "deaths")
             .replace("total_damages_per_gdp", "total_damages_pct_gdp")
             for column in table_wide.columns
-        }
+        },
+        errors="raise",
     )
 
     # Remove unnecessary columns.
@@ -83,17 +84,26 @@ def create_wide_tables(table: Table) -> Table:
 
 
 def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
     # Load the latest dataset from garden.
     ds_garden = paths.load_dataset("natural_disasters")
 
-    # Load tables with yearly and decadal data.
+    # Read tables with yearly and decadal data.
     tb_yearly = ds_garden["natural_disasters_yearly"]
     tb_decadal = ds_garden["natural_disasters_decadal"]
 
+    #
+    # Process data.
+    #
     # Create wide tables adapted to the old format in explorers.
     tb_yearly_wide = create_wide_tables(table=tb_yearly)
     tb_decadal_wide = create_wide_tables(table=tb_decadal)
 
+    #
+    # Save outputs.
+    #
     # Initialize a new grapher dataset and add dataset metadata.
     ds_grapher = create_dataset(
         dest_dir,
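
The tail of the create_dataset call is collapsed in this view. As a rough sketch only (argument names are assumed, not read from the hidden lines), OWID ETL steps conventionally end by building and saving the output dataset:

    from etl.helpers import create_dataset

    # Hypothetical ending; the real arguments in this step may differ.
    ds_grapher = create_dataset(dest_dir, tables=[tb_yearly_wide, tb_decadal_wide])
    ds_grapher.save()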
@@ -526,7 +526,7 @@ def sanity_checks_on_outputs(tb: Table, is_decade: bool) -> None:
 
 def run(dest_dir: str) -> None:
     #
-    # Load data.
+    # Load inputs.
     #
     # Load natural disasters dataset from meadow and read its main table.
     ds_meadow = paths.load_dataset("natural_disasters")
17 changes: 15 additions & 2 deletions etl/steps/data/grapher/emdat/2024-04-11/natural_disasters.py
@@ -32,15 +32,28 @@ def create_wide_tables(table: Table, is_decade: bool) -> Table:
 
 
 def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
     # Load garden tables and remove unnecessary columns.
     ds_garden = paths.load_dataset("natural_disasters")
-    tb_yearly = ds_garden["natural_disasters_yearly"].drop(columns=["population", "gdp"], errors="raise")
-    tb_decadal = ds_garden["natural_disasters_decadal"].drop(columns=["population", "gdp"], errors="raise")
+    tb_yearly = ds_garden["natural_disasters_yearly"]
+    tb_decadal = ds_garden["natural_disasters_decadal"]
 
+    #
+    # Process data.
+    #
+    # Remove unnecessary columns.
+    tb_yearly = tb_yearly.drop(columns=["population", "gdp"], errors="raise")
+    tb_decadal = tb_decadal.drop(columns=["population", "gdp"], errors="raise")
+
     # Create wide tables.
     tb_yearly_wide = create_wide_tables(table=tb_yearly, is_decade=False)
     tb_decadal_wide = create_wide_tables(table=tb_decadal, is_decade=True)
 
+    #
+    # Save outputs.
+    #
     # Create new grapher dataset, add tables, and save dataset.
     ds_grapher = create_dataset(
         dest_dir,
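
Unlike rename, DataFrame.drop already defaults to errors="raise", so writing it out here documents intent rather than changing behavior. A quick illustration with the same column names used above:

    import pandas as pd

    df = pd.DataFrame({"population": [1], "gdp": [2], "deaths": [3]})

    # Raises KeyError if any listed column is absent (also drop's default).
    df.drop(columns=["population", "gdp"], errors="raise")

    # errors="ignore" would silently skip missing labels instead.
    df.drop(columns=["population", "gdp", "missing"], errors="ignore")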
@@ -7,18 +7,24 @@
 
 
 def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
     # Load garden dataset and read table on yearly data.
     ds_garden = paths.load_dataset("natural_disasters")
     tb = ds_garden["natural_disasters_yearly"].reset_index()
 
+    #
+    # Process data.
+    #
     # Select data for the World and remove unnecessary columns.
     tb_global = (
         tb[tb["country"] == "World"]
         .drop(columns=["country", "population", "gdp"], errors="raise")
         .reset_index(drop=True)
     )
     # Assign human-readable names to disaster types.
-    tb_global["type"] = tb_global["type"].replace(
+    tb_global["type"] = tb_global.astype({"type": str})["type"].replace(
         {disaster_type: disaster_type.capitalize().replace("_", " ") for disaster_type in tb_global["type"].unique()}
     )
     # Treat column for disaster type as the new entity (so they can be selected in grapher as if they were countries).
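
The dict comprehension above builds a one-off mapping from raw snake_case type names to display names. With hypothetical values (the real ones come from tb_global["type"].unique()):

    types = ["flood", "extreme_temperature", "dry_mass_movement"]
    mapping = {t: t.capitalize().replace("_", " ") for t in types}
    # {'flood': 'Flood', 'extreme_temperature': 'Extreme temperature',
    #  'dry_mass_movement': 'Dry mass movement'}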
