Adapt code to remove warnings after pandas update
pabloarosado committed Apr 16, 2024
1 parent c9e6867 commit de74844
Showing 4 changed files with 36 additions and 7 deletions.
16 changes: 13 additions & 3 deletions etl/steps/data/explorers/emdat/latest/natural_disasters.py
@@ -44,7 +44,7 @@ def create_wide_tables(table: Table) -> Table:
     """
     # Adapt disaster type names to match those in the old explorer files.
     table = table.reset_index()
-    table["type"] = table["type"].replace(DISASTER_TYPE_RENAMING)
+    table["type"] = table.astype({"type": str})["type"].replace(DISASTER_TYPE_RENAMING)
 
     # Create wide table.
     table_wide = table.pivot(index=["country", "year"], columns="type", join_column_levels_with="_")
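
Context for the line changed above (not part of the diff): pandas 2.2 deprecated several behaviors of .replace, and on a column stored as a categorical it emits a FutureWarning. Casting the column to str before replacing sidesteps the warning. A minimal sketch, assuming pandas >= 2.2 and made-up disaster values:

    import pandas as pd

    df = pd.DataFrame({"type": pd.Categorical(["flood", "drought"])})

    # Emits a FutureWarning on pandas 2.2 (replace with CategoricalDtype is deprecated).
    df["type"].replace({"flood": "Flood"})

    # No warning: cast the categorical away first, as the commit does.
    df.astype({"type": str})["type"].replace({"flood": "Flood"})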
@@ -56,7 +56,8 @@ def create_wide_tables(table: Table) -> Table:
             .replace("total_dead", "deaths")
             .replace("total_damages_per_gdp", "total_damages_pct_gdp")
             for column in table_wide.columns
-        }
+        },
+        errors="raise",
     )
 
     # Remove unnecessary columns.
@@ -83,17 +84,26 @@ def create_wide_tables(table: Table) -> Table:
 
 
 def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
     # Load the latest dataset from garden.
     ds_garden = paths.load_dataset("natural_disasters")
 
-    # Load tables with yearly and decadal data.
+    # Read tables with yearly and decadal data.
     tb_yearly = ds_garden["natural_disasters_yearly"]
     tb_decadal = ds_garden["natural_disasters_decadal"]
 
+    #
+    # Process data.
+    #
     # Create wide tables adapted to the old format in explorers.
     tb_yearly_wide = create_wide_tables(table=tb_yearly)
     tb_decadal_wide = create_wide_tables(table=tb_decadal)
 
+    #
+    # Save outputs.
+    #
     # Initialize a new grapher dataset and add dataset metadata.
     ds_grapher = create_dataset(
         dest_dir,
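
The tail of the create_dataset call is collapsed in this view. As a rough sketch only (argument names are assumed, not read from the hidden lines), OWID ETL steps conventionally end by building and saving the output dataset:

    from etl.helpers import create_dataset

    # Hypothetical ending; the real arguments in this step may differ.
    ds_grapher = create_dataset(dest_dir, tables=[tb_yearly_wide, tb_decadal_wide])
    ds_grapher.save()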
@@ -526,7 +526,7 @@ def sanity_checks_on_outputs(tb: Table, is_decade: bool) -> None:
 
 def run(dest_dir: str) -> None:
     #
-    # Load data.
+    # Load inputs.
     #
     # Load natural disasters dataset from meadow and read its main table.
     ds_meadow = paths.load_dataset("natural_disasters")
17 changes: 15 additions & 2 deletions etl/steps/data/grapher/emdat/2024-04-11/natural_disasters.py
@@ -32,15 +32,28 @@ def create_wide_tables(table: Table, is_decade: bool) -> Table:
 
 
 def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
     # Load garden tables and remove unnecessary columns.
     ds_garden = paths.load_dataset("natural_disasters")
-    tb_yearly = ds_garden["natural_disasters_yearly"].drop(columns=["population", "gdp"], errors="raise")
-    tb_decadal = ds_garden["natural_disasters_decadal"].drop(columns=["population", "gdp"], errors="raise")
+    tb_yearly = ds_garden["natural_disasters_yearly"]
+    tb_decadal = ds_garden["natural_disasters_decadal"]
 
+    #
+    # Process data.
+    #
+    # Remove unnecessary columns.
+    tb_yearly = tb_yearly.drop(columns=["population", "gdp"], errors="raise")
+    tb_decadal = tb_decadal.drop(columns=["population", "gdp"], errors="raise")
+
     # Create wide tables.
     tb_yearly_wide = create_wide_tables(table=tb_yearly, is_decade=False)
     tb_decadal_wide = create_wide_tables(table=tb_decadal, is_decade=True)
 
+    #
+    # Save outputs.
+    #
     # Create new grapher dataset, add tables, and save dataset.
     ds_grapher = create_dataset(
         dest_dir,
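
Unlike rename, DataFrame.drop already defaults to errors="raise", so writing it out here documents intent rather than changing behavior. A quick illustration with the same column names used above:

    import pandas as pd

    df = pd.DataFrame({"population": [1], "gdp": [2], "deaths": [3]})

    # Raises KeyError if any listed column is absent (also drop's default).
    df.drop(columns=["population", "gdp"], errors="raise")

    # errors="ignore" would silently skip missing labels instead.
    df.drop(columns=["population", "gdp", "missing"], errors="ignore")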
@@ -7,18 +7,24 @@
 
 
 def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
     # Load garden dataset and read table on yearly data.
     ds_garden = paths.load_dataset("natural_disasters")
     tb = ds_garden["natural_disasters_yearly"].reset_index()
 
+    #
+    # Process data.
+    #
     # Select data for the World and remove unnecessary columns.
     tb_global = (
         tb[tb["country"] == "World"]
         .drop(columns=["country", "population", "gdp"], errors="raise")
         .reset_index(drop=True)
     )
     # Assign human-readable names to disaster types.
-    tb_global["type"] = tb_global["type"].replace(
+    tb_global["type"] = tb_global.astype({"type": str})["type"].replace(
         {disaster_type: disaster_type.capitalize().replace("_", " ") for disaster_type in tb_global["type"].unique()}
     )
     # Treat column for disaster type as the new entity (so they can be selected in grapher as if they were countries).
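
The dict comprehension above builds a one-off mapping from raw snake_case type names to display names. With hypothetical values (the real ones come from tb_global["type"].unique()):

    types = ["flood", "extreme_temperature", "dry_mass_movement"]
    mapping = {t: t.capitalize().replace("_", " ") for t in types}
    # {'flood': 'Flood', 'extreme_temperature': 'Extreme temperature',
    #  'dry_mass_movement': 'Dry mass movement'}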
