Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📊 Update natural disasters data reference branch #2515

Merged
merged 7 commits into from
Apr 19, 2024
4 changes: 4 additions & 0 deletions apps/wizard/pages/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@
"snapshot/wb/income_groups.xlsx",
# World Bank country shapes.
"snapshot/countries/world_bank.zip",
# World Bank WDI.
"snapshot/worldbank_wdi/wdi.zip",
"meadow/worldbank_wdi/wdi",
"garden/worldbank_wdi/wdi",
# Other steps we don't want to update (because the underlying data does not get updated).
# TODO: We need a better way to achieve this, for example adding update_period_days to all steps and snapshots.
# A simpler alternative would be to move these steps to a separate file in a meaningful place.
Expand Down
22 changes: 22 additions & 0 deletions dag/archive/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,28 @@ steps:
data://grapher/un/2022-11-29/undp_hdr:
- data://garden/un/2022-11-29/undp_hdr

#
# EM-DAT Natural disasters (2023).
#
data://meadow/emdat/2023-09-20/natural_disasters:
- snapshot://emdat/2023-09-20/natural_disasters.xlsx
# The following dataset has a table for yearly data and another for decadal data.
data://garden/emdat/2023-09-20/natural_disasters:
- data://meadow/emdat/2023-09-20/natural_disasters
- data://garden/demography/2023-03-31/population
- data://garden/wb/2023-04-30/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/worldbank_wdi/2023-05-29/wdi
# The following dataset has all (yearly and decadal) variables together.
data://grapher/emdat/2023-09-20/natural_disasters:
- data://garden/emdat/2023-09-20/natural_disasters
# The following dataset has only global data, and entity corresponds to the type of disaster.
data://grapher/emdat/2023-09-20/natural_disasters_global_by_type:
- data://garden/emdat/2023-09-20/natural_disasters
# Natural disasters explorer.
data://explorers/emdat/2023-09-20/natural_disasters:
- data://garden/emdat/2023-09-20/natural_disasters

# Include all active steps plus all archive steps.
include:
- dag/main.yml
Expand Down
53 changes: 31 additions & 22 deletions dag/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -177,28 +177,6 @@ steps:
data://grapher/un/2024-04-09/undp_hdr:
- data://garden/un/2024-04-09/undp_hdr

#
# EM-DAT Natural disasters (2023).
#
data://meadow/emdat/2023-09-20/natural_disasters:
- snapshot://emdat/2023-09-20/natural_disasters.xlsx
# The following dataset has a table for yearly data and another for decadal data.
data://garden/emdat/2023-09-20/natural_disasters:
- data://meadow/emdat/2023-09-20/natural_disasters
- data://garden/demography/2023-03-31/population
- data://garden/wb/2023-04-30/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/worldbank_wdi/2023-05-29/wdi
# The following dataset has all (yearly and decadal) variables together.
data://grapher/emdat/2023-09-20/natural_disasters:
- data://garden/emdat/2023-09-20/natural_disasters
# The following dataset has only global data, and entity corresponds to the type of disaster.
data://grapher/emdat/2023-09-20/natural_disasters_global_by_type:
- data://garden/emdat/2023-09-20/natural_disasters
# Natural disasters explorer.
data://explorers/emdat/2023-09-20/natural_disasters:
- data://garden/emdat/2023-09-20/natural_disasters

# Country profiles - overview
data://garden/country_profile/2022/overview:
- backport://backport/owid/latest/dataset_5599_ihme__global_burden_of_disease__deaths_and_dalys__institute_for_health_metrics_and_evaluation__2022_04
Expand Down Expand Up @@ -720,6 +698,37 @@ steps:
data://grapher/wb/2024-03-11/income_groups:
- data://garden/wb/2024-03-11/income_groups

#
# EM-DAT Natural disasters.
#
data://meadow/emdat/2024-04-11/natural_disasters:
- snapshot://emdat/2024-04-11/natural_disasters.xlsx

# The following dataset has a table for yearly data and another for decadal data.
data://garden/emdat/2024-04-11/natural_disasters:
- data://meadow/emdat/2024-04-11/natural_disasters
- data://garden/demography/2023-03-31/population
- data://garden/wb/2024-03-11/income_groups
- data://garden/regions/2023-01-01/regions
- data://garden/worldbank_wdi/2023-05-29/wdi

# The following dataset has all (yearly and decadal) variables together.
data://grapher/emdat/2024-04-11/natural_disasters:
- data://garden/emdat/2024-04-11/natural_disasters

# The following dataset has only global data, and entity corresponds to the type of disaster.
data://grapher/emdat/2024-04-11/natural_disasters_global_by_type:
- data://garden/emdat/2024-04-11/natural_disasters

# Natural disasters explorer.
data://explorers/emdat/latest/natural_disasters:
- data://garden/emdat/2024-04-11/natural_disasters

######################################################################################################################
# Older versions that should be archived once they are not used by any other steps.

######################################################################################################################

include:
- dag/open_numbers.yml
- dag/faostat.yml
Expand Down
115 changes: 115 additions & 0 deletions etl/steps/data/explorers/emdat/latest/natural_disasters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""Natural disasters explorer data step.

Loads the latest EM-DAT natural_disasters data from garden and stores a table (as a csv file) for yearly data, and
another for decadal data.

NOTES:
* Some of the columns in the output files are not used by the explorer (although they appear in the "Sort by" dropdown
  menu); consider removing them. For now, we ensure all of the old columns are present, to avoid any possible issues.
* Most charts in the explorer are generated from the data in the files, but 3 of them are directly linked to grapher
charts, namely:
"All disasters (by type) - Deaths - Decadal average - false"
"All disasters (by type) - Deaths - Decadal average - true"
"All disasters (by type) - Economic damages (% GDP) - Decadal average - false"
At some point it would be good to let the explorer take all the data from files.

"""

from owid.catalog import Table

from etl.helpers import PathFinder, create_dataset

paths = PathFinder(__file__)

# Disaster type names as they appear in the garden tables (keys), mapped to the names used in the old explorer
# files (values). Types whose name is the same on both sides are listed explicitly, so that the mapping documents
# every disaster type that is expected.
DISASTER_TYPE_RENAMING = {
    "all_disasters": "all_disasters",
    "drought": "drought",
    "earthquake": "earthquake",
    "extreme_temperature": "temperature",
    "flood": "flood",
    "fog": "fog",
    "glacial_lake_outburst": "glacial_lake",
    "landslide": "landslide",
    "dry_mass_movement": "mass_movement",
    "extreme_weather": "storm",
    "volcanic_activity": "volcanic",
    "wildfire": "wildfire",
}


def create_wide_tables(table: Table) -> Table:
    """Reshape a long-format disasters table into the wide layout of the old explorer files.

    The disaster types are renamed using DISASTER_TYPE_RENAMING, the table is pivoted so that each disaster type
    gets its own columns, the resulting column names are adapted to the old naming used by the explorer, and
    columns that should not be in the explorer files are dropped.

    Parameters
    ----------
    table : Table
        Long-format table. After resetting its index, it must contain the columns "country", "year" and "type".

    Returns
    -------
    Table
        Wide-format table, after calling `format()` on it (which sets an appropriate index and sorts it).

    """
    # Flatten the index, and translate disaster type names into the ones used in the old explorer files.
    tb = table.reset_index()
    tb["type"] = tb.astype({"type": str})["type"].replace(DISASTER_TYPE_RENAMING)

    # Pivot to wide format: one row per country-year, with the disaster type moved into the column names.
    tb_wide = tb.pivot(index=["country", "year"], columns="type", join_column_levels_with="_")

    def _old_column_name(column: str) -> str:
        # Apply, in order, the substring replacements that turn a new column name into the old explorer name.
        column = column.replace("per_100k_people", "rate_per_100k")
        column = column.replace("total_dead", "deaths")
        return column.replace("total_damages_per_gdp", "total_damages_pct_gdp")

    # Every column is included in the mapping (even if its name does not change).
    old_names = {column: _old_column_name(column) for column in tb_wide.columns}
    tb_wide = tb_wide.rename(columns=old_names, errors="raise")

    # Columns to drop are selected either by prefix or by exact name (both refer to the already renamed columns).
    dropped_prefixes = (
        "gdp_",
        "n_events_",
        "population_",
        "insured_damages_per_gdp",
        "reconstruction_costs_per_gdp_",
    )
    dropped_columns = [
        "affected_rate_per_100k_glacial_lake",
        "homeless_rate_per_100k_glacial_lake",
        "total_damages_pct_gdp_fog",
    ]
    kept_columns = [
        column
        for column in tb_wide.columns
        if not (column.startswith(dropped_prefixes) or column in dropped_columns)
    ]
    tb_wide = tb_wide[kept_columns]

    # Set an appropriate index and sort conveniently.
    return tb_wide.format()


def run(dest_dir: str) -> None:
    """Create the natural disasters explorer dataset.

    Loads the "natural_disasters" garden dataset, converts its yearly and decadal tables to the wide format of the
    old explorer files, and saves both tables in csv format in a new dataset.

    Parameters
    ----------
    dest_dir : str
        Destination folder, passed on to `create_dataset`.

    """
    #
    # Load inputs.
    #
    # Load the garden dataset, and read its yearly and decadal tables.
    ds_garden = paths.load_dataset("natural_disasters")
    tb_yearly = ds_garden["natural_disasters_yearly"]
    tb_decadal = ds_garden["natural_disasters_decadal"]

    #
    # Process data.
    #
    # Adapt both tables to the old wide format used by the explorer (yearly first, then decadal).
    wide_tables = [create_wide_tables(table=tb) for tb in (tb_yearly, tb_decadal)]

    #
    # Save outputs.
    #
    # Create a new explorers dataset (reusing the garden dataset metadata) that stores its tables as csv files.
    ds_explorer = create_dataset(
        dest_dir,
        tables=wide_tables,
        default_metadata=ds_garden.metadata,
        check_variables_metadata=True,
        formats=["csv"],
    )
    ds_explorer.save()
Loading
Loading