Draft: fix surge lookup creation for new Xarray #13

Merged · 6 commits · Dec 22, 2024
61 changes: 39 additions & 22 deletions pyCIAM/io.py
@@ -36,6 +36,7 @@ def prep_sliiders(
seg_var="seg_adm",
selectors={},
calc_popdens_with_wetland_area=True,
expand_exposure=True,
storage_options={},
):
"""Import the SLIIDERS dataset (or a different dataset formatted analogously),
@@ -66,6 +67,11 @@ def prep_sliiders(
If True, assume that population can also exist in Wetland area. This is
observed empirically, but presumably at a lower density. Diaz 2016 assumes False
but Depsky 2023 assumes True.
expand_exposure : bool, default True
If the input contains population ("pop") and capital ("K") for a fixed year,
plus a country-level scaling factor for each year, setting this to True
(default) expands these into a panel dataset for each variable. This substantially
increases the size of the dataset, so it can be set to False if not needed.
storage_options : dict, optional
Passed to :py:func:`xarray.open_zarr`

@@ -99,14 +105,14 @@
).sel(selectors, drop=True)

inputs = inputs_all.sel({seg_var: seg_vals})
inputs = _s2d(inputs).assign(constants.to_dict())
inputs = _s2d(inputs).assign(constants)

# assign country level vars to each segment
for v in inputs.data_vars:
if "country" in inputs[v].dims:
inputs[v] = inputs[v].sel(country=inputs.seg_country).drop("country")

if "vsl" not in inputs.data_vars:
if "vsl" not in inputs.data_vars and "vsl_ypc_mult" in inputs.data_vars:
if "ref_income" in inputs:
ref_income = inputs.ref_income
else:
@@ -118,7 +124,7 @@
* (inputs.ypcc / ref_income) ** inputs.vsl_inc_elast
)

if "pop" not in inputs.data_vars:
if expand_exposure and "pop" not in inputs.data_vars:
exp_year = [
v for v in inputs.data_vars if v.startswith("pop_") and "scale" not in v
]
@@ -127,19 +133,23 @@
pop_var = "pop_" + exp_year
inputs["pop"] = inputs[pop_var] * inputs.pop_scale
inputs = inputs.drop(pop_var)
if "K" not in inputs.data_vars:
if expand_exposure and "K" not in inputs.data_vars:
K_var = "K_" + exp_year
inputs["K"] = inputs[K_var] * inputs.K_scale
inputs = inputs.drop(K_var)
if "dfact" not in inputs.data_vars:
if "dfact" not in inputs.data_vars and "npv_start" in inputs.data_vars:
inputs["dfact"] = (1 / (1 + inputs.dr)) ** (inputs.year - inputs.npv_start)

if "landrent" or "ypc" not in inputs.data_vars:
area = inputs.landarea
if calc_popdens_with_wetland_area:
area = area + inputs.wetland
popdens = (inputs.pop / area).fillna(0)
if "landrent" not in inputs.data_vars:
if (
"landrent" not in inputs.data_vars
and "min_coastland_scale" in inputs.data_vars
and "dr" in inputs.data_vars
):
coastland_scale = np.minimum(
1,
np.maximum(
@@ -149,26 +159,32 @@
)
inputs["landrent"] = inputs.interior * coastland_scale * inputs.dr

if "ypc" not in inputs.data_vars:
if (
"ypc" not in inputs.data_vars
and "min_pyc_scale" in inputs.data_vars
and "ypc_scale_denom" in inputs.data_vars
and "ypc_scale_elast" in inputs.data_vars
):
ypc_scale = np.maximum(
inputs.min_ypc_scale,
(popdens / inputs.ypc_scale_denom) ** inputs.ypc_scale_elast,
)
inputs["ypc"] = ypc_scale * inputs.ypcc

to_drop = [
"interior",
"dr",
"min_coastland_scale",
"min_ypc_scale",
"ypc_scale_denom",
"ypc_scale_elast",
"vsl_ypc_mult",
"vsl_inc_elast",
]
if expand_exposure:
to_drop += ["pop_scale", "K_scale"]
return inputs.drop(
[
"pop_scale",
"K_scale",
"interior",
"dr",
"min_coastland_scale",
"min_ypc_scale",
"ypc_scale_denom",
"ypc_scale_elast",
"vsl_ypc_mult",
"vsl_inc_elast",
],
to_drop,
errors="ignore",
)

@@ -555,7 +571,8 @@ def get_nearest_slrs(slr_ds, lonlats, x1="seg_lon", y1="seg_lat"):

def add_nearest_slrs(sliiders_ds, slr_ds):
"""Add a variable to ``sliiders_ds`` called `SLR_site_id` that contains the nearest
SLR site to each segment."""
SLR site to each segment.
"""
sliiders_lonlat = sliiders_ds[["seg_lon", "seg_lat"]].to_dataframe()
return sliiders_ds.assign(
SLR_site_id=get_nearest_slrs(slr_ds, sliiders_lonlat).to_xarray()
@@ -672,7 +689,7 @@ def load_ciam_inputs(
seg_vals,
# dropping the "refA_scenario_selectors" b/c this doesn't need to be added to
# the input dataset object
constants=params[params.map(type) != dict],
constants=params[params.map(type) != dict].to_dict(),
seg_var=seg_var,
selectors=selectors,
storage_options=storage_options,
@@ -771,7 +788,7 @@ def load_diaz_inputs(
inputs = prep_sliiders(
input_store,
seg_vals,
constants=params[params.map(type) != dict],
constants=params[params.map(type) != dict].to_dict(),
seg_var="seg",
calc_popdens_with_wetland_area=False,
storage_options=storage_options,
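Note on the new `expand_exposure` flag above: when the input stores exposure for a single reference year (e.g. a `pop_2019` variable) plus a per-year scale factor, the flag controls whether these are broadcast into a full panel. A minimal sketch of that expansion, with illustrative names and values rather than real SLIIDERS data:

```python
import numpy as np
import xarray as xr

# Illustrative stand-in for a SLIIDERS-like input: exposure in a fixed year
# plus a per-year scale factor (already mapped from country to segment here).
ds = xr.Dataset(
    {
        "pop_2019": ("seg", np.array([100.0, 250.0])),
        "pop_scale": (
            ("seg", "year"),
            np.array([[1.0, 1.02, 1.04], [1.0, 1.05, 1.11]]),
        ),
    },
    coords={"seg": ["seg_a", "seg_b"], "year": [2019, 2020, 2021]},
)

# expand_exposure=True behavior: broadcast to a (seg, year) panel and drop the
# fixed-year variable; expand_exposure=False skips this to keep the dataset small.
ds["pop"] = ds["pop_2019"] * ds["pop_scale"]
ds = ds.drop_vars("pop_2019")
print(ds["pop"].dims)  # ('seg', 'year')
```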
25 changes: 12 additions & 13 deletions pyCIAM/run.py
@@ -504,7 +504,8 @@ def calc_costs(
# --------- ELEVATION DISTRIBUTION-DEPENDENT COSTS ----------
def calc_elev_bin_weights(slr, lb_elevs, bin_width):
"""Calculates the fraction of a cell inundated/abandoned given a defined
slr/retreat height."""
slr/retreat height.
"""
return _pos(np.minimum(slr - lb_elevs, bin_width)) / bin_width

# loop over each elevation band to sum up elevation-distribution-dependent costs
@@ -832,7 +833,6 @@ def select_optimal_case(
``optimalfixed``, which represents the optimal adaptation choice for this region
for each socioeconomic and SLR trajectory.
"""

opt_case = (
xr.open_zarr(
str(all_case_cost_path), chunks=None, storage_options=storage_options
@@ -898,7 +898,7 @@ def execute_pyciam(
diaz_config=False,
dask_client_func=Client,
storage_options=None,
**model_kwargs
**model_kwargs,
):
"""Execute the full pyCIAM model. The following inputs are assumed:

@@ -1032,7 +1032,6 @@
**model_kwargs
Passed directly to :py:func:`pyCIAM.calc_costs`
"""

# convert filepaths to appropriate path representation
(
params_path,
@@ -1190,7 +1189,7 @@
"case": CASES,
"costtype": COSTTYPES,
seg_var: ciam_in[seg_var].values,
"scenario": slr.scenario,
"scenario": slr.scenario.astype("unicode"),
"quantile": quantiles,
"year": np.arange(params.model_start, ciam_in.year.max().item() + 1),
**{
@@ -1251,7 +1250,7 @@
quantiles=quantiles,
diaz_inputs=diaz_inputs,
eps=eps,
**model_kwargs
**model_kwargs,
),
dim="seg",
)
@@ -1316,7 +1315,7 @@
storage_options=storage_options,
diaz_inputs=diaz_inputs,
check=check,
**model_kwargs
**model_kwargs,
)
)

@@ -1365,7 +1364,7 @@
seg_var=seg_var,
eps=eps,
check=check,
storage_options=storage_options
storage_options=storage_options,
),
axis=1,
)
@@ -1439,7 +1438,7 @@
quantiles=[0.5],
eps=1,
diaz_inputs=False,
**model_kwargs
**model_kwargs,
):
if diaz_inputs:
inputs, slr = load_diaz_inputs(
@@ -1463,7 +1462,7 @@
include_ncc=True,
storage_options=storage_options,
quantiles=quantiles,
**params.refA_scenario_selectors
**params.refA_scenario_selectors,
)
slr = slr.unstack("scen_mc")
slr = slr.squeeze(drop=True)
@@ -1514,7 +1513,7 @@
storage_options={},
check=True,
diaz_inputs=False,
**model_kwargs
**model_kwargs,
):
if check_finished_zarr_workflow(
finalstore=output_path if check else None,
@@ -1565,7 +1564,7 @@
surge_lookup=surge,
elev_chunksize=None,
min_R_noadapt=refA,
**model_kwargs
**model_kwargs,
).to_dataset(name="costs")
if seg_var != "seg":
out = out.rename(seg=seg_var)
@@ -1589,7 +1588,7 @@
seg_var="seg_adm",
check=True,
eps=1,
storage_options={}
storage_options={},
):
# use last fpath to check if this task has already been run
if check and check_finished_zarr_workflow(
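One small but consequential change in run.py above is casting the `scenario` coordinate to a unicode dtype when building the output zarr skeleton. A hedged sketch of the dtype difference (the scenario names are illustrative): string coordinates often arrive as `dtype=object`, which needs special codec handling at serialization time, while a fixed-width unicode dtype is unambiguous.

```python
import numpy as np
import xarray as xr

# String coordinates frequently carry dtype=object; a fixed-width unicode
# dtype avoids object-array handling when the skeleton is written to zarr.
scenario = xr.DataArray(
    np.array(["ssp126", "ssp585"], dtype=object), dims="scenario", name="scenario"
)
print(scenario.dtype)                    # object
print(scenario.astype("unicode").dtype)  # <U6, unambiguous on disk
```

The trailing-comma additions (`**model_kwargs,`) appear purely stylistic, consistent with black-style formatting.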
21 changes: 10 additions & 11 deletions pyCIAM/surge/lookup.py
@@ -1,4 +1,6 @@
"""This module contains functions related to creating a storm surge lookup table used
"""Functions to create a storm surge lookup table.

This module contains functions related to creating a storm surge lookup table used
when running pyCIAM in "probabilistic" mode (i.e. running on many thousands of Monte
Carlo samples of sea level rise trajectories). In this mode, calculating storm surge
damages for each elevation slice, each year, each segment, each socioeconomic
@@ -61,8 +63,7 @@ def _get_lslr_rhdiff_range(
mc_dim="mc_sample_id",
storage_options={},
):
"""Get the range of lslr and rhdiff that we need to model to cover the full range
across scenario/mcs.
"""Get range of lslr and rhdiff that we need to model to cover the full range.

The minimum LSLR value we'll need to model for the purposes of
assessing storm damage is the minimum across sites of: the site-level maximum of "0
@@ -71,7 +72,6 @@
maximum experienced at any site in any year for all of the scenario/mcs we use in
the binned LSL dataset.
"""

if isinstance(slr_0_years, int):
slr_0_years = [slr_0_years] * len(slr_stores)
assert len(slr_0_years) == len(slr_stores)
@@ -153,14 +153,12 @@
{
"lslr_by_seg": (
("lslr", seg_var),
np.linspace(min_lslr, max_lslr, n_interp_pts_lslr),
np.linspace(min_lslr.values, max_lslr.values, n_interp_pts_lslr),
),
"rh_diff_by_seg": (
("rh_diff", seg_var),
np.linspace(0, rh_diff_max, n_interp_pts_rhdiff),
np.linspace(0, rh_diff_max.values, n_interp_pts_rhdiff),
),
},
coords={
"lslr": np.arange(n_interp_pts_lslr),
"rh_diff": np.arange(n_interp_pts_lslr),
seg_var: pc_in[seg_var].values,
@@ -245,7 +243,7 @@
slr_0_years=2005,
storage_options={},
):
"""Function to map over each chunk to run through damage calcs."""
"""Map over each chunk to run through damage calcs."""
diff_ranges = _get_lslr_rhdiff_range(
sliiders_store,
slr_stores,
@@ -381,7 +379,9 @@ def create_surge_lookup(
client_kwargs={},
storage_options={},
):
"""Create a storm surge lookup table which is used to define a linear spline
"""Create storm surge lookup table.

Create a storm surge lookup table which is used to define a linear spline
function for each region modeled in pyCIAM. This output is not strictly necessary to
run pyCIAM but substantially reduces computational expense when running pyCIAM on a
large probabilistic ensemble of SLR trajectories.
@@ -460,7 +460,6 @@
-------
Returns None, but saves storm surge lookup table to `surge_lookup_store`.
"""

to_save = _create_surge_lookup_skeleton_store(
sliiders_store,
n_interp_pts_lslr,
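The `.values` additions in `_get_lslr_rhdiff_range` are the heart of this fix: plain NumPy arrays are passed to `np.linspace` instead of `DataArray` objects, presumably to sidestep endpoint-handling changes in newer xarray. For reference, `np.linspace` accepts array-valued endpoints (NumPy >= 1.16), and the leading axis of the result is the interpolation axis, matching the `("lslr", seg_var)` dimension tuple the result is assigned to. A small sketch with assumed, illustrative data:

```python
import numpy as np
import xarray as xr

# Illustrative per-segment bounds; in the real function these come from
# reductions over SLR scenarios and Monte Carlo samples.
min_lslr = xr.DataArray(np.array([0.0, 0.1]), dims="seg")
max_lslr = xr.DataArray(np.array([1.0, 2.0]), dims="seg")

# Array-valued endpoints: one linspace per segment, interpolation axis first.
grid = np.linspace(min_lslr.values, max_lslr.values, 5)
print(grid.shape)  # (5, 2), i.e. (n_interp_pts_lslr, n_segments)
```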
7 changes: 1 addition & 6 deletions pyproject.toml
@@ -19,17 +19,12 @@ authors = [{ name = "Ian Bolliger", email = "[email protected]"}, { name = "Nichol
maintainers = [{ name = "Ian Bolliger", email = "[email protected]"}]
dependencies = [
"cloudpathlib",
"dask",
"distributed",
"gitpython",
"numpy",
"rhg_compute_tools",
"pandas",
"parameterize_jobs",
"pint-xarray",
"scipy",
"scikit-learn",
"xarray",
"xarray[accel,parallel]",
"zarr"
]
requires-python = ">=3.6"
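On the pyproject.toml change: the `accel` and `parallel` extras of xarray pull in its optional acceleration and dask-based parallelism dependencies, which presumably covers several of the direct pins removed above (e.g. `dask`/`distributed` and `scipy`). The exact contents of those extras are version-dependent, so check the installed xarray's metadata if a removed package is still needed directly.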