From 69078195452b43fb5b3df3899e158e44b9075bab Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Fri, 15 Nov 2024 15:41:19 -0800 Subject: [PATCH 1/6] include io dependencies of xarray --- pyproject.toml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c2ee927..c089902 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,18 +19,12 @@ authors = [{ name = "Ian Bolliger", email = "ian@reask.earth"}, { name = "Nichol maintainers = [{ name = "Ian Bolliger", email = "ian@reask.earth"}] dependencies = [ "cloudpathlib", - "dask", - "distributed", "gitpython", - "numpy", "rhg_compute_tools", - "pandas", "parameterize_jobs", "pint-xarray", - "scipy", "scikit-learn", - "xarray", - "zarr" + "xarray[complete]", ] requires-python = ">=3.6" dynamic = ["version"] From e7173c916aff6cce636a6a554898c2e5c49fc6cc Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Fri, 15 Nov 2024 16:27:09 -0800 Subject: [PATCH 2/6] fix storm surge lookup creation for newer xarray --- pyCIAM/surge/lookup.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/pyCIAM/surge/lookup.py b/pyCIAM/surge/lookup.py index 7e9d08d..789e785 100644 --- a/pyCIAM/surge/lookup.py +++ b/pyCIAM/surge/lookup.py @@ -1,4 +1,6 @@ -"""This module contains functions related to creating a storm surge lookup table used +"""Functions to create a storm surge lookup table. + +This module contains functions related to creating a storm surge lookup table used when running pyCIAM in "probabilistic" mode (i.e. running on many thousands of Monte Carlo samples of sea level rise trajectories). In this mode, calculating storm surge damages for each elevation slice, each year, each segment, each socioeconomic @@ -61,8 +63,7 @@ def _get_lslr_rhdiff_range( mc_dim="mc_sample_id", storage_options={}, ): - """Get the range of lslr and rhdiff that we need to model to cover the full range - across scenario/mcs. + """Get range of lslr and rhdiff that we need to model to cover the full range. The minimum LSLR value we'll need to model for the purposes of assessing storm damage is the minimum across sites of: the site-level maximum of "0 @@ -71,7 +72,6 @@ def _get_lslr_rhdiff_range( maximum experienced at any site in any year for all of the sceanrio/mcs we use in the binned LSL dataset. 
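    Because these bounds are computed per segment (arrays rather than scalars),
    the interpolation axes can be built by letting ``np.linspace`` broadcast
    array endpoints into a 2-D grid. A minimal, illustrative sketch with
    hypothetical values::

        >>> import numpy as np
        >>> lo = np.array([0.0, -0.1])    # per-segment minimum LSLR
        >>> hi = np.array([2.5, 3.0])     # per-segment maximum LSLR
        >>> np.linspace(lo, hi, 4).shape  # (n_interp_pts, n_segments)
        (4, 2)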
""" - if isinstance(slr_0_years, int): slr_0_years = [slr_0_years] * len(slr_stores) assert len(slr_0_years) == len(slr_stores) @@ -149,18 +149,25 @@ def _get_lslr_rhdiff_range( # occasionally, the gumbel fit was negative, so we set the 1-year return to 0 assert (rh_diff_max > 0).all() + seg_vals = pc_in[seg_var].values return xr.Dataset( { "lslr_by_seg": ( ("lslr", seg_var), - np.linspace(min_lslr, max_lslr, n_interp_pts_lslr), + np.repeat( + np.linspace(min_lslr, max_lslr, n_interp_pts_lslr)[:, np.newaxis], + len(seg_vals), + axis=1, + ), ), "rh_diff_by_seg": ( ("rh_diff", seg_var), - np.linspace(0, rh_diff_max, n_interp_pts_rhdiff), + np.repeat( + np.linspace(0, rh_diff_max, n_interp_pts_rhdiff)[:, np.newaxis], + len(seg_vals), + axis=1, + ), ), - }, - coords={ "lslr": np.arange(n_interp_pts_lslr), "rh_diff": np.arange(n_interp_pts_lslr), seg_var: pc_in[seg_var].values, @@ -245,7 +252,7 @@ def _save_storm_dam( slr_0_years=2005, storage_options={}, ): - """Function to map over each chunk to run through damage calcs.""" + """Map over each chunk to run through damage calcs.""" diff_ranges = _get_lslr_rhdiff_range( sliiders_store, slr_stores, @@ -381,7 +388,9 @@ def create_surge_lookup( client_kwargs={}, storage_options={}, ): - """Create a storm surge lookup table which is used to define a linear spline + """Create storm surge lookup table. + + Create a storm surge lookup table which is used to define a linear spline function for each region modeled in pyCIAM. This output is not strictly necessary to run pyCIAM but substantially reduces computational expense when running pyCIAM on a large probabilistic ensemble of SLR trajectories. @@ -460,7 +469,6 @@ def create_surge_lookup( ------- Returns None, but saves storm surge lookup table to `surge_lookup_store`. 
""" - to_save = _create_surge_lookup_skeleton_store( sliiders_store, n_interp_pts_lslr, From 592894c192bda5c07610510c369aa05e99609e6f Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Fri, 15 Nov 2024 16:47:19 -0800 Subject: [PATCH 3/6] try that fix again --- pyCIAM/surge/lookup.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/pyCIAM/surge/lookup.py b/pyCIAM/surge/lookup.py index 789e785..43bb7bb 100644 --- a/pyCIAM/surge/lookup.py +++ b/pyCIAM/surge/lookup.py @@ -149,24 +149,15 @@ def _get_lslr_rhdiff_range( # occasionally, the gumbel fit was negative, so we set the 1-year return to 0 assert (rh_diff_max > 0).all() - seg_vals = pc_in[seg_var].values return xr.Dataset( { "lslr_by_seg": ( ("lslr", seg_var), - np.repeat( - np.linspace(min_lslr, max_lslr, n_interp_pts_lslr)[:, np.newaxis], - len(seg_vals), - axis=1, - ), + np.linspace(min_lslr.values, max_lslr.values, n_interp_pts_lslr), ), "rh_diff_by_seg": ( ("rh_diff", seg_var), - np.repeat( - np.linspace(0, rh_diff_max, n_interp_pts_rhdiff)[:, np.newaxis], - len(seg_vals), - axis=1, - ), + np.linspace(0, rh_diff_max.values, n_interp_pts_rhdiff), ), "lslr": np.arange(n_interp_pts_lslr), "rh_diff": np.arange(n_interp_pts_lslr), From 65882e80848766950c736f62982f371da507cc0a Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Fri, 15 Nov 2024 17:21:19 -0800 Subject: [PATCH 4/6] be more precise on xarray deps --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c089902..7e08fc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,8 @@ dependencies = [ "parameterize_jobs", "pint-xarray", "scikit-learn", - "xarray[complete]", + "xarray[accel,parallel]", + "zarr" ] requires-python = ">=3.6" dynamic = ["version"] From 344b84bfdfb200d7c1c18728927560caa4d4d627 Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Sun, 17 Nov 2024 15:27:38 -0800 Subject: [PATCH 5/6] fix potential non-unicode slr scenario coords --- pyCIAM/run.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pyCIAM/run.py b/pyCIAM/run.py index 51b5e6a..01aee3a 100644 --- a/pyCIAM/run.py +++ b/pyCIAM/run.py @@ -504,7 +504,8 @@ def calc_costs( # --------- ELEVATION DISTRIBUTION-DEPENENT COSTS ---------- def calc_elev_bin_weights(slr, lb_elevs, bin_width): """Calculates the fraction of a cell inundated/abandoned given a defined - slr/retreat height.""" + slr/retreat height. + """ return _pos(np.minimum(slr - lb_elevs, bin_width)) / bin_width # loop over each elevation band to sum up elevation-distribution-dependent costs @@ -832,7 +833,6 @@ def select_optimal_case( ``optimalfixed``, which represents the optimal adaptation choice for this region for each socioeconomic and SLR trajectory. """ - opt_case = ( xr.open_zarr( str(all_case_cost_path), chunks=None, storage_options=storage_options @@ -898,7 +898,7 @@ def execute_pyciam( diaz_config=False, dask_client_func=Client, storage_options=None, - **model_kwargs + **model_kwargs, ): """Execute the full pyCIAM model. 
The following inputs are assumed: @@ -1032,7 +1032,6 @@ def execute_pyciam( **model_kwargs Passed directly to :py:func:`pyCIAM.calc_costs` """ - # convert filepaths to appropriate path representation ( params_path, @@ -1190,7 +1189,7 @@ def execute_pyciam( "case": CASES, "costtype": COSTTYPES, seg_var: ciam_in[seg_var].values, - "scenario": slr.scenario, + "scenario": slr.scenario.astype("unicode"), "quantile": quantiles, "year": np.arange(params.model_start, ciam_in.year.max().item() + 1), **{ @@ -1251,7 +1250,7 @@ def execute_pyciam( quantiles=quantiles, diaz_inputs=diaz_inputs, eps=eps, - **model_kwargs + **model_kwargs, ), dim="seg", ) @@ -1316,7 +1315,7 @@ def execute_pyciam( storage_options=storage_options, diaz_inputs=diaz_inputs, check=check, - **model_kwargs + **model_kwargs, ) ) @@ -1365,7 +1364,7 @@ def execute_pyciam( seg_var=seg_var, eps=eps, check=check, - storage_options=storage_options + storage_options=storage_options, ), axis=1, ) @@ -1439,7 +1438,7 @@ def get_refA( quantiles=[0.5], eps=1, diaz_inputs=False, - **model_kwargs + **model_kwargs, ): if diaz_inputs: inputs, slr = load_diaz_inputs( @@ -1463,7 +1462,7 @@ def get_refA( include_ncc=True, storage_options=storage_options, quantiles=quantiles, - **params.refA_scenario_selectors + **params.refA_scenario_selectors, ) slr = slr.unstack("scen_mc") slr = slr.squeeze(drop=True) @@ -1514,7 +1513,7 @@ def calc_all_cases( storage_options={}, check=True, diaz_inputs=False, - **model_kwargs + **model_kwargs, ): if check_finished_zarr_workflow( finalstore=output_path if check else None, @@ -1565,7 +1564,7 @@ def calc_all_cases( surge_lookup=surge, elev_chunksize=None, min_R_noadapt=refA, - **model_kwargs + **model_kwargs, ).to_dataset(name="costs") if seg_var != "seg": out = out.rename(seg=seg_var) @@ -1589,7 +1588,7 @@ def optimize_case( seg_var="seg_adm", check=True, eps=1, - storage_options={} + storage_options={}, ): # use last fpath to check if this task has already been run if check and check_finished_zarr_workflow( From 07a2d27fd3c987205de5d867953fcc7dd25d1759 Mon Sep 17 00:00:00 2001 From: Ian Bolliger Date: Fri, 22 Nov 2024 11:25:52 -0800 Subject: [PATCH 6/6] improve dict conversions in prep_sliiders --- pyCIAM/io.py | 61 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/pyCIAM/io.py b/pyCIAM/io.py index f7f956b..63d985e 100644 --- a/pyCIAM/io.py +++ b/pyCIAM/io.py @@ -36,6 +36,7 @@ def prep_sliiders( seg_var="seg_adm", selectors={}, calc_popdens_with_wetland_area=True, + expand_exposure=True, storage_options={}, ): """Import the SLIIDERS dataset (or a different dataset formatted analogously), @@ -66,6 +67,11 @@ def prep_sliiders( If True, assume that population can also exist in Wetland area. This is observed empirically, but presumably at a lower density. Diaz 2016 assumes False but Depsky 2023 assumes True. + expand_exposure : bool, default True + If the input contains population ("pop") and capital ("K") for a fixed year, + plus a country-level scaling factor for each year, setting this to True + (default) expands this to a panel dataset of each variable. 
This substantially + increases size of the dataset, so can be set to False if not Needed storage_options : dict, optional Passed to :py:function:`xarray.open_zarr` @@ -99,14 +105,14 @@ def prep_sliiders( ).sel(selectors, drop=True) inputs = inputs_all.sel({seg_var: seg_vals}) - inputs = _s2d(inputs).assign(constants.to_dict()) + inputs = _s2d(inputs).assign(constants) # assign country level vars to each segment for v in inputs.data_vars: if "country" in inputs[v].dims: inputs[v] = inputs[v].sel(country=inputs.seg_country).drop("country") - if "vsl" not in inputs.data_vars: + if "vsl" not in inputs.data_vars and "vsl_ypc_mult" in inputs.data_vars: if "ref_income" in inputs: ref_income = inputs.ref_income else: @@ -118,7 +124,7 @@ def prep_sliiders( * (inputs.ypcc / ref_income) ** inputs.vsl_inc_elast ) - if "pop" not in inputs.data_vars: + if expand_exposure and "pop" not in inputs.data_vars: exp_year = [ v for v in inputs.data_vars if v.startswith("pop_") and "scale" not in v ] @@ -127,11 +133,11 @@ def prep_sliiders( pop_var = "pop_" + exp_year inputs["pop"] = inputs[pop_var] * inputs.pop_scale inputs = inputs.drop(pop_var) - if "K" not in inputs.data_vars: + if expand_exposure and "K" not in inputs.data_vars: K_var = "K_" + exp_year inputs["K"] = inputs[K_var] * inputs.K_scale inputs = inputs.drop(K_var) - if "dfact" not in inputs.data_vars: + if "dfact" not in inputs.data_vars and "npv_start" in inputs.data_vars: inputs["dfact"] = (1 / (1 + inputs.dr)) ** (inputs.year - inputs.npv_start) if "landrent" or "ypc" not in inputs.data_vars: @@ -139,7 +145,11 @@ def prep_sliiders( if calc_popdens_with_wetland_area: area = area + inputs.wetland popdens = (inputs.pop / area).fillna(0) - if "landrent" not in inputs.data_vars: + if ( + "landrent" not in inputs.data_vars + and "min_coastland_scale" in inputs.data_vars + and "dr" in inputs.data_vars + ): coastland_scale = np.minimum( 1, np.maximum( @@ -149,26 +159,32 @@ def prep_sliiders( ) inputs["landrent"] = inputs.interior * coastland_scale * inputs.dr - if "ypc" not in inputs.data_vars: + if ( + "ypc" not in inputs.data_vars + and "min_pyc_scale" in inputs.data_vars + and "ypc_scale_denom" in inputs.data_vars + and "ypc_scale_elast" in inputs.data_vars + ): ypc_scale = np.maximum( inputs.min_ypc_scale, (popdens / inputs.ypc_scale_denom) ** inputs.ypc_scale_elast, ) inputs["ypc"] = ypc_scale * inputs.ypcc + to_drop = [ + "interior", + "dr", + "min_coastland_scale", + "min_ypc_scale", + "ypc_scale_denom", + "ypc_scale_elast", + "vsl_ypc_mult", + "vsl_inc_elast", + ] + if expand_exposure: + to_drop += ["pop_scale", "K_scale"] return inputs.drop( - [ - "pop_scale", - "K_scale", - "interior", - "dr", - "min_coastland_scale", - "min_ypc_scale", - "ypc_scale_denom", - "ypc_scale_elast", - "vsl_ypc_mult", - "vsl_inc_elast", - ], + to_drop, errors="ignore", ) @@ -555,7 +571,8 @@ def get_nearest_slrs(slr_ds, lonlats, x1="seg_lon", y1="seg_lat"): def add_nearest_slrs(sliiders_ds, slr_ds): """Add a variable to ``sliiders_ds`` called `SLR_site_id` that contains the nearest - SLR site to each segment.""" + SLR site to each segment. 
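    A rough sketch of this kind of nearest-neighbour matching on segment
    coordinates (illustrative only; the actual matching is performed by
    ``get_nearest_slrs`` and may differ, e.g. by using great-circle distance)::

        >>> import numpy as np
        >>> from scipy.spatial import cKDTree
        >>> slr_pts = np.array([[0.0, 0.0], [10.0, 5.0]])  # (lon, lat) of SLR sites
        >>> seg_pts = np.array([[9.0, 4.0], [1.0, -1.0]])  # (lon, lat) of segments
        >>> _, idx = cKDTree(slr_pts).query(seg_pts)
        >>> idx.tolist()  # nearest SLR site index for each segment
        [1, 0]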
+ """ sliiders_lonlat = sliiders_ds[["seg_lon", "seg_lat"]].to_dataframe() return sliiders_ds.assign( SLR_site_id=get_nearest_slrs(slr_ds, sliiders_lonlat).to_xarray() @@ -672,7 +689,7 @@ def load_ciam_inputs( seg_vals, # dropping the "refA_scenario_selectors" b/c this doesn't need to be added to # the input dataset object - constants=params[params.map(type) != dict], + constants=params[params.map(type) != dict].to_dict(), seg_var=seg_var, selectors=selectors, storage_options=storage_options, @@ -771,7 +788,7 @@ def load_diaz_inputs( inputs = prep_sliiders( input_store, seg_vals, - constants=params[params.map(type) != dict], + constants=params[params.map(type) != dict].to_dict(), seg_var="seg", calc_popdens_with_wetland_area=False, storage_options=storage_options,